diff --git a/youtube_dl/extractor/stitcher.py b/youtube_dl/extractor/stitcher.py index 3044aaa42..1866b5ee5 100644 --- a/youtube_dl/extractor/stitcher.py +++ b/youtube_dl/extractor/stitcher.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals - from .common import InfoExtractor +import re class StitcherIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/[\/a-z\-]+\d+\?.+' + _VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/[\/a-z\-]+(?P<id>\d+)|\?[a-z=]+' _TEST = { 'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true', 'md5': '391dd4e021e6edeb7b8e68fbf2e9e940', @@ -17,14 +17,14 @@ class StitcherIE(InfoExtractor): } def _real_extract(self, url): - audio_id = self._search_regex(r'[a-z\/\-\:\/\/.]+?(\d+?)\?.+', url, "audio_id") + audio_id = self._match_id(url) webpage = self._download_webpage(url, audio_id) title = self._og_search_title(webpage) - url = self._search_regex(r'[\s\S]*episodeURL: "(.+?)"[\s\S]*', webpage, 'url') - episode_image = self._search_regex(r'[\s\S]*episodeImage: "(.+?)"[\s\S]*', webpage, 'thumbnail') - duration = int(self._search_regex(r'[\s\S]*duration: (\d+?),[\s\S]*', webpage, 'duration')) / 60 + url = self._search_regex(r'episodeURL: "(.+?)"', webpage, 'url') + episode_image = self._search_regex(r'episodeImage: "(.+?)"', webpage, 'episode_image', fatal=False) + duration = self._search_regex(r'simpleDuration: "(\d+?) minutes"', webpage, 'duration', fatal=False) return { 'id': audio_id,