From 51d2453c7ade642e7f2253ed2211824bd0a2a9ff Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Tue, 21 May 2013 16:07:27 +0200 Subject: [PATCH 01/24] small tweaks --- youtube_dl/InfoExtractors.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 6b644e15f..42e4d0352 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4066,7 +4066,7 @@ class TumblrIE(InfoExtractor): re_video = r'src=\\x22(?Phttp://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P.*?)\\x22' % (blog, video_id) video = re.search(re_video, webpage) if video is None: - self.to_screen("No video founded") + self.to_screen("No video found") return [] video_url = video.group('video_url') ext = video.group('ext') @@ -4281,7 +4281,7 @@ class VineIE(InfoExtractor): class FlickrIE(InfoExtractor): """Information Extractor for Flickr videos""" - _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P[\w\-]+)/(?P\d+).*' + _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P[\w\-_@]+)/(?P\d+).*' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -4291,15 +4291,13 @@ class FlickrIE(InfoExtractor): webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id webpage = self._download_webpage(webpage_url, video_id) - self.report_extraction(video_id) - mobj = re.search(r"photo_secret: '(\w+)'", webpage) if mobj is None: raise ExtractorError(u'Unable to extract video secret') secret = mobj.group(1) first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self' - first_xml = self._download_webpage(first_url, video_id) + first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage') mobj = re.search(r'(\d+-\d+)', first_xml) if mobj is None: @@ -4307,7 +4305,9 @@ class FlickrIE(InfoExtractor): node_id = mobj.group(1) second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1' - second_xml = self._download_webpage(second_url, video_id) + second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage') + + self.report_extraction(video_id) mobj = re.search(r' Date: Thu, 23 May 2013 13:34:33 +0200 Subject: [PATCH 02/24] Fix HowCast IE --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 42e4d0352..d318b4b03 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4205,7 +4205,7 @@ class HowcastIE(InfoExtractor): self.report_extraction(video_id) - mobj = re.search(r'\'file\': "(http://mobile-media\.howcast\.com/\d+\.mp4)"', webpage) + mobj = re.search(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)"', webpage) if mobj is None: raise ExtractorError(u'Unable to extract video URL') video_url = mobj.group(1) From 57adeaea87fd6fdd3cdd49e97d8d9e7f7feab833 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 23 May 2013 13:37:19 +0200 Subject: [PATCH 03/24] release 2013.05.23 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dbc928394..1cda7fa74 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.05.14' +__version__ = '2013.05.23' From f2cd958c0a09bca580a955c60c19e177f6ff45b8 Mon Sep 17 00:00:00 2001 From: mc2avr Date: Thu, 23 May 2013 21:42:03 +0200 Subject: [PATCH 04/24] add ZDFIE and _download_with_mplayer(mms://,rtsp://) --- youtube_dl/FileDownloader.py | 37 ++++++++++++++++++++++ youtube_dl/InfoExtractors.py | 59 ++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 49f3a8712..2c35a05d8 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -810,6 +810,39 @@ class FileDownloader(object): self.report_error(u'rtmpdump exited with code %d' % retval) return False + def _download_with_mplayer(self, filename, url): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + +# args = ['mmsclient', url] # doesn't work anymore +# args = ['wpro', url, '-O', tmpfilename] # dont work + args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] + # Check for mplayer first + try: + subprocess.call(args[0], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + except (OSError, IOError): + self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] ) + return False + + # Download using mplayer. + retval = subprocess.call(args) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'%s exited with code %d' % (args[0], retval)) + return False + + def _do_download(self, filename, info_dict): url = info_dict['url'] @@ -830,6 +863,10 @@ class FileDownloader(object): info_dict.get('play_path', None), info_dict.get('tc_url', None)) + # Attempt to download using mplayer + if url.startswith('mms') or url.startswith('rtsp'): + return self._download_with_mplayer(filename, url) + tmpfilename = self.temp_name(filename) stream = None diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 42e4d0352..4ca744daf 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4052,6 +4052,64 @@ class ARDIE(InfoExtractor): info["url"] = stream["video_url"] return [info] +class ZDFIE(InfoExtractor): + _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P[^/\?]+)(?:\?.*)?' + _TITLE = r'(?P.*)</h1>' + _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>' + _MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"' + _RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + video_id = mobj.group('video_id') + + html = self._download_webpage(url, video_id) + streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)] + if streams is None: + raise ExtractorError(u'No media url found.') + + # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' und mms url + # s['media_type'] == 'hstreaming' -> use 'Quicktime' und rtsp url + # choose first/default media type and highest quality for now + for s in streams: #find 300 - dsl1000mbit + if s['quality'] == '300' and s['media_type'] == 'wstreaming': + stream_=s + break + for s in streams: #find veryhigh - dsl2000mbit + if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming': # 'hstreaming' - rtsp is not working + stream_=s + break + if stream_ is None: + raise ExtractorError(u'No stream found.') + + media_link = self._download_webpage(stream_['video_url'], video_id,'Get stream URL') + + self.report_extraction(video_id) + mobj = re.search(self._TITLE, html) + if mobj is None: + raise ExtractorError(u'Cannot extract title') + title = unescapeHTML(mobj.group('title')) + + mobj = re.search(self._MMS_STREAM, media_link) + if mobj is None: + mobj = re.search(self._RTSP_STREAM, media_link) + if mobj is None: + raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL') + mms_url = mobj.group('video_url') + + mobj = re.search('(.*)[.](?P<ext>[^.]+)', mms_url) + if mobj is None: + raise ExtractorError(u'Cannot extract extention') + ext = mobj.group('ext') + + return [{'id': video_id, + 'url': mms_url, + 'title': title, + 'ext': ext + }] + class TumblrIE(InfoExtractor): _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)' @@ -4440,6 +4498,7 @@ def gen_extractors(): SpiegelIE(), LiveLeakIE(), ARDIE(), + ZDFIE(), TumblrIE(), BandcampIE(), RedTubeIE(), From 1b2b22ed9f641eef34c05afb4230f2ff0aa57e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 28 May 2013 15:12:39 +0200 Subject: [PATCH 05/24] BlipTV: accept urls in the format http://a.blip.tv/api.swf#{id} (closes #857) Tweak the regex so that BlipTV can be before BlipTVUser. --- youtube_dl/InfoExtractors.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index d318b4b03..7a882b4ae 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1884,7 +1884,7 @@ class FacebookIE(InfoExtractor): class BlipTVIE(InfoExtractor): """Information extractor for blip.tv""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$' + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' IE_NAME = u'blip.tv' @@ -1897,6 +1897,10 @@ class BlipTVIE(InfoExtractor): if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) + # See https://github.com/rg3/youtube-dl/issues/857 + api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url) + if api_mobj is not None: + url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id') urlp = compat_urllib_parse_urlparse(url) if urlp.path.startswith('/play/'): request = compat_urllib_request.Request(url) @@ -4405,8 +4409,8 @@ def gen_extractors(): YahooSearchIE(), DepositFilesIE(), FacebookIE(), - BlipTVUserIE(), BlipTVIE(), + BlipTVUserIE(), VimeoIE(), MyVideoIE(), ComedyCentralIE(), From dc1c355b7214657d0649cef3ab0854d07eff6997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 26 May 2013 10:46:09 +0200 Subject: [PATCH 06/24] YoutubeIE: fallback to automatic captions when subtitles aren't found (closes #843) Also modify test_youtube_subtitles to support running the tests in any order. --- test/test_youtube_subtitles.py | 12 ++++++++++- youtube_dl/InfoExtractors.py | 37 +++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index a123e6d72..c80c90cbe 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -28,7 +28,9 @@ compat_urllib_request.install_opener(opener) class FakeDownloader(FileDownloader): def __init__(self): self.result = [] - self.params = parameters + # Different instances of the downloader can't share the same dictionary + # some test set the "sublang" parameter, which would break the md5 checks. + self.params = dict(parameters) def to_screen(self, s): print(s) def trouble(self, s, tb=None): @@ -96,6 +98,14 @@ class TestYoutubeSubtitles(unittest.TestCase): IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') self.assertEqual(info_dict, None) + def test_youtube_automatic_captions(self): + DL = FakeDownloader() + DL.params['writesubtitles'] = True + DL.params['subtitleslang'] = 'it' + IE = YoutubeIE(DL) + info_dict = IE.extract('8YoUxe5ncPo') + sub = info_dict[0]['subtitles'][0] + self.assertTrue(sub[2] is not None) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index d318b4b03..937cf9447 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -376,6 +376,34 @@ class YoutubeIE(InfoExtractor): return (u'Did not fetch video subtitles', None, None) return (None, sub_lang, sub) + def _request_automatic_caption(self, video_id, webpage): + """We need the webpage for getting the captions url, pass it as an + argument to speed up the process.""" + sub_lang = self._downloader.params.get('subtitleslang') + sub_format = self._downloader.params.get('subtitlesformat') + self.to_screen(u'%s: Looking for automatic captions' % video_id) + mobj = re.search(r';ytplayer.config = ({.*?});', webpage) + err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang + if mobj is None: + return [(err_msg, None, None)] + player_config = json.loads(mobj.group(1)) + try: + args = player_config[u'args'] + caption_url = args[u'ttsurl'] + timestamp = args[u'timestamp'] + params = compat_urllib_parse.urlencode({ + 'lang': 'en', + 'tlang': sub_lang, + 'fmt': sub_format, + 'ts': timestamp, + 'kind': 'asr', + }) + subtitles_url = caption_url + '&' + params + sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') + return [(None, sub_lang, sub)] + except KeyError: + return [(err_msg, None, None)] + def _extract_subtitle(self, video_id): """ Return a list with a tuple: @@ -623,7 +651,14 @@ class YoutubeIE(InfoExtractor): if video_subtitles: (sub_error, sub_lang, sub) = video_subtitles[0] if sub_error: - self._downloader.report_error(sub_error) + # We try with the automatic captions + video_subtitles = self._request_automatic_caption(video_id, video_webpage) + (sub_error_auto, sub_lang, sub) = video_subtitles[0] + if sub is not None: + pass + else: + # We report the original error + self._downloader.report_error(sub_error) if self._downloader.params.get('allsubtitles', False): video_subtitles = self._extract_all_subtitles(video_id) From 4a76d1dbe50b7ee6743b74d8e6d5f61314abd30e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20DAVID?= <c.david86@gmail.com> Date: Mon, 3 Jun 2013 22:16:55 +0200 Subject: [PATCH 07/24] Add tests for justin.tv and twitch.tv --- test/test_justin_tv.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100755 test/test_justin_tv.py diff --git a/test/test_justin_tv.py b/test/test_justin_tv.py new file mode 100755 index 000000000..2cd213404 --- /dev/null +++ b/test/test_justin_tv.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python + +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.InfoExtractors import JustinTVIE + +class TestJustinTVMatching(unittest.TestCase): + def test_justin_tv_channelid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) + + def test_justintv_videoid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) + + def test_justin_tv_chapterid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) + +if __name__ == '__main__': + unittest.main() From c794cbbb19784a2cca88cad53f76fd837e74aa4a Mon Sep 17 00:00:00 2001 From: Jacob Kaplan-Moss <jacob@jacobian.org> Date: Mon, 3 Jun 2013 18:03:59 -0500 Subject: [PATCH 08/24] Fixed an error downloading vimeo pro videos. --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 9fbe6d627..58d082648 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1099,7 +1099,7 @@ class VimeoIE(InfoExtractor): # Extract uploader and uploader_id video_uploader = config["video"]["owner"]["name"] - video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] + video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None # Extract video thumbnail video_thumbnail = config["video"]["thumbnail"] From eda60e8251fc41fadf243f10b681a69e8cf12a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 4 Jun 2013 12:04:54 +0200 Subject: [PATCH 09/24] VimeoIE: support videos from vimeopro.com --- youtube_dl/InfoExtractors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 9fbe6d627..fe3d12776 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1060,7 +1060,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)' + _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)' IE_NAME = u'vimeo' def _real_extract(self, url, new_video=True): @@ -1072,7 +1072,7 @@ class VimeoIE(InfoExtractor): video_id = mobj.group('id') if not mobj.group('proto'): url = 'https://' + url - if mobj.group('direct_link'): + if mobj.group('direct_link') or mobj.group('pro'): url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information From 50b4d2598025a672510c4008a80a51cabd9fba5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20DAVID?= <c.david86@gmail.com> Date: Tue, 4 Jun 2013 13:06:49 +0200 Subject: [PATCH 10/24] Merge within test_all_urls --- test/test_all_urls.py | 16 ++++++++++++++++ test/test_justin_tv.py | 30 ------------------------------ 2 files changed, 16 insertions(+), 30 deletions(-) delete mode 100755 test/test_justin_tv.py diff --git a/test/test_all_urls.py b/test/test_all_urls.py index a40360122..a35fba3c9 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -29,6 +29,22 @@ class TestAllURLsMatching(unittest.TestCase): self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')) self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')) + def test_justin_tv_channelid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) + + def test_justintv_videoid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) + + def test_justin_tv_chapterid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) + def test_youtube_extract(self): self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') diff --git a/test/test_justin_tv.py b/test/test_justin_tv.py deleted file mode 100755 index 2cd213404..000000000 --- a/test/test_justin_tv.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python - -import sys -import unittest - -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from youtube_dl.InfoExtractors import JustinTVIE - -class TestJustinTVMatching(unittest.TestCase): - def test_justin_tv_channelid_matching(self): - self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) - self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) - - def test_justintv_videoid_matching(self): - self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) - - def test_justin_tv_chapterid_matching(self): - self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) - -if __name__ == '__main__': - unittest.main() From 57bde0d9c7959bd9dadd4a9f3c0b065d5b7978bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20DAVID?= <c.david86@gmail.com> Date: Tue, 4 Jun 2013 13:10:12 +0200 Subject: [PATCH 11/24] Fix the test_all_urls (Import issue) --- test/test_all_urls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index a35fba3c9..dd67286a7 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -7,7 +7,7 @@ import unittest import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE +from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE class TestAllURLsMatching(unittest.TestCase): def test_youtube_playlist_matching(self): From 71e458d43792d6fb225b25e8a40dd5f1561c310b Mon Sep 17 00:00:00 2001 From: "M.Yasoob Ullah Khalid" <yasoob.khld@gmail.com> Date: Tue, 4 Jun 2013 17:30:54 +0500 Subject: [PATCH 12/24] Added support for xhamster in infoextractors --- youtube_dl/InfoExtractors.py | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index fe3d12776..5811ef0da 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4425,6 +4425,63 @@ class TeamcocoIE(InfoExtractor): 'thumbnail': thumbnail, 'description': description, }] + +class XHamsterIE(InfoExtractor): + """Information Extractor for xHamster""" + _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html' + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + mrss_url='http://xhamster.com/movies/%s/.html' % video_id + webpage = self._download_webpage(mrss_url, video_id) + mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) + if mobj is None: + raise ExtractorError(u'Unable to extract media URL') + if len(mobj.group('server')) == 0: + video_url = compat_urllib_parse.unquote(mobj.group('file')) + else: + video_url = mobj.group('server')+'/key='+mobj.group('file') + video_extension = video_url.split('.')[-1] + + mobj = re.search(r'<title>(?P<title>.+?) - xHamster\.com', webpage) + if mobj is None: + raise ExtractorError(u'Unable to extract title') + video_title = unescapeHTML(mobj.group('title')) + + mobj = re.search(r'Description: (?P[^<]+)', webpage) + if mobj is None: + video_description = u'' + else: + video_description = unescapeHTML(mobj.group('description')) + + mobj = re.search(r'hint=\'(?P[0-9]{4})-(?P[0-9]{2})-(?P[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) + if mobj is None: + raise ExtractorError(u'Unable to extract upload date') + video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d') + + mobj = re.search(r']+>(?P[^>]+)', webpage) + if mobj is None: + video_uploader_id = u'anonymous' + else: + video_uploader_id = mobj.group('uploader_id') + + mobj = re.search(r'\'image\':\'(?P[^\']+)\'', webpage) + if mobj is None: + raise ExtractorError(u'Unable to extract thumbnail URL') + video_thumbnail = mobj.group('thumbnail') + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': video_extension, + 'title': video_title, + 'description': video_description, + 'upload_date': video_upload_date, + 'uploader_id': video_uploader_id, + 'thumbnail': video_thumbnail + }] def gen_extractors(): """ Return a list of an instance of every supported extractor. @@ -4487,6 +4544,7 @@ def gen_extractors(): VineIE(), FlickrIE(), TeamcocoIE(), + XHamsterIE(), GenericIE() ] From c978a96c02bf775672f59c1daf88615c100edad9 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Ullah Khalid" Date: Tue, 4 Jun 2013 17:33:02 +0500 Subject: [PATCH 13/24] Added test for XHamster.com --- test/tests.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/tests.json b/test/tests.json index f57ebf1c9..dc2671daa 100644 --- a/test/tests.json +++ b/test/tests.json @@ -482,5 +482,14 @@ "title": "Louis C.K. Interview Pt. 1 11/3/11", "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one." } + }, + { + "name": "XHamster", + "url": "http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html", + "file": "1509445.flv", + "md5": "9f48e0e8d58e3076bb236ff412ab62fa", + "info_dict":{ + "title":"FemaleAgent Shy beauty takes the bait" + } } ] From 9131bde941c49482affe9b10dd26eb187e073b79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 4 Jun 2013 19:31:06 +0200 Subject: [PATCH 14/24] SpiegelE: the page layout has changed a bit --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index c24577cb7..37f9c1449 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3980,7 +3980,7 @@ class SpiegelIE(InfoExtractor): video_id = m.group('videoID') webpage = self._download_webpage(url, video_id) - m = re.search(r'
(.*?)
', webpage) + m = re.search(r'
(.*?)
', webpage) if not m: raise ExtractorError(u'Cannot find title') video_title = unescapeHTML(m.group(1)) From 157b864a0122a87b322e8448ff974d3f4917ed21 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 5 Jun 2013 19:16:53 +0500 Subject: [PATCH 15/24] added HypemIE rebased, closes PR #871 --- test/tests.json | 9 ++++++ youtube_dl/InfoExtractors.py | 54 ++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/test/tests.json b/test/tests.json index dc2671daa..c39d1d9c1 100644 --- a/test/tests.json +++ b/test/tests.json @@ -491,5 +491,14 @@ "info_dict":{ "title":"FemaleAgent Shy beauty takes the bait" } + }, + { + "name": "Hypem", + "url": "http://hypem.com/track/1v6ga/BODYWORK+-+TAME", + "file": "1v6ga.mp3", + "md5": "b9cc91b5af8995e9f0c1cee04c575828", + "info_dict":{ + "title":"TAME" + } } ] diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index a6294e1e1..105f90e2f 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4483,6 +4483,59 @@ class XHamsterIE(InfoExtractor): 'thumbnail': video_thumbnail }] +class HypemIE(InfoExtractor): + """Information Extractor for hypem""" + _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + data = {'ax':1 , + 'ts': time.time() + } + id = mobj.group(1) + data_encoded = compat_urllib_parse.urlencode(data) + complete_url = url + "?"+data_encoded + request = compat_urllib_request.Request(complete_url) + response,urlh = self._download_webpage_handle(request, id, u'Downloading webpage with the url') + cookie = urlh.headers.get('Set-Cookie', '') + track_list = [] + list_data = re.search(r'',response) + html_tracks = list_data.group(1) + if html_tracks is None: + tracks = track_list + try: + track_list = json.loads(html_tracks) + tracks = track_list[u'tracks'] + except ValueError: + self.to_screen("Hypemachine contained invalid JSON.") + tracks = track_list + + for track in tracks: + key = track[u"key"] + id = track[u"id"] + artist = track[u"artist"] + title = track[u"song"] + serve_url = "http://hypem.com/serve/source/%s/%s"%(str(id), str(key)) + self.report_extraction(id) + request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'}) + request.add_header('cookie', cookie) + response = compat_urllib_request.urlopen(request) + song_data_json = response.read() + response.close() + (song_data_json, response) = self._download_webpage_handle(request, id, u'Downloading webpage with the url') + song_data = json.loads(song_data_json) + final_url = song_data[u"url"] + return [{ + 'id': id, + 'url': final_url, + 'ext': "mp3", + 'title': title, + 'artist': artist, + }] + + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. @@ -4545,6 +4598,7 @@ def gen_extractors(): FlickrIE(), TeamcocoIE(), XHamsterIE(), + HypemIE(), GenericIE() ] From 868d62a5093c08af3d7acd1086d8bccf1d0bc6c0 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Thu, 6 Jun 2013 12:02:36 +0200 Subject: [PATCH 16/24] style and error handling edits to HypemIE --- youtube_dl/InfoExtractors.py | 55 ++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 105f90e2f..b40edf5fb 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4487,48 +4487,47 @@ class HypemIE(InfoExtractor): """Information Extractor for hypem""" _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' - def _real_extract(self,url): + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) - data = {'ax':1 , - 'ts': time.time() - } - id = mobj.group(1) + track_id = mobj.group(1) + + data = { 'ax': 1, 'ts': time.time() } data_encoded = compat_urllib_parse.urlencode(data) - complete_url = url + "?"+data_encoded + complete_url = url + "?" + data_encoded request = compat_urllib_request.Request(complete_url) - response,urlh = self._download_webpage_handle(request, id, u'Downloading webpage with the url') + response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url') cookie = urlh.headers.get('Set-Cookie', '') - track_list = [] - list_data = re.search(r'',response) - html_tracks = list_data.group(1) - if html_tracks is None: - tracks = track_list + + self.report_extraction(track_id) + mobj = re.search(r'', response, flags=re.MULTILINE|re.DOTALL) + if mobj is None: + raise ExtractorError(u'Unable to extrack tracks') + html_tracks = mobj.group(1).strip() try: track_list = json.loads(html_tracks) - tracks = track_list[u'tracks'] + track = track_list[u'tracks'][0] except ValueError: - self.to_screen("Hypemachine contained invalid JSON.") - tracks = track_list + raise ExtractorError(u'Hypemachine contained invalid JSON.') - for track in tracks: - key = track[u"key"] - id = track[u"id"] - artist = track[u"artist"] - title = track[u"song"] - serve_url = "http://hypem.com/serve/source/%s/%s"%(str(id), str(key)) - self.report_extraction(id) + key = track[u"key"] + track_id = track[u"id"] + artist = track[u"artist"] + title = track[u"song"] + + serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key)) request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'}) request.add_header('cookie', cookie) - response = compat_urllib_request.urlopen(request) - song_data_json = response.read() - response.close() - (song_data_json, response) = self._download_webpage_handle(request, id, u'Downloading webpage with the url') - song_data = json.loads(song_data_json) + song_data_json = self._download_webpage(request, track_id, u'Downloading metadata') + try: + song_data = json.loads(song_data_json) + except ValueError: + raise ExtractorError(u'Hypemachine contained invalid JSON.') final_url = song_data[u"url"] + return [{ - 'id': id, + 'id': track_id, 'url': final_url, 'ext': "mp3", 'title': title, From ecb3e676a52a0b478511751ca84da74188972b39 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Sat, 8 Jun 2013 12:44:38 +0500 Subject: [PATCH 17/24] Added Vbox7 Infoextractor --- test/tests.json | 9 +++++++++ youtube_dl/InfoExtractors.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/test/tests.json b/test/tests.json index c39d1d9c1..04be912ce 100644 --- a/test/tests.json +++ b/test/tests.json @@ -500,5 +500,14 @@ "info_dict":{ "title":"TAME" } + }, + { + "name": "Vbox7", + "url": "http://vbox7.com/play:249bb972c2", + "file": "249bb972c2.flv", + "md5": "9c70d6d956f888bdc08c124acc120cfe", + "info_dict":{ + "title":"Смях! Чудо - чист за секунди - Скрита камера" + } } ] diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index b40edf5fb..8535a3e9a 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4534,6 +4534,39 @@ class HypemIE(InfoExtractor): 'artist': artist, }] +class Vbox7IE(InfoExtractor): + """Information Extractor for Vbox7""" + _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)' + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + video_id = mobj.group(1) + + redirect_page, urlh = self._download_webpage_handle(url, video_id) + redirect_url = urlh.geturl() + re.search(r'window\.location = \'(.*)\';', redirect_page).group(1) + webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page') + + title = re.search(r'(.*)', webpage) + title = (title.group(1)).split('/')[0].strip() + + ext = "flv" + info_url = "http://vbox7.com/play/magare.do" + data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id}) + info_request = compat_urllib_request.Request(info_url, data) + info_request.add_header('Content-Type', 'application/x-www-form-urlencoded') + info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage') + if info_response is None: + raise ExtractorError(u'Unable to extract the media url') + final_url = (info_response.split('&')[0]).split('=')[1] + + return [{ + 'id': video_id, + 'url': final_url, + 'ext': ext, + 'title': title, + }] def gen_extractors(): """ Return a list of an instance of every supported extractor. @@ -4598,6 +4631,7 @@ def gen_extractors(): TeamcocoIE(), XHamsterIE(), HypemIE(), + Vbox7IE(), GenericIE() ] From 8027175600c694537cecf0e5de2c6b9de559592b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 8 Jun 2013 12:08:44 +0200 Subject: [PATCH 18/24] Set the extractor key in playlists entries If they were videos the extractor key wasn't being set anywhere else Closes 877 --- youtube_dl/FileDownloader.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index bf0f5bb9e..a8517a390 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -539,6 +539,11 @@ class FileDownloader(object): 'playlist': playlist, 'playlist_index': i + playliststart, } + if not 'extractor' in entry: + # We set the extractor, if it's an url it will be set then to + # the new extractor, but if it's already a video we must make + # sure it's present: see issue #877 + entry['extractor'] = ie_result['extractor'] entry_result = self.process_ie_result(entry, download=download, extra_info=extra) From 53f72b11e58a22674476b8695eca13516d11f3eb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 9 Jun 2013 23:43:18 +0200 Subject: [PATCH 19/24] Allow unsetting the proxy with the --proxy option --- youtube_dl/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 308c48fe6..9279ce776 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -392,8 +392,11 @@ def _real_main(argv=None): # General configuration cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) - if opts.proxy: - proxies = {'http': opts.proxy, 'https': opts.proxy} + if opts.proxy is not None: + if opts.proxy == '': + proxies = {} + else: + proxies = {'http': opts.proxy, 'https': opts.proxy} else: proxies = compat_urllib_request.getproxies() # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) From 8cd252f115c67a67a2c230bc1b28401a88106d70 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 10 Jun 2013 18:14:44 +0200 Subject: [PATCH 20/24] Use long rtmpdump options Note that we accidentally called rtmpdump with -v (--live) instead of -V (--verbose) because we missed this. --- youtube_dl/FileDownloader.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index a8517a390..72f03c217 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -763,21 +763,21 @@ class FileDownloader(object): except (OSError, IOError): self.report_error(u'RTMP download detected but "rtmpdump" could not be run') return False + verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet' # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. - basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename] - if self.params.get('verbose', False): basic_args[1] = '-v' + basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename] if player_url is not None: - basic_args += ['-W', player_url] + basic_args += ['--swfVfy', player_url] if page_url is not None: basic_args += ['--pageUrl', page_url] if play_path is not None: - basic_args += ['-y', play_path] + basic_args += ['--playpath', play_path] if tc_url is not None: basic_args += ['--tcUrl', url] - args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)] + args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] if self.params.get('verbose', False): try: import pipes From 9abc6c8b3114083edd080a9f4357dcf4b3fd288c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 10 Jun 2013 19:42:02 +0200 Subject: [PATCH 21/24] Update YahooIE test The old test video is no longer available. --- test/tests.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/tests.json b/test/tests.json index 04be912ce..6c2373321 100644 --- a/test/tests.json +++ b/test/tests.json @@ -434,11 +434,11 @@ }, { "name": "Yahoo", - "url": "http://screen.yahoo.com/obama-celebrates-iraq-victory-27592561.html", - "file": "27592561.flv", - "md5": "c6179bed843512823fd284fa2e7f012d", + "url": "http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html", + "file": "214727115.flv", + "md5": "2e717f169c1be93d84d3794a00d4a325", "info_dict": { - "title": "Obama Celebrates Iraq Victory" + "title": "Julian Smith & Travis Legg Watch Julian Smith" }, "skip": "Requires rtmpdump" }, From f380401bbd1f41e00bc2d75a1354aed64bb18c30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 11 Jun 2013 19:15:07 +0200 Subject: [PATCH 22/24] YoutubeSearchIE: the query is a str, in python 3 it fails if decode is called --- youtube_dl/InfoExtractors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 24a77a1ab..282334635 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1424,7 +1424,6 @@ class YoutubeSearchIE(SearchInfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download search page with given number.""" - query = query.decode(preferredencoding()) self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) def _get_n_results(self, query, n): From fb8f7280bce30d45009c429f0095a9d15cbc9de2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 13 Jun 2013 08:26:39 +0200 Subject: [PATCH 23/24] GenericIE: try to find videos from twitter cards info --- youtube_dl/InfoExtractors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 282334635..33ba0fdd1 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1373,6 +1373,9 @@ class GenericIE(InfoExtractor): if mobj is None: # Broaden the search a little bit: JWPlayer JS loader mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage) + if mobj is None: + # Try to find twitter cards info + mobj = re.search(r' Date: Sat, 15 Jun 2013 11:20:22 +0200 Subject: [PATCH 24/24] Update test_issue_673 in Youtube Lists Some videos have been removed. Delete the title check, it's not the purpose of that test. --- test/test_youtube_lists.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 78657b51c..b842e6cc1 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -53,8 +53,7 @@ class TestYoutubeLists(unittest.TestCase): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) result = ie.extract('PLBB231211A4F62143')[0] - self.assertEqual(result['title'], 'Team Fortress 2') - self.assertTrue(len(result['entries']) > 40) + self.assertTrue(len(result['entries']) > 25) def test_youtube_playlist_long(self): dl = FakeDownloader()