From e1a0bfdffe25dda494a9da8b02fba0c9ad39f4fe Mon Sep 17 00:00:00 2001 From: dyn888 Date: Sun, 3 Jan 2016 04:11:19 +0100 Subject: [PATCH 001/110] [youtube] added vcodec/acodec/abr for multiple itags Should make downloading with filters more precise and easier, ie. bestvideo[vcodec=h264]. By default a lot of codecs are specified as avc1.xxxxxx and unique for each format, which makes them unusable for bestvideo selection. --- youtube_dl/extractor/youtube.py | 120 ++++++++++++++++---------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4aac2cc03..64386f34a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -277,55 +277,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor): $""" _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _formats = { - '5': {'ext': 'flv', 'width': 400, 'height': 240}, - '6': {'ext': 'flv', 'width': 450, 'height': 270}, - '13': {'ext': '3gp'}, - '17': {'ext': '3gp', 'width': 176, 'height': 144}, - '18': {'ext': 'mp4', 'width': 640, 'height': 360}, - '22': {'ext': 'mp4', 'width': 1280, 'height': 720}, - '34': {'ext': 'flv', 'width': 640, 'height': 360}, - '35': {'ext': 'flv', 'width': 854, 'height': 480}, - '36': {'ext': '3gp', 'width': 320, 'height': 240}, - '37': {'ext': 'mp4', 'width': 1920, 'height': 1080}, - '38': {'ext': 'mp4', 'width': 4096, 'height': 3072}, - '43': {'ext': 'webm', 'width': 640, 'height': 360}, - '44': {'ext': 'webm', 'width': 854, 'height': 480}, - '45': {'ext': 'webm', 'width': 1280, 'height': 720}, - '46': {'ext': 'webm', 'width': 1920, 'height': 1080}, - '59': {'ext': 'mp4', 'width': 854, 'height': 480}, - '78': {'ext': 'mp4', 'width': 854, 'height': 480}, + '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, + 
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, + '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, + '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '36': {'ext': '3gp', 'width': 320, 'height': 240, 'acodec': 'aac', 'abr': 32, 'vcodec': 'mp4v'}, + '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, - # 3d videos - '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20}, - '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20}, - '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20}, - '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20}, - '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20}, - '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20}, - '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20}, + # 3D 
videos + '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20}, + '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, + '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, # Apple HTTP Live Streaming - '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10}, - '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10}, - '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10}, - '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10}, - '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10}, - '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10}, - '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10}, + '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, 
+ '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, + '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, # DASH mp4 video - '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) - '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, - '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, - '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'}, + '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '135': {'ext': 'mp4', 'height': 480, 'format_note': 
'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) + '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, # Dash mp4 audio '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, @@ -339,26 +339,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, - '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'}, - '242': {'ext': 
'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40}, + '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, # itag 272 videos 
are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) - '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, - '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, - '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, - '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'}, - '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, # Dash webm audio - '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, - '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, + '171': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, + '172': 
{'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, # Dash webm audio with opus inside '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50}, From 383496e65ed2a965ab82e5cc3012fd3e0ca2afbf Mon Sep 17 00:00:00 2001 From: igv Date: Thu, 21 Jan 2016 12:36:09 +0300 Subject: [PATCH 002/110] Additional regex for yahoo extractor --- youtube_dl/extractor/yahoo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 4a492f784..92b50f6f7 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -221,6 +221,7 @@ class YahooIE(InfoExtractor): r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', r'"first_videoid"\s*:\s*"([^"]+)"', r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), + r'yahoo:\/\/article\/view\?uuid=([^&]+)&', ] video_id = self._search_regex( CONTENT_ID_REGEXES, webpage, 'content ID') From e87d98b0ddbb2cfa873b4d0c13c751bb12224290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Jan 2016 22:42:50 +0600 Subject: [PATCH 003/110] [yahoo] Add improve content id regexes (Closes #8290) --- youtube_dl/extractor/yahoo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 92b50f6f7..4c6142927 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -221,7 +221,8 @@ class YahooIE(InfoExtractor): r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', r'"first_videoid"\s*:\s*"([^"]+)"', r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), - r'yahoo:\/\/article\/view\?uuid=([^&]+)&', + r']data-uuid=["\']([^"\']+)', + r'yahoo://article/view\?.*\buuid=([^&"\']+)', ] video_id = self._search_regex( CONTENT_ID_REGEXES, webpage, 'content ID') From 582f4f834e5cb6fc783eeb51bd977aa237330e79 Mon Sep 17 
00:00:00 2001 From: Dimitre Liotev Date: Thu, 21 Jan 2016 08:10:41 +0200 Subject: [PATCH 004/110] Fix issue #8109 (error when downloading automatic captions) --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d31161d21..567877920 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -964,6 +964,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: args = player_config['args'] caption_url = args['ttsurl'] + if caption_url is None or caption_url == "" or caption_url.isspace(): + self._downloader.report_warning("No automatic captions") + return {} timestamp = args['timestamp'] # We get the available subtitles list_params = compat_urllib_parse.urlencode({ From 51290d8457aa8460382407796740063ced464481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Jan 2016 22:58:03 +0600 Subject: [PATCH 005/110] [youtube] Simplify automatic captions URL check (Closes #8287) --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 567877920..8e8fc14d2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -964,8 +964,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: args = player_config['args'] caption_url = args['ttsurl'] - if caption_url is None or caption_url == "" or caption_url.isspace(): - self._downloader.report_warning("No automatic captions") + if not caption_url: + self._downloader.report_warning(err_msg) return {} timestamp = args['timestamp'] # We get the available subtitles From 1e10d02fec559618f877e6694b6eeec53d59a65a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Jan 2016 23:28:22 +0600 Subject: [PATCH 006/110] [hitbox] Skip subscribe only formats (Closes #8217) --- youtube_dl/extractor/hitbox.py | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index 421f55bbe..ff797438d 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -159,6 +159,9 @@ class HitboxLiveIE(HitboxIE): cdns = player_config.get('cdns') servers = [] for cdn in cdns: + # Subscribe URLs are not playable + if cdn.get('rtmpSubscribe') is True: + continue base_url = cdn.get('netConnectionUrl') host = re.search('.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1) if base_url not in servers: From 6b45f9aba2dad6e965ab51b4d18f4bb05336eaf1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 22 Jan 2016 02:14:47 +0800 Subject: [PATCH 007/110] [iqiyi] Update key (closes #8292) --- youtube_dl/extractor/iqiyi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 66a70a181..691cb66d6 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -214,8 +214,8 @@ class IqiyiIE(InfoExtractor): def get_enc_key(self, swf_url, video_id): # TODO: automatic key extraction - # last update at 2015-12-18 for Zombie::bite - enc_key = '8b6b683780897eb8d9a48a02ccc4817d'[::-1] + # last update at 2016-01-22 for Zombie::bite + enc_key = '6ab6d0280511493ba85594779759d4ed' return enc_key def _real_extract(self, url): From 4d318be1951d6bbae0eae7aff69a58de353c8337 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Mon, 14 Dec 2015 02:18:13 +0000 Subject: [PATCH 008/110] [update] fix (unexploitable) BB'06 vulnerability in rsa_verify The rsa_verify code was vulnerable to a BB'06 attack, allowing to forge signatures for arbitrary messages if and only if the public key exponent is 3. Since the updates key is hardcoded to 65537, there is no risk for youtube-dl, but I don't want vulnerable code in the wild. The new function adopts a way safer approach of encoding-and-comparing to replace the dangerous parsing code. 
--- test/test_update.py | 30 ++++++++++++++++++++++++++++++ test/versions.json | 34 ++++++++++++++++++++++++++++++++++ youtube_dl/update.py | 32 ++++++++------------------------ 3 files changed, 72 insertions(+), 24 deletions(-) create mode 100644 test/test_update.py create mode 100644 test/versions.json diff --git a/test/test_update.py b/test/test_update.py new file mode 100644 index 000000000..d9c71511d --- /dev/null +++ b/test/test_update.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import json +from youtube_dl.update import rsa_verify + + +class TestUpdate(unittest.TestCase): + def test_rsa_verify(self): + UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) + with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'versions.json'), 'rb') as f: + versions_info = f.read().decode() + versions_info = json.loads(versions_info) + signature = versions_info['signature'] + del versions_info['signature'] + self.assertTrue(rsa_verify( + json.dumps(versions_info, sort_keys=True).encode('utf-8'), + signature, UPDATES_RSA_KEY)) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/versions.json b/test/versions.json new file mode 100644 index 000000000..6cccc2259 --- /dev/null +++ b/test/versions.json @@ -0,0 +1,34 @@ +{ + "latest": "2013.01.06", + "signature": "72158cdba391628569ffdbea259afbcf279bbe3d8aeb7492690735dc1cfa6afa754f55c61196f3871d429599ab22f2667f1fec98865527b32632e7f4b3675a7ef0f0fbe084d359256ae4bba68f0d33854e531a70754712f244be71d4b92e664302aa99653ee4df19800d955b6c4149cd2b3f24288d6e4b40b16126e01f4c8ce6", + "versions": 
{ + "2013.01.02": { + "bin": [ + "http://youtube-dl.org/downloads/2013.01.02/youtube-dl", + "f5b502f8aaa77675c4884938b1e4871ebca2611813a0c0e74f60c0fbd6dcca6b" + ], + "exe": [ + "http://youtube-dl.org/downloads/2013.01.02/youtube-dl.exe", + "75fa89d2ce297d102ff27675aa9d92545bbc91013f52ec52868c069f4f9f0422" + ], + "tar": [ + "http://youtube-dl.org/downloads/2013.01.02/youtube-dl-2013.01.02.tar.gz", + "6a66d022ac8e1c13da284036288a133ec8dba003b7bd3a5179d0c0daca8c8196" + ] + }, + "2013.01.06": { + "bin": [ + "http://youtube-dl.org/downloads/2013.01.06/youtube-dl", + "64b6ed8865735c6302e836d4d832577321b4519aa02640dc508580c1ee824049" + ], + "exe": [ + "http://youtube-dl.org/downloads/2013.01.06/youtube-dl.exe", + "58609baf91e4389d36e3ba586e21dab882daaaee537e4448b1265392ae86ff84" + ], + "tar": [ + "http://youtube-dl.org/downloads/2013.01.06/youtube-dl-2013.01.06.tar.gz", + "fe77ab20a95d980ed17a659aa67e371fdd4d656d19c4c7950e7b720b0c2f1a86" + ] + } + } +} \ No newline at end of file diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 995b8ed96..e4a1aaa64 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -15,33 +15,17 @@ from .version import __version__ def rsa_verify(message, signature, key): - from struct import pack from hashlib import sha256 - assert isinstance(message, bytes) - block_size = 0 - n = key[0] - while n: - block_size += 1 - n >>= 8 - signature = pow(int(signature, 16), key[1], key[0]) - raw_bytes = [] - while signature: - raw_bytes.insert(0, pack("B", signature & 0xFF)) - signature >>= 8 - signature = (block_size - len(raw_bytes)) * b'\x00' + b''.join(raw_bytes) - if signature[0:2] != b'\x00\x01': + byte_size = (len(bin(key[0])) - 2 + 8 - 1) // 8 + signature = ('%x' % pow(int(signature, 16), key[1], key[0])).encode() + signature = (byte_size * 2 - len(signature)) * b'0' + signature + asn1 = b'3031300d060960864801650304020105000420' + asn1 += sha256(message).hexdigest().encode() + if byte_size < len(asn1) // 2 + 11: return False - 
signature = signature[2:] - if b'\x00' not in signature: - return False - signature = signature[signature.index(b'\x00') + 1:] - if not signature.startswith(b'\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20'): - return False - signature = signature[19:] - if signature != sha256(message).digest(): - return False - return True + expected = b'0001' + (byte_size - len(asn1) // 2 - 3) * b'ff' + b'00' + asn1 + return expected == signature def update_self(to_screen, verbose, opener): From 32d77eeb04657529b118159db4d23fd671fe04c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 22 Jan 2016 14:49:17 +0100 Subject: [PATCH 009/110] [downloader/common] report_retry: Don't crash when retries is infinite (fixes #8299) --- youtube_dl/downloader/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index beae8c4d0..fc7521598 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -295,7 +295,7 @@ class FileDownloader(object): def report_retry(self, count, retries): """Report retry in case of HTTP error 5xx""" - self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)) + self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %.0f)...' 
% (count, retries)) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" From 4118cc02c1694a7fd355b75c806a69d75b3850bb Mon Sep 17 00:00:00 2001 From: John Assael Date: Thu, 21 Jan 2016 19:51:48 +0000 Subject: [PATCH 010/110] [cbsnews] Extract subtitles added test function for CBS News subtitles --- youtube_dl/extractor/cbsnews.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index d211ec23b..7b9dd67fd 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -41,6 +41,21 @@ class CBSNewsIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://www.cbsnews.com/videos/mountain-lions-of-l-a/', + 'info_dict': { + 'id': 'Mountain Lions of L.A.', + 'ext': 'flv', + 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', + 'thumbnail': 're:^http?://.*\.jpg$', + 'subtitles': 're:^http?://.*\.xml$', + 'duration': 787, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, ] def _real_extract(self, url): @@ -85,10 +100,21 @@ class CBSNewsIE(InfoExtractor): fmt['ext'] = 'mp4' formats.append(fmt) + if 'mpxRefId' in video_info: + cap_url = 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'] + subtitles = { + 'en': [{ + 'url': cap_url, + 'ext': 'xml' + }], } + else: + subtitles = {} + return { 'id': video_id, 'title': title, 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'subtitles': subtitles, } From 220ee33f2b8a22d2fe992c26b32a046b77ce2a4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 Jan 2016 22:23:21 +0600 Subject: [PATCH 011/110] [cbsnews] Simplify subtitles extraction and fix test (Closes #8295) --- youtube_dl/extractor/cbsnews.py | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/cbsnews.py 
b/youtube_dl/extractor/cbsnews.py index 7b9dd67fd..480435e26 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -35,21 +35,11 @@ class CBSNewsIE(InfoExtractor): 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 205, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, - { - 'url': 'http://www.cbsnews.com/videos/mountain-lions-of-l-a/', - 'info_dict': { - 'id': 'Mountain Lions of L.A.', - 'ext': 'flv', - 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', - 'thumbnail': 're:^http?://.*\.jpg$', - 'subtitles': 're:^http?://.*\.xml$', - 'duration': 787, + 'subtitles': { + 'en': [{ + 'ext': 'ttml', + }], + }, }, 'params': { # rtmp download @@ -100,15 +90,12 @@ class CBSNewsIE(InfoExtractor): fmt['ext'] = 'mp4' formats.append(fmt) + subtitles = {} if 'mpxRefId' in video_info: - cap_url = 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'] - subtitles = { - 'en': [{ - 'url': cap_url, - 'ext': 'xml' - }], } - else: - subtitles = {} + subtitles['en'] = [{ + 'ext': 'ttml', + 'url': 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'], + }] return { 'id': video_id, From 24114fee747c5cec4aae41f93581561fcb902e10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Charlier?= Date: Thu, 21 Jan 2016 18:47:43 +0100 Subject: [PATCH 012/110] [arte:future] Fix extraction [arte] Add support for more "Arte Future" uri --- youtube_dl/extractor/arte.py | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 10301a8ea..964d38fdf 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -199,25 +199,26 @@ class ArteTVCreativeIE(ArteTVPlus7IE): class ArteTVFutureIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:future' - _VALID_URL = 
r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)' + _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(?P<id>.+)' - _TEST = { - 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', - 'info_dict': { - 'id': '5201', - 'ext': 'mp4', - 'title': 'Les champignons au secours de la planète', - 'upload_date': '20131101', + _TESTS = [ + { + 'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses', + 'info_dict': { + 'id': '050940-028-A', + 'ext': 'mp4', + 'title': 'Les écrevisses aussi peuvent être anxieuses', + }, }, - } - - def _real_extract(self, url): - anchor_id, lang = self._extract_url_info(url) - webpage = self._download_webpage(url, anchor_id) - row = self._search_regex( - r'(?s)id="%s"[^>]*>.+?(<div[^>]*arte_vp_url[^>]*>)' % anchor_id, - webpage, 'row') - return self._extract_from_webpage(row, anchor_id, lang) + { + 'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable', + 'info_dict': { + 'id': '061982-002-A', + 'ext': 'mp4', + 'title': 'Brian P. 
Schmidt - Prix Nobel de physique 2011', + }, + } + ] class ArteTVDDCIE(ArteTVPlus7IE): From 9c54ae3387b883bec5f014e2ac864a9a6c109163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 Jan 2016 23:00:05 +0600 Subject: [PATCH 013/110] [arte:future] Make duplicated test matching only --- youtube_dl/extractor/arte.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 964d38fdf..7ef42e092 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -201,24 +201,17 @@ class ArteTVFutureIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:future' _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(?P<id>.+)' - _TESTS = [ - { - 'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses', - 'info_dict': { - 'id': '050940-028-A', - 'ext': 'mp4', - 'title': 'Les écrevisses aussi peuvent être anxieuses', - }, + _TESTS = [{ + 'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses', + 'info_dict': { + 'id': '050940-028-A', + 'ext': 'mp4', + 'title': 'Les écrevisses aussi peuvent être anxieuses', }, - { - 'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable', - 'info_dict': { - 'id': '061982-002-A', - 'ext': 'mp4', - 'title': 'Brian P. 
Schmidt - Prix Nobel de physique 2011', - }, - } - ] + }, { + 'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable', + 'only_matching': True, + }] class ArteTVDDCIE(ArteTVPlus7IE): From 4fcd9d147df9b06d954b8f8a1749b50609529ed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 Jan 2016 23:00:50 +0600 Subject: [PATCH 014/110] [arte:cinema] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/arte.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bab3d7b46..cee5cfe7c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -42,6 +42,7 @@ from .arte import ( ArteTVCreativeIE, ArteTVConcertIE, ArteTVFutureIE, + ArteTVCinemaIE, ArteTVDDCIE, ArteTVEmbedIE, ) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 7ef42e092..b9e07f0ef 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -249,6 +249,23 @@ class ArteTVConcertIE(ArteTVPlus7IE): } +class ArteTVCinemaIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:cinema' + _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)' + + _TEST = { + 'url': 'http://cinema.arte.tv/de/node/38291', + 'md5': '6b275511a5107c60bacbeeda368c3aa1', + 'info_dict': { + 'id': '055876-000_PWA12025-D', + 'ext': 'mp4', + 'title': 'Tod auf dem Nil', + 'upload_date': '20160122', + 'description': 'md5:7f749bbb77d800ef2be11d54529b96bc', + }, + } + + class ArteTVEmbedIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:embed' _VALID_URL = r'''(?x) From d570746e45cff3c0f89654bf748e44a5da75a924 Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" Date: Wed, 2 Dec 2015 06:00:47 +0200 Subject: [PATCH 015/110] [nuevo] Generalize nuevo extractor and add support for trollvids Supports only the nuevo player for now (most common). 
[trollvids] convert duration to an int [trollvids] added a test [trollvids] made flake8 shut up Generalized the Nuevo extractor Affects: anitube, trollvids, trutube [nuevo] Complied with the code comments. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/anitube.py | 34 +++------------------ youtube_dl/extractor/nuevo.py | 37 +++++++++++++++++++++++ youtube_dl/extractor/trollvids.py | 49 +++++++++++++++++++++++++++++++ youtube_dl/extractor/trutube.py | 23 +++++---------- 5 files changed, 98 insertions(+), 46 deletions(-) create mode 100644 youtube_dl/extractor/nuevo.py create mode 100644 youtube_dl/extractor/trollvids.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index cee5cfe7c..6f2b35cf1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -726,6 +726,7 @@ from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE +from .trollvids import TrollvidsIE from .trutube import TruTubeIE from .tube8 import Tube8IE from .tubitv import TubiTvIE diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 23f942ae2..73690df82 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -2,10 +2,10 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .nuevo import NuevoBaseIE -class AnitubeIE(InfoExtractor): +class AnitubeIE(NuevoBaseIE): IE_NAME = 'anitube.se' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' @@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor): key = self._search_regex( r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') - config_xml = self._download_xml( - 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) - - video_title = config_xml.find('title').text - thumbnail = config_xml.find('image').text - duration = float(config_xml.find('duration').text) - 
formats = [] - video_url = config_xml.find('file') - if video_url is not None: - formats.append({ - 'format_id': 'sd', - 'url': video_url.text, - }) - video_url = config_xml.find('filehd') - if video_url is not None: - formats.append({ - 'format_id': 'hd', - 'url': video_url.text, - }) - - return { - 'id': video_id, - 'title': video_title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats - } + config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key + return self._extract_nuevo(config_url, video_id) diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py new file mode 100644 index 000000000..ccc697e4f --- /dev/null +++ b/youtube_dl/extractor/nuevo.py @@ -0,0 +1,37 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + float_or_none, + xpath_text +) + + +class NuevoBaseIE(InfoExtractor): + def _extract_nuevo(self, config_url, video_id): + tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip()) + + title = xpath_text(tree, './title') + if title: + title = title.strip() + + thumbnail = xpath_text(tree, './image') + duration = float_or_none(xpath_text(tree, './duration')) + + formats = [] + for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')): + video_url = tree.find(element_name) + video_url is None or formats.append({ + 'format_id': format_id, + 'url': video_url.text + }) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats + } diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py new file mode 100644 index 000000000..e4fe620f7 --- /dev/null +++ b/youtube_dl/extractor/trollvids.py @@ -0,0 +1,49 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .nuevo import NuevoBaseIE + +from ..compat import ( + compat_urllib_parse_unquote +) + +import re + + +class TrollvidsIE(NuevoBaseIE): + _VALID_URL 
= r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)' + IE_NAME = 'trollvids' + + def _real_extract(self, url): + match = re.match(self._VALID_URL, url) + + video_id = match.group('id') + raw_video_title = match.group('title') + url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title) + config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id + + info = self._extract_nuevo(config_url, video_id) + + info.update({ + 'webpage_url': url, + 'age_limit': 18 + }) + + if 'title' not in info: + info['title'] = compat_urllib_parse_unquote(raw_video_title) + + return info + + _TESTS = [ + { + 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', + 'md5': '1d53866b2c514b23ed69e4352fdc9839', + 'info_dict': { + 'id': '2349002', + 'ext': 'mp4', + 'title': "【MMD R-18】ガールフレンド carry_me_off", + 'age_limit': 18, + 'duration': 216.78, + }, + }, + ] diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py index e7b79243a..d7ec2ec26 100644 --- a/youtube_dl/extractor/trutube.py +++ b/youtube_dl/extractor/trutube.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import xpath_text +from .nuevo import NuevoBaseIE -class TruTubeIE(InfoExtractor): +class TruTubeIE(NuevoBaseIE): _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', @@ -22,19 +21,11 @@ class TruTubeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id - config = self._download_xml( - 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, - video_id, transform_source=lambda s: s.strip()) + info = self._extract_nuevo(config_url, video_id) - # filehd is always 404 - 
video_url = xpath_text(config, './file', 'video URL', fatal=True) - title = xpath_text(config, './title', 'title').strip() - thumbnail = xpath_text(config, './image', ' thumbnail') + # filehd always 404s + info['formats'] = info['formats'][:1] - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - } + return info From 10677ece81b7ed05bb84a0dbaf5bd237107eeb62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Jan 2016 00:04:33 +0600 Subject: [PATCH 016/110] [nuevo] Simplify nuevo extractors (Closes #7728) --- youtube_dl/extractor/anitube.py | 9 ++--- youtube_dl/extractor/nuevo.py | 25 +++++++------- youtube_dl/extractor/trollvids.py | 55 ++++++++++++------------------- youtube_dl/extractor/trutube.py | 13 +++----- 4 files changed, 41 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 73690df82..2fd912da4 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .nuevo import NuevoBaseIE @@ -22,12 +20,11 @@ class AnitubeIE(NuevoBaseIE): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) key = self._search_regex( r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') - config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key - return self._extract_nuevo(config_url, video_id) + return self._extract_nuevo( + 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, video_id) diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py index ccc697e4f..225da033c 100644 --- a/youtube_dl/extractor/nuevo.py +++ b/youtube_dl/extractor/nuevo.py @@ -11,22 +11,23 @@ from ..utils import ( class NuevoBaseIE(InfoExtractor): def _extract_nuevo(self, config_url, video_id): - 
tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip()) + config = self._download_xml( + config_url, video_id, transform_source=lambda s: s.strip()) - title = xpath_text(tree, './title') - if title: - title = title.strip() - - thumbnail = xpath_text(tree, './image') - duration = float_or_none(xpath_text(tree, './duration')) + title = xpath_text(config, './title', 'title', fatal=True).strip() + video_id = xpath_text(config, './mediaid', default=video_id) + thumbnail = xpath_text(config, './image') + duration = float_or_none(xpath_text(config, './duration')) formats = [] for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')): - video_url = tree.find(element_name) - video_url is None or formats.append({ - 'format_id': format_id, - 'url': video_url.text - }) + video_url = xpath_text(config, element_name) + if video_url: + formats.append({ + 'url': video_url, + 'format_id': format_id, + }) + self._check_formats(formats, video_id) return { 'id': video_id, diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index e4fe620f7..d239949a6 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -1,49 +1,36 @@ # encoding: utf-8 from __future__ import unicode_literals -from .nuevo import NuevoBaseIE - -from ..compat import ( - compat_urllib_parse_unquote -) - import re +from .nuevo import NuevoBaseIE + class TrollvidsIE(NuevoBaseIE): - _VALID_URL = r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)' + _VALID_URL = r'http://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' IE_NAME = 'trollvids' + _TEST = { + 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', + 'md5': '1d53866b2c514b23ed69e4352fdc9839', + 'info_dict': { + 'id': '2349002', + 'ext': 'mp4', + 'title': '【MMD R-18】ガールフレンド carry_me_off', + 'age_limit': 18, + 'duration': 216.78, + 
}, + } def _real_extract(self, url): - match = re.match(self._VALID_URL, url) - - video_id = match.group('id') - raw_video_title = match.group('title') - url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title) - config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id - - info = self._extract_nuevo(config_url, video_id) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + info = self._extract_nuevo( + 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id, + video_id) info.update({ - 'webpage_url': url, + 'display_id': display_id, 'age_limit': 18 }) - - if 'title' not in info: - info['title'] = compat_urllib_parse_unquote(raw_video_title) - return info - - _TESTS = [ - { - 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', - 'md5': '1d53866b2c514b23ed69e4352fdc9839', - 'info_dict': { - 'id': '2349002', - 'ext': 'mp4', - 'title': "【MMD R-18】ガールフレンド carry_me_off", - 'age_limit': 18, - 'duration': 216.78, - }, - }, - ] diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py index d7ec2ec26..d55e0c563 100644 --- a/youtube_dl/extractor/trutube.py +++ b/youtube_dl/extractor/trutube.py @@ -4,7 +4,7 @@ from .nuevo import NuevoBaseIE class TruTubeIE(NuevoBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', 'md5': 'c5b6e301b0a2040b074746cbeaa26ca1', @@ -21,11 +21,6 @@ class TruTubeIE(NuevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id - - info = self._extract_nuevo(config_url, video_id) - - # filehd always 404s 
- info['formats'] = info['formats'][:1] - - return info + return self._extract_nuevo( + 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, + video_id) From b9698135480e156c89b30b4da60df1aae1c2a660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Jan 2016 00:10:49 +0600 Subject: [PATCH 017/110] Credit @nexAkari for trollvids and nuevo (#7728) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 73fcafeb5..bb1f2d8d9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -154,3 +154,4 @@ Brian Foley Vignesh Venkat Tom Gijselinck Founder Fang +Andrew Alexeyew From 1257b049bcd9857e2993a4366af41219c36baa3e Mon Sep 17 00:00:00 2001 From: Dankryn <dankryn@weg-werf-email.de> Date: Mon, 11 Jan 2016 21:17:30 +0100 Subject: [PATCH 018/110] [ruleporn] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ruleporn.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 youtube_dl/extractor/ruleporn.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6f2b35cf1..dc8679d46 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -580,6 +580,7 @@ from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE from .rtvnh import RTVNHIE from .ruhd import RUHDIE +from .ruleporn import RulepornIE from .rutube import ( RutubeIE, RutubeChannelIE, diff --git a/youtube_dl/extractor/ruleporn.py b/youtube_dl/extractor/ruleporn.py new file mode 100644 index 000000000..9e6a9125a --- /dev/null +++ b/youtube_dl/extractor/ruleporn.py @@ -0,0 +1,31 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RulepornIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[a-z]+(?:-[a-z]+)+)' + _TEST = { + 'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/', + 'md5': 
'86861ebc624a1097c7c10eaf06d7d505', + 'info_dict': { + 'id': '48212', + 'ext': 'mp4', + 'title': 'Brunette Nympho Chick Takes Her Boyfriend In Every Angle', + } + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, None) + + video_id = self._search_regex(r'http://lovehomeporn.com/embed/([0-9]+)', webpage, 'video_id', fatal=True) + title = self._search_regex(r'<h2 title="((?:\w|\s|\d)+)">', webpage, 'title', fatal=True) + info_xml = self._download_xml('http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, video_id) + url = info_xml.find('file').text + + return { + 'id': video_id, + 'title': title, + 'url': url, + } From ea178204326c0d4203cc3ff88057d2ffd25a68a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Jan 2016 00:38:58 +0600 Subject: [PATCH 019/110] [nuevo] Improve thumbnail extraction --- youtube_dl/extractor/nuevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py index 225da033c..ef093dec2 100644 --- a/youtube_dl/extractor/nuevo.py +++ b/youtube_dl/extractor/nuevo.py @@ -16,7 +16,7 @@ class NuevoBaseIE(InfoExtractor): title = xpath_text(config, './title', 'title', fatal=True).strip() video_id = xpath_text(config, './mediaid', default=video_id) - thumbnail = xpath_text(config, './image') + thumbnail = xpath_text(config, ['./image', './thumb']) duration = float_or_none(xpath_text(config, './duration')) formats = [] From b2c6528baf990d1fc0ad7b435595102ae2d8ba2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Jan 2016 00:40:11 +0600 Subject: [PATCH 020/110] [ruleporn] Rework in terms of nuevo (Closes #8206) --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/ruleporn.py | 39 +++++++++++++++++++++----------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/__init__.py 
b/youtube_dl/extractor/__init__.py index dc8679d46..4ea6a3f71 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -580,7 +580,7 @@ from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE from .rtvnh import RTVNHIE from .ruhd import RUHDIE -from .ruleporn import RulepornIE +from .ruleporn import RulePornIE from .rutube import ( RutubeIE, RutubeChannelIE, diff --git a/youtube_dl/extractor/ruleporn.py b/youtube_dl/extractor/ruleporn.py index 9e6a9125a..ebf9808d5 100644 --- a/youtube_dl/extractor/ruleporn.py +++ b/youtube_dl/extractor/ruleporn.py @@ -1,31 +1,44 @@ -# coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor +from .nuevo import NuevoBaseIE -class RulepornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[a-z]+(?:-[a-z]+)+)' +class RulePornIE(NuevoBaseIE): + _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P<id>[^/?#&]+)' _TEST = { 'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/', 'md5': '86861ebc624a1097c7c10eaf06d7d505', 'info_dict': { 'id': '48212', + 'display_id': 'brunette-nympho-chick-takes-her-boyfriend-in-every-angle', 'ext': 'mp4', 'title': 'Brunette Nympho Chick Takes Her Boyfriend In Every Angle', + 'description': 'md5:6d28be231b981fff1981deaaa03a04d5', + 'age_limit': 18, + 'duration': 635.1, } } def _real_extract(self, url): - webpage = self._download_webpage(url, None) + display_id = self._match_id(url) - video_id = self._search_regex(r'http://lovehomeporn.com/embed/([0-9]+)', webpage, 'video_id', fatal=True) - title = self._search_regex(r'<h2 title="((?:\w|\s|\d)+)">', webpage, 'title', fatal=True) - info_xml = self._download_xml('http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, video_id) - url = info_xml.find('file').text + webpage = self._download_webpage(url, display_id) - return { - 'id': video_id, + video_id = self._search_regex( + 
r'lovehomeporn\.com/embed/(\d+)', webpage, 'video id') + + title = self._search_regex( + r'<h2[^>]+title=(["\'])(?P<url>.+?)\1', + webpage, 'title', group='url') + description = self._html_search_meta('description', webpage) + + info = self._extract_nuevo( + 'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, + video_id) + info.update({ + 'display_id': display_id, 'title': title, - 'url': url, - } + 'description': description, + 'age_limit': 18 + }) + return info From 4c0d13df9bdf7222fbfa6dde543ffcdb47696392 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Jan 2016 00:52:23 +0600 Subject: [PATCH 021/110] [lovehomeporn] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/lovehomeporn.py | 37 ++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/lovehomeporn.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4ea6a3f71..245e4d044 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -358,6 +358,7 @@ from .livestream import ( LivestreamShortenerIE, ) from .lnkgo import LnkGoIE +from .lovehomeporn import LoveHomePornIE from .lrt import LRTIE from .lynda import ( LyndaIE, diff --git a/youtube_dl/extractor/lovehomeporn.py b/youtube_dl/extractor/lovehomeporn.py new file mode 100644 index 000000000..8f65a3c03 --- /dev/null +++ b/youtube_dl/extractor/lovehomeporn.py @@ -0,0 +1,37 @@ +from __future__ import unicode_literals + +import re + +from .nuevo import NuevoBaseIE + + +class LoveHomePornIE(NuevoBaseIE): + _VALID_URL = r'https?://(?:www\.)?lovehomeporn\.com/video/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' 
+ _TEST = { + 'url': 'http://lovehomeporn.com/video/48483/stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick#menu', + 'info_dict': { + 'id': '48483', + 'display_id': 'stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick', + 'ext': 'mp4', + 'title': 'Stunning busty brunette girlfriend sucking and riding a big dick', + 'age_limit': 18, + 'duration': 238.47, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + + info = self._extract_nuevo( + 'http://lovehomeporn.com/media/nuevo/config.php?key=%s' % video_id, + video_id) + info.update({ + 'display_id': display_id, + 'age_limit': 18 + }) + return info From 1f16b958b11c4c8503a3555d46bd205540d7aae8 Mon Sep 17 00:00:00 2001 From: Marian Sigler <m@qjym.de> Date: Tue, 12 Jan 2016 17:08:55 +0100 Subject: [PATCH 022/110] [SVTPlay] Add subtitle support --- youtube_dl/extractor/svt.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index fc20f664b..7ff5ee11a 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -37,6 +37,12 @@ class SVTBaseIE(InfoExtractor): }) self._sort_formats(formats) + # SVT does not tell us the language, so we assume swedish. 
+ subtitles = {} + for sr in video_info['subtitleReferences']: + if 'url' in sr: + subtitles.setdefault('sv', []).append({'url': sr['url']}) + duration = video_info.get('materialLength') age_limit = 18 if video_info.get('inappropriateForChildren') else 0 @@ -44,6 +50,7 @@ class SVTBaseIE(InfoExtractor): 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, 'thumbnail': thumbnail, 'duration': duration, 'age_limit': age_limit, From 594c4d79a5cc988af99cbd0e3c00d1752e277dd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Jan 2016 01:47:54 +0600 Subject: [PATCH 023/110] [svt] Improve subtitles extraction and add test (Closes #8265) --- youtube_dl/extractor/svt.py | 43 ++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 7ff5ee11a..399c3b8ee 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -37,11 +37,13 @@ class SVTBaseIE(InfoExtractor): }) self._sort_formats(formats) - # SVT does not tell us the language, so we assume swedish. 
subtitles = {} - for sr in video_info['subtitleReferences']: - if 'url' in sr: - subtitles.setdefault('sv', []).append({'url': sr['url']}) + subtitle_references = video_info.get('subtitleReferences') + if isinstance(subtitle_references, list): + for sr in subtitle_references: + subtitle_url = sr.get('url') + if subtitle_url: + subtitles.setdefault('sv', []).append({'url': subtitle_url}) duration = video_info.get('materialLength') age_limit = 18 if video_info.get('inappropriateForChildren') else 0 @@ -90,30 +92,23 @@ class SVTIE(SVTBaseIE): class SVTPlayIE(SVTBaseIE): IE_DESC = 'SVT Play and Öppet arkiv' _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final', - 'md5': 'ade3def0643fa1c40587a422f98edfd9', + _TEST = { + 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', + 'md5': '2b6704fe4a28801e1a098bbf3c5ac611', 'info_dict': { - 'id': '2609989', - 'ext': 'flv', - 'title': 'SM veckan vinter, Örebro - Rally, final', - 'duration': 4500, + 'id': '5996901', + 'ext': 'mp4', + 'title': 'Flygplan till Haile Selassie', + 'duration': 3527, 'thumbnail': 're:^https?://.*[\.-]jpg$', 'age_limit': 0, + 'subtitles': { + 'sv': [{ + 'ext': 'wsrt', + }] + }, }, - }, { - 'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318', - 'md5': 'c3101a17ce9634f4c1f9800f0746c187', - 'info_dict': { - 'id': '1058509', - 'ext': 'flv', - 'title': 'Farlig kryssning', - 'duration': 2566, - 'thumbnail': 're:^https?://.*[\.-]jpg$', - 'age_limit': 0, - }, - 'skip': 'Only works from Sweden', - }] + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 1ca59daca964b789105b50aa42835aa87c982342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Jan 2016 01:50:06 +0600 Subject: [PATCH 024/110] [options] 
Clarify language tags --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index ade58c375..433245f00 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -380,7 +380,7 @@ def parseOpts(overrideArguments=None): '--sub-lang', '--sub-langs', '--srt-lang', action='callback', dest='subtitleslangs', metavar='LANGS', type='str', default=[], callback=_comma_separated_values_options_callback, - help='Languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') + help='Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags') downloader = optparse.OptionGroup(parser, 'Download Options') downloader.add_option( From 5ca01bb9e4803d82734420445d49ea7f98579fe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Jan 2016 01:51:18 +0600 Subject: [PATCH 025/110] [kanalplay] Use IETF language tag --- youtube_dl/extractor/kanalplay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py index 4597d1b96..6c3498c67 100644 --- a/youtube_dl/extractor/kanalplay.py +++ b/youtube_dl/extractor/kanalplay.py @@ -49,7 +49,7 @@ class KanalPlayIE(InfoExtractor): subs = self._download_json( 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id), video_id, 'Downloading subtitles JSON', fatal=False) - return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {} + return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {} def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 6fa73386cbb9b3d39dafde9c61bd950bd99b7155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Jan 2016 01:54:00 +0600 Subject: [PATCH 026/110] [drtv] Use IETF language tag --- 
youtube_dl/extractor/drtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index baa24c6d1..2d74ff855 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -91,7 +91,7 @@ class DRTVIE(InfoExtractor): subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): LANGS = { - 'Danish': 'dk', + 'Danish': 'da', } for subs in subtitles_list: lang = subs['Language'] From f733b05302f44776d369f45c730da4a5b558e21b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 23 Jan 2016 12:03:12 +0100 Subject: [PATCH 027/110] release 2016.01.23 --- README.md | 4 ++-- docs/supportedsites.md | 13 +++++++++++-- youtube_dl/version.py | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9dbeae1bc..724fb17d1 100644 --- a/README.md +++ b/README.md @@ -339,8 +339,8 @@ which means you can modify it, redistribute it or use it however you like. 
preference, for example: "srt" or "ass/srt/best" --sub-lang LANGS Languages of the subtitles to download - (optional) separated by commas, use IETF - language tags like 'en,pt' + (optional) separated by commas, use --list- + subs for available language tags ## Authentication Options: -u, --username USERNAME Login with this account ID diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 99b1e2731..e86467cfa 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -24,7 +24,7 @@ - **AdobeTVShow** - **AdobeTVVideo** - **AdultSwim** - - **AE** + - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network - **Aftonbladet** - **AirMozilla** - **AlJazeera** @@ -43,6 +43,7 @@ - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** + - **arte.tv:cinema** - **arte.tv:concert** - **arte.tv:creative** - **arte.tv:ddc** @@ -124,6 +125,7 @@ - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 - **culturebox.francetvinfo.fr** + - **CultureUnplugged** - **CWTV** - **dailymotion** - **dailymotion:playlist** @@ -141,6 +143,7 @@ - **defense.gouv.fr** - **democracynow** - **DHM**: Filmarchiv - Deutsches Historisches Museum + - **Digiteka** - **Discovery** - **Dotsub** - **DouyuTV**: 斗鱼 @@ -287,7 +290,9 @@ - **la7.tv** - **Laola1Tv** - **Lecture2Go** + - **Lemonde** - **Letv**: 乐视网 + - **LetvCloud**: 乐视云 - **LetvPlaylist** - **LetvTv** - **Libsyn** @@ -300,6 +305,7 @@ - **livestream** - **livestream:original** - **LnkGo** + - **LoveHomePorn** - **lrt.lt** - **lynda**: lynda.com videos - **lynda:course**: lynda.com online courses @@ -486,6 +492,7 @@ - **rtve.es:live**: RTVE.es live streams - **RTVNH** - **RUHD** + - **RulePorn** - **rutube**: Rutube videos - **rutube:channel**: Rutube channels - **rutube:embed**: Rutube embedded videos @@ -602,6 +609,7 @@ - **ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** + - **trollvids** - **TruTube** - **Tube8** - **TubiTv** @@ -640,7 +648,6 @@ - **udemy** - **udemy:course** - 
**UDNEmbed**: 聯合影音 - - **Ultimedia** - **Unistra** - **Urort**: NRK P3 Urørt - **ustream** @@ -707,6 +714,7 @@ - **WebOfStories** - **WebOfStoriesPlaylist** - **Weibo** + - **WeiqiTV**: WQTV - **wholecloud**: WholeCloud - **Wimp** - **Wistia** @@ -758,3 +766,4 @@ - **ZDFChannel** - **zingmp3:album**: mp3.zing.vn albums - **zingmp3:song**: mp3.zing.vn songs + - **ZippCast** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 02c438f3a..d5bf73815 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.01.15' +__version__ = '2016.01.23' From f6861ec96f33722c9b1ba4f2d8ca307dcbe64ac1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Jan 2016 00:58:53 +0800 Subject: [PATCH 028/110] [utils] Add more items to mimetype2ext (#8293) These are used in Youtube formats --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9c1c0e0bd..178d1dcb3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1831,6 +1831,8 @@ def mimetype2ext(mt): 'x-ms-wmv': 'wmv', 'x-mp4-fragmented': 'mp4', 'ttml+xml': 'ttml', + '3gpp': '3gp', + 'x-flv': 'flv', }.get(res, res) From a0d8d704df5ff3098b86e741f238d53de1711198 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Jan 2016 01:01:15 +0800 Subject: [PATCH 029/110] [utils] Reorder items in mimetype2ext alphabetically --- youtube_dl/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 178d1dcb3..c63b61598 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1828,11 +1828,11 @@ def mimetype2ext(mt): _, _, res = mt.rpartition('/') return { - 'x-ms-wmv': 'wmv', - 'x-mp4-fragmented': 'mp4', - 'ttml+xml': 'ttml', '3gpp': '3gp', + 'ttml+xml': 'ttml', 'x-flv': 'flv', + 'x-mp4-fragmented': 'mp4', + 'x-ms-wmv': 'wmv', }.get(res, res) From 
94278f720272c5ad2cd5900f59f8e71f31d46633 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Jan 2016 01:02:19 +0800 Subject: [PATCH 030/110] [youtube] Prefer info from YouTube than _formats (#8293) --- youtube_dl/extractor/youtube.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8e8fc14d2..6f0665775 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -32,6 +32,7 @@ from ..utils import ( get_element_by_attribute, get_element_by_id, int_or_none, + mimetype2ext, orderedSet, parse_duration, remove_quotes, @@ -1090,9 +1091,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): full_info.update(f) codecs = r.attrib.get('codecs') if codecs: - if full_info.get('acodec') == 'none' and 'vcodec' not in full_info: + if full_info.get('acodec') == 'none': full_info['vcodec'] = codecs - elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info: + elif full_info.get('vcodec') == 'none': full_info['acodec'] = codecs formats.append(full_info) else: @@ -1461,15 +1462,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'ratebypass' not in url: url += '&ratebypass=yes' + dct = { + 'format_id': format_id, + 'url': url, + 'player_url': player_url, + } + if format_id in self._formats: + dct.update(self._formats[format_id]) + # Some itags are not included in DASH manifest thus corresponding formats will # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993). # Trying to extract metadata from url_encoded_fmt_stream_map entry. 
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0]) width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) - dct = { - 'format_id': format_id, - 'url': url, - 'player_url': player_url, + + more_fields = { 'filesize': int_or_none(url_data.get('clen', [None])[0]), 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000), 'width': width, @@ -1477,13 +1484,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fps': int_or_none(url_data.get('fps', [None])[0]), 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0], } + for key, value in more_fields.items(): + if value: + dct[key] = value type_ = url_data.get('type', [None])[0] if type_: type_split = type_.split(';') kind_ext = type_split[0].split('/') if len(kind_ext) == 2: - kind, ext = kind_ext - dct['ext'] = ext + kind, _ = kind_ext + dct['ext'] = mimetype2ext(type_split[0]) if kind in ('audio', 'video'): codecs = None for mobj in re.finditer( @@ -1501,8 +1511,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'acodec': acodec, 'vcodec': vcodec, }) - if format_id in self._formats: - dct.update(self._formats[format_id]) formats.append(dct) elif video_info.get('hlsvp'): manifest_url = video_info['hlsvp'][0] From 77f785076f9a2fcfc35de49c8e612716bbb37f6c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Jan 2016 01:03:46 +0800 Subject: [PATCH 031/110] [common] Keep full codec name from m3u8 manifests See #8293. This is for consistency between YouTube and HLS formats. 
--- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8da70ae14..2f574054d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1058,9 +1058,9 @@ class InfoExtractor(object): # TODO: looks like video codec is not always necessarily goes first va_codecs = codecs.split(',') if va_codecs[0]: - f['vcodec'] = va_codecs[0].partition('.')[0] + f['vcodec'] = va_codecs[0] if len(va_codecs) > 1 and va_codecs[1]: - f['acodec'] = va_codecs[1].partition('.')[0] + f['acodec'] = va_codecs[1] resolution = last_info.get('RESOLUTION') if resolution: width_str, height_str = resolution.split('x') From 2e6e742c3cda5ed4846bff4ef894aac21434e3d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 25 Jan 2016 22:15:21 +0600 Subject: [PATCH 032/110] [facebook] Add shortcut and reformat _VALID_URL --- youtube_dl/extractor/facebook.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index ec699ba54..f9fd8ed4e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -23,15 +23,23 @@ from ..utils import ( class FacebookIE(InfoExtractor): _VALID_URL = r'''(?x) - https?://(?:\w+\.)?facebook\.com/ - (?:[^#]*?\#!/)? - (?: - (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?) - (?:v|video_id)=| - [^/]+/videos/(?:[^/]+/)? - ) - (?P<id>[0-9]+) - (?:.*)''' + (?: + https?:// + (?:\w+\.)?facebook\.com/ + (?:[^#]*?\#!/)? + (?: + (?: + video/video\.php| + photo\.php| + video\.php| + video/embed + )\?(?:.*?)(?:v|video_id)=| + [^/]+/videos/(?:[^/]+/)? 
+ )| + facebook: + ) + (?P<id>[0-9]+) + ''' _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' _NETRC_MACHINE = 'facebook' @@ -66,6 +74,9 @@ class FacebookIE(InfoExtractor): }, { 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater', 'only_matching': True, + }, { + 'url': 'facebook:544765982287235', + 'only_matching': True, }] def _login(self): From de691a498dd334ee5f8d237a4fc7ced314d86f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 25 Jan 2016 22:18:34 +0600 Subject: [PATCH 033/110] [facebook:post] Add extractor (Closes #8321) --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/facebook.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 245e4d044..532be7e4c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -196,7 +196,10 @@ from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE -from .facebook import FacebookIE +from .facebook import ( + FacebookIE, + FacebookPostIE, +) from .faz import FazIE from .fc2 import FC2IE from .fczenit import FczenitIE diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index f9fd8ed4e..cb5dd57fb 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -199,3 +199,33 @@ class FacebookIE(InfoExtractor): 'formats': formats, 'uploader': uploader, } + + +class FacebookPostIE(InfoExtractor): + IE_NAME = 'facebook:post' + _VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)' + _TEST = { + 'url': 
'https://www.facebook.com/maxlayn/posts/10153807558977570', + 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6', + 'info_dict': { + 'id': '544765982287235', + 'ext': 'mp4', + 'title': '"What are you doing running in the snow?"', + 'uploader': 'FailArmy', + } + } + + def _real_extract(self, url): + post_id = self._match_id(url) + + webpage = self._download_webpage(url, post_id) + + entries = [ + self.url_result('facebook:%s' % video_id, FacebookIE.ie_key()) + for video_id in self._parse_json( + self._search_regex( + r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', + webpage, 'video ids', group='ids'), + post_id)] + + return self.playlist_result(entries, post_id) From 2975fe1a7be39b66d14fc7ef68ea7db948f0e40f Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 25 Jan 2016 22:35:06 +0100 Subject: [PATCH 034/110] [vevo] extract all formats and bypass geo restriction --- youtube_dl/extractor/vevo.py | 234 +++++++++++++++-------------------- 1 file changed, 102 insertions(+), 132 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 02dfd36f4..ff854e74b 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -3,22 +3,18 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_urlparse, -) +from ..compat import compat_etree_fromstring from ..utils import ( ExtractorError, int_or_none, - sanitized_Request, ) class VevoIE(InfoExtractor): - """ + ''' Accepts urls from vevo.com or in the format 'vevo:{id}' (currently used by MTVIE and MySpaceIE) - """ + ''' _VALID_URL = r'''(?x) (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| @@ -28,48 +24,51 @@ class VevoIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', - "md5": "95ee28ee45e70130e3ab02b0f579ae23", + 'md5': '2dbc7e9fd4f1c60436c9aa73a5406193', 
'info_dict': { - 'id': 'GB1101300280', + 'id': 'Pt1kc_FniKM', 'ext': 'mp4', - "upload_date": "20130624", - "uploader": "Hurts", - "title": "Somebody to Die For", - "duration": 230.12, - "width": 1920, - "height": 1080, - # timestamp and upload_date are often incorrect; seem to change randomly - 'timestamp': int, - } + 'title': 'Hurts - Somebody to Die For', + 'description': 'md5:13e925b89af6b01c7e417332bd23c4bf', + 'uploader_id': 'HurtsVEVO', + 'uploader': 'HurtsVEVO', + 'upload_date': '20130624', + 'duration': 230, + }, + 'add_ie': ['Youtube'], }, { 'note': 'v3 SMIL format', 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', - 'md5': 'f6ab09b034f8c22969020b042e5ac7fc', + 'md5': '13d5204f520af905eeffa675040b8e76', 'info_dict': { - 'id': 'USUV71302923', + 'id': 'ByGmQn1uxJw', 'ext': 'mp4', + 'title': 'Cassadee Pope - I Wish I Could Break Your Heart', + 'description': 'md5:5e9721c92ef117a6f69d00e9b42ceba7', + 'uploader_id': 'CassadeeVEVO', + 'uploader': 'CassadeeVEVO', 'upload_date': '20140219', - 'uploader': 'Cassadee Pope', - 'title': 'I Wish I Could Break Your Heart', - 'duration': 226.101, + 'duration': 226, 'age_limit': 0, - 'timestamp': int, - } + }, + 'add_ie': ['Youtube'], }, { 'note': 'Age-limited video', 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 'info_dict': { - 'id': 'USRV81300282', + 'id': '07FYdnEawAQ', 'ext': 'mp4', 'age_limit': 18, - 'title': 'Tunnel Vision (Explicit)', - 'uploader': 'Justin Timberlake', - 'upload_date': 're:2013070[34]', - 'timestamp': int, + 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', + 'description': 'md5:64249768eec3bc4276236606ea996373', + 'uploader_id': 'justintimberlakeVEVO', + 'uploader': 'justintimberlakeVEVO', + 'upload_date': '20130703', }, 'params': { 'skip_download': 'true', - } + }, + 'add_ie': ['Youtube'], }, { 'note': 'No video_info', 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', @@ -80,66 
+79,40 @@ class VevoIE(InfoExtractor): 'title': 'Till I Die - K Camp ft. T.I.', 'duration': 193, }, - 'expected_warnings': ['Unable to download SMIL file'], }] - _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' + _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com' + _SOURCE_TYPES = { + 0: 'youtube', + 1: 'brightcove', + 2: 'http', + 3: 'hls_ios', + 4: 'hls', + 5: 'smil', # http + 7: 'f4m_cc', + 8: 'f4m_ak', + 9: 'f4m_l3', + 10: 'ism', + 13: 'smil', # rtmp + 18: 'dash', + } + _VERSIONS = { + 0: 'youtube', + 1: 'level3', + 2: 'akamai', + 3: 'level3', + 4: 'amazon', + } - def _real_initialize(self): - req = sanitized_Request( - 'http://www.vevo.com/auth', data=b'') - webpage = self._download_webpage( - req, None, - note='Retrieving oauth token', - errnote='Unable to retrieve oauth token', - fatal=False) - if webpage is False: - self._oauth_token = None - else: - if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: - raise ExtractorError('%s said: This page is currently unavailable in your region.' 
% self.IE_NAME, expected=True) - - self._oauth_token = self._search_regex( - r'access_token":\s*"([^"]+)"', - webpage, 'access token', fatal=False) - - def _formats_from_json(self, video_info): - if not video_info: - return [] - - last_version = {'version': -1} - for version in video_info['videoVersions']: - # These are the HTTP downloads, other types are for different manifests - if version['sourceType'] == 2: - if version['version'] > last_version['version']: - last_version = version - if last_version['version'] == -1: - raise ExtractorError('Unable to extract last version of the video') - - renditions = compat_etree_fromstring(last_version['data']) + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): formats = [] - # Already sorted from worst to best quality - for rend in renditions.findall('rendition'): - attr = rend.attrib - format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr - formats.append({ - 'url': attr['url'], - 'format_id': attr['name'], - 'format_note': format_note, - 'height': int(attr['frameheight']), - 'width': int(attr['frameWidth']), - }) - return formats - - def _formats_from_smil(self, smil_doc): - formats = [] - els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') + els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') for el in els: src = el.attrib['src'] m = re.match(r'''(?xi) (?P<ext>[a-z0-9]+): (?P<path> [/a-z0-9]+ # The directory and main part of the URL - _(?P<cbr>[0-9]+)k + _(?P<tbr>[0-9]+)k _(?P<width>[0-9]+)x(?P<height>[0-9]+) _(?P<vcodec>[a-z0-9]+) _(?P<vbr>[0-9]+) @@ -153,9 +126,10 @@ class VevoIE(InfoExtractor): format_url = self._SMIL_BASE_URL + m.group('path') formats.append({ 'url': format_url, - 'format_id': 'SMIL_' + m.group('cbr'), + 'format_id': 'smil_' + m.group('tbr'), 'vcodec': m.group('vcodec'), 'acodec': m.group('acodec'), + 'tbr': int(m.group('tbr')), 'vbr': int(m.group('vbr')), 
'abr': int(m.group('abr')), 'ext': m.group('ext'), @@ -164,26 +138,6 @@ class VevoIE(InfoExtractor): }) return formats - def _download_api_formats(self, video_id, video_url): - if not self._oauth_token: - self._downloader.report_warning( - 'No oauth token available, skipping API HLS download') - return [] - - api_url = compat_urlparse.urljoin(video_url, '//apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( - video_id, self._oauth_token)) - api_data = self._download_json( - api_url, video_id, - note='Downloading HLS formats', - errnote='Failed to download HLS format list', fatal=False) - if api_data is None: - return [] - - m3u8_url = api_data[0]['url'] - return self._extract_m3u8_formats( - m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4', - preference=0) - def _real_extract(self, url): video_id = self._match_id(url) @@ -193,19 +147,57 @@ class VevoIE(InfoExtractor): response = self._download_json(json_url, video_id) video_info = response['video'] or {} - if not video_info and response.get('statusCode') != 909: - if 'statusMessage' in response: - raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True) - raise ExtractorError('Unable to extract videos') - if not video_info: + ytid = response.get('errorInfo', {}).get('ytid') + if ytid: + return self.url_result(ytid, 'Youtube', ytid) + + if response.get('statusCode') != 909: + if 'statusMessage' in response: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, response['statusMessage']), expected=True) + raise ExtractorError('Unable to extract videos') + if url.startswith('vevo:'): - raise ExtractorError('Please specify full Vevo URL for downloading', expected=True) + raise ExtractorError( + 'Please specify full Vevo URL for downloading', expected=True) webpage = self._download_webpage(url, video_id) title = video_info.get('title') or self._og_search_title(webpage) - formats = self._formats_from_json(video_info) + smil_parsed = False + formats = [] + for 
video_version in video_info['videoVersions']: + version = self._VERSIONS.get(video_version['version']) + if version == 'youtube': + return self.url_result( + video_version['id'], 'Youtube', video_version['id']) + else: + source_type = self._SOURCE_TYPES.get(video_version['sourceType']) + renditions = compat_etree_fromstring(video_version['data']) + if source_type == 'http': + for rend in renditions.findall('rendition'): + attr = rend.attrib + formats.append({ + 'url': attr['url'], + 'format_id': '%s-%s' % (version, attr['name']), + 'height': int_or_none(attr.get('frameheight')), + 'width': int_or_none(attr.get('frameWidth')), + 'tbr': int_or_none(attr.get('totalBitrate')), + 'vbr': int_or_none(attr.get('videoBitrate')), + 'abr': int_or_none(attr.get('audioBitrate')), + 'vcodec': attr.get('videoCodec'), + 'acodec': attr.get('audioCodec'), + }) + elif source_type == 'hls': + formats.extend(self._extract_m3u8_formats( + renditions.find('rendition').attrib['url'], video_id, + 'mp4', 'm3u8_native', m3u8_id='hls-%s' % version, fatal=False)) + elif source_type == 'smil' and not smil_parsed: + formats.extend(self._extract_smil_formats( + renditions.find('rendition').attrib['url'], video_id, False)) + smil_parsed = True + self._sort_formats(formats) is_explicit = video_info.get('isExplicit') if is_explicit is True: @@ -215,28 +207,6 @@ class VevoIE(InfoExtractor): else: age_limit = None - # Download via HLS API - formats.extend(self._download_api_formats(video_id, url)) - - # Download SMIL - smil_blocks = sorted(( - f for f in video_info.get('videoVersions', []) - if f['sourceType'] == 13), - key=lambda f: f['version']) - smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( - self._SMIL_BASE_URL, video_id, video_id.lower()) - if smil_blocks: - smil_url_m = self._search_regex( - r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', - default=None) - if smil_url_m is not None: - smil_url = smil_url_m - if smil_url: - smil_doc = self._download_smil(smil_url, video_id, fatal=False) 
- if smil_doc: - formats.extend(self._formats_from_smil(smil_doc)) - - self._sort_formats(formats) timestamp = int_or_none(self._search_regex( r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date', fatal=False), From 9165d6bab9ae3cc98984ebeb249ef320b9f4db83 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 26 Jan 2016 13:46:58 +0100 Subject: [PATCH 035/110] [vevo] extract metadata and formats from api if videoinfo is empty these was fixed by @yan12125 in ff51983e1563db08734e43a07d5671a517f99ef6 i only added some code to extract video metadata and more formats from api --- youtube_dl/extractor/vevo.py | 189 ++++++++++++++++++++++++++--------- 1 file changed, 140 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index ff854e74b..9c955c895 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -7,6 +7,8 @@ from ..compat import compat_etree_fromstring from ..utils import ( ExtractorError, int_or_none, + sanitized_Request, + parse_iso8601, ) @@ -64,6 +66,7 @@ class VevoIE(InfoExtractor): 'uploader_id': 'justintimberlakeVEVO', 'uploader': 'justintimberlakeVEVO', 'upload_date': '20130703', + 'duration': 419, }, 'params': { 'skip_download': 'true', @@ -72,13 +75,18 @@ class VevoIE(InfoExtractor): }, { 'note': 'No video_info', 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', - 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', + 'md5': 'a8b84d1d1957cd01046441b701b270fb', 'info_dict': { - 'id': 'USUV71503000', + 'id': 'Lad2jHtJCqY', 'ext': 'mp4', - 'title': 'Till I Die - K Camp ft. T.I.', + 'title': 'K Camp - Till I Die ft. 
T.I.', + 'description': 'md5:0694920ededdee4a14cfc39695cc8ec3', + 'uploader_id': 'KCampVEVO', + 'uploader': 'KCampVEVO', + 'upload_date': '20151207', 'duration': 193, }, + 'add_ie': ['Youtube'], }] _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com' _SOURCE_TYPES = { @@ -96,7 +104,7 @@ class VevoIE(InfoExtractor): 18: 'dash', } _VERSIONS = { - 0: 'youtube', + 0: 'youtube', # only in AuthenticateVideo videoVersions 1: 'level3', 2: 'akamai', 3: 'level3', @@ -138,14 +146,34 @@ class VevoIE(InfoExtractor): }) return formats + def _initialize_api(self, video_url, video_id): + req = sanitized_Request( + 'http://www.vevo.com/auth', data=b'') + webpage = self._download_webpage( + req, None, + note='Retrieving oauth token', + errnote='Unable to retrieve oauth token') + + if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: + raise ExtractorError('%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True) + + auth_info = self._parse_json(webpage, video_id) + self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token'] + + def _call_api(self, path, video_id, note, errnote, fatal=True): + return self._download_json(self._api_url_template % path, video_id, note, errnote) + def _real_extract(self, url): video_id = self._match_id(url) - webpage = None - json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id - response = self._download_json(json_url, video_id) - video_info = response['video'] or {} + response = self._download_json(json_url, video_id, 'Downloading video info', 'Unable to download info') + video_info = response.get('video') or {} + video_versions = video_info.get('videoVersions') + uploader = None + timestamp = None + view_count = None + formats = [] if not video_info: ytid = response.get('errorInfo', {}).get('ytid') @@ -161,44 +189,112 @@ class VevoIE(InfoExtractor): if url.startswith('vevo:'): raise ExtractorError( 'Please specify full Vevo URL for 
downloading', expected=True) - webpage = self._download_webpage(url, video_id) - title = video_info.get('title') or self._og_search_title(webpage) + self._initialize_api(url, video_id) + video_info = self._call_api( + 'video/%s' % video_id, video_id, 'Downloading api video info', + 'Failed to download video info') - smil_parsed = False - formats = [] - for video_version in video_info['videoVersions']: - version = self._VERSIONS.get(video_version['version']) - if version == 'youtube': + ytid = video_info.get('youTubeId') + if ytid: return self.url_result( - video_version['id'], 'Youtube', video_version['id']) - else: - source_type = self._SOURCE_TYPES.get(video_version['sourceType']) - renditions = compat_etree_fromstring(video_version['data']) - if source_type == 'http': - for rend in renditions.findall('rendition'): - attr = rend.attrib - formats.append({ - 'url': attr['url'], - 'format_id': '%s-%s' % (version, attr['name']), - 'height': int_or_none(attr.get('frameheight')), - 'width': int_or_none(attr.get('frameWidth')), - 'tbr': int_or_none(attr.get('totalBitrate')), - 'vbr': int_or_none(attr.get('videoBitrate')), - 'abr': int_or_none(attr.get('audioBitrate')), - 'vcodec': attr.get('videoCodec'), - 'acodec': attr.get('audioCodec'), - }) - elif source_type == 'hls': + ytid, 'Youtube', ytid) + + video_versions = self._call_api( + 'video/%s/streams' % video_id, video_id, + 'Downloading video versions info', + 'Failed to download video versions info') + + timestamp = parse_iso8601(video_info.get('releaseDate')) + artists = video_info.get('artists') + if artists: + uploader = artists[0]['name'] + view_count = int_or_none(video_info.get('views', {}).get('total')) + + for video_version in video_versions: + version = self._VERSIONS.get(video_version['version']) + version_url = video_version.get('url') + if not version_url: + continue + + if '.mpd' in version_url or '.ism' in version_url: + continue + elif '.m3u8' in version_url: 
formats.extend(self._extract_m3u8_formats( - renditions.find('rendition').attrib['url'], video_id, - 'mp4', 'm3u8_native', m3u8_id='hls-%s' % version, fatal=False)) - elif source_type == 'smil' and not smil_parsed: - formats.extend(self._extract_smil_formats( - renditions.find('rendition').attrib['url'], video_id, False)) - smil_parsed = True + version_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls-%s' % version, + note='Downloading %s m3u8 information' % version, + errnote='Failed to download %s m3u8 information' % version, + fatal=False)) + else: + m = re.search(r'''(?xi) + _(?P<width>[0-9]+)x(?P<height>[0-9]+) + _(?P<vcodec>[a-z0-9]+) + _(?P<vbr>[0-9]+) + _(?P<acodec>[a-z0-9]+) + _(?P<abr>[0-9]+) + \.(?P<ext>[a-z0-9]+)''', version_url) + if not m: + continue + + formats.append({ + 'url': version_url, + 'format_id': 'http-%s-%s' % (version, video_version['quality']), + 'vcodec': m.group('vcodec'), + 'acodec': m.group('acodec'), + 'vbr': int(m.group('vbr')), + 'abr': int(m.group('abr')), + 'ext': m.group('ext'), + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + else: + timestamp = int_or_none(self._search_regex( + r'/Date\((\d+)\)/', + video_info['releaseDate'], 'release date', fatal=False), + scale=1000) + artists = video_info.get('mainArtists') + if artists: + uploader = artists[0]['artistName'] + + smil_parsed = False + for video_version in video_info['videoVersions']: + version = self._VERSIONS.get(video_version['version']) + if version == 'youtube': + return self.url_result( + video_version['id'], 'Youtube', video_version['id']) + else: + source_type = self._SOURCE_TYPES.get(video_version['sourceType']) + renditions = compat_etree_fromstring(video_version['data']) + if source_type == 'http': + for rend in renditions.findall('rendition'): + attr = rend.attrib + formats.append({ + 'url': attr['url'], + 'format_id': 'http-%s-%s' % (version, attr['name']), + 'height': int_or_none(attr.get('frameheight')), + 'width': 
int_or_none(attr.get('frameWidth')), + 'tbr': int_or_none(attr.get('totalBitrate')), + 'vbr': int_or_none(attr.get('videoBitrate')), + 'abr': int_or_none(attr.get('audioBitrate')), + 'vcodec': attr.get('videoCodec'), + 'acodec': attr.get('audioCodec'), + }) + elif source_type == 'hls': + formats.extend(self._extract_m3u8_formats( + renditions.find('rendition').attrib['url'], video_id, + 'mp4', 'm3u8_native', m3u8_id='hls-%s' % version, + note='Downloading %s m3u8 information' % version, + errnote='Failed to download %s m3u8 information' % version, + fatal=False)) + elif source_type == 'smil' and not smil_parsed: + formats.extend(self._extract_smil_formats( + renditions.find('rendition').attrib['url'], video_id, False)) + smil_parsed = True self._sort_formats(formats) + title = video_info['title'] + is_explicit = video_info.get('isExplicit') if is_explicit is True: age_limit = 18 @@ -207,21 +303,16 @@ class VevoIE(InfoExtractor): else: age_limit = None - timestamp = int_or_none(self._search_regex( - r'/Date\((\d+)\)/', - video_info['launchDate'], 'launch date', fatal=False), - scale=1000) if video_info else None - - duration = video_info.get('duration') or int_or_none( - self._html_search_meta('video:duration', webpage)) + duration = video_info.get('duration') return { 'id': video_id, 'title': title, 'formats': formats, - 'thumbnail': video_info.get('imageUrl'), + 'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'), 'timestamp': timestamp, - 'uploader': video_info['mainArtists'][0]['artistName'] if video_info else None, + 'uploader': uploader, 'duration': duration, + 'view_count': view_count, 'age_limit': age_limit, } From f693213567010ecc108447cba4615ae2932d1c18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 26 Jan 2016 20:42:20 +0600 Subject: [PATCH 036/110] [cspan] Fix clip/prog id extraction (#8317) --- youtube_dl/extractor/cspan.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 
deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index b3ee67018..f78cbbd7e 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -68,11 +68,16 @@ class CSpanIE(InfoExtractor): video_type, video_id = matches.groups() video_type = 'clip' if video_type == 'id' else 'program' else: - senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) - if senate_isvp_url: - title = self._og_search_title(webpage) - surl = smuggle_url(senate_isvp_url, {'force_title': title}) - return self.url_result(surl, 'SenateISVP', video_id, title) + m = re.search(r'data-(?P<type>clip|prog)id=["\'](?P<id>\d+)', webpage) + if m: + video_id = m.group('id') + video_type = 'program' if m.group('type') == 'prog' else 'clip' + else: + senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) + if senate_isvp_url: + title = self._og_search_title(webpage) + surl = smuggle_url(senate_isvp_url, {'force_title': title}) + return self.url_result(surl, 'SenateISVP', video_id, title) if video_type is None or video_id is None: raise ExtractorError('unable to find video id and type') From 682f8c43b50d8b5e2c02e34187cbddb0d5f8d3ed Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 26 Jan 2016 15:54:32 +0100 Subject: [PATCH 037/110] [vevo] fallback to youtube video only if vevo video is geo restricted(fixes 8263)(fixes 2874) --- youtube_dl/extractor/vevo.py | 93 ++++++++++++++---------------------- 1 file changed, 36 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 9c955c895..35fcff1b2 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -26,67 +26,52 @@ class VevoIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', - 'md5': '2dbc7e9fd4f1c60436c9aa73a5406193', + 'md5': '95ee28ee45e70130e3ab02b0f579ae23', 'info_dict': { - 'id': 'Pt1kc_FniKM', + 'id': 'GB1101300280', 'ext': 'mp4', - 
'title': 'Hurts - Somebody to Die For', - 'description': 'md5:13e925b89af6b01c7e417332bd23c4bf', - 'uploader_id': 'HurtsVEVO', - 'uploader': 'HurtsVEVO', + 'title': 'Somebody to Die For', 'upload_date': '20130624', - 'duration': 230, + 'uploader': 'Hurts', + 'timestamp': 1372057200, }, - 'add_ie': ['Youtube'], }, { 'note': 'v3 SMIL format', 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', - 'md5': '13d5204f520af905eeffa675040b8e76', + 'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 'info_dict': { - 'id': 'ByGmQn1uxJw', + 'id': 'USUV71302923', 'ext': 'mp4', - 'title': 'Cassadee Pope - I Wish I Could Break Your Heart', - 'description': 'md5:5e9721c92ef117a6f69d00e9b42ceba7', - 'uploader_id': 'CassadeeVEVO', - 'uploader': 'CassadeeVEVO', + 'title': 'I Wish I Could Break Your Heart', 'upload_date': '20140219', - 'duration': 226, - 'age_limit': 0, + 'uploader': 'Cassadee Pope', + 'timestamp': 1392796919, }, - 'add_ie': ['Youtube'], }, { 'note': 'Age-limited video', 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 'info_dict': { - 'id': '07FYdnEawAQ', + 'id': 'USRV81300282', 'ext': 'mp4', - 'age_limit': 18, - 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', - 'description': 'md5:64249768eec3bc4276236606ea996373', - 'uploader_id': 'justintimberlakeVEVO', - 'uploader': 'justintimberlakeVEVO', + 'title': 'Tunnel Vision (Explicit)', 'upload_date': '20130703', - 'duration': 419, + 'age_limit': 18, + 'uploader': 'Justin Timberlake', + 'timestamp': 1372888800, }, - 'params': { - 'skip_download': 'true', - }, - 'add_ie': ['Youtube'], }, { 'note': 'No video_info', 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', - 'md5': 'a8b84d1d1957cd01046441b701b270fb', + 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', 'info_dict': { - 'id': 'Lad2jHtJCqY', + 'id': 'USUV71503000', 'ext': 'mp4', - 'title': 'K Camp - Till I Die ft. 
T.I.', - 'description': 'md5:0694920ededdee4a14cfc39695cc8ec3', - 'uploader_id': 'KCampVEVO', - 'uploader': 'KCampVEVO', + 'title': 'Till I Die', 'upload_date': '20151207', - 'duration': 193, + 'age_limit': 18, + 'uploader': 'K Camp', + 'timestamp': 1449468000, }, - 'add_ie': ['Youtube'], }] _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com' _SOURCE_TYPES = { @@ -146,7 +131,7 @@ class VevoIE(InfoExtractor): }) return formats - def _initialize_api(self, video_url, video_id): + def _initialize_api(self, video_id): req = sanitized_Request( 'http://www.vevo.com/auth', data=b'') webpage = self._download_webpage( @@ -155,7 +140,8 @@ class VevoIE(InfoExtractor): errnote='Unable to retrieve oauth token') if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: - raise ExtractorError('%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True) + raise ExtractorError( + '%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True) auth_info = self._parse_json(webpage, video_id) self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token'] @@ -167,7 +153,8 @@ class VevoIE(InfoExtractor): video_id = self._match_id(url) json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id - response = self._download_json(json_url, video_id, 'Downloading video info', 'Unable to download info') + response = self._download_json( + json_url, video_id, 'Downloading video info', 'Unable to download info') video_info = response.get('video') or {} video_versions = video_info.get('videoVersions') uploader = None @@ -176,30 +163,23 @@ class VevoIE(InfoExtractor): formats = [] if not video_info: - ytid = response.get('errorInfo', {}).get('ytid') - if ytid: - return self.url_result(ytid, 'Youtube', ytid) - if response.get('statusCode') != 909: + ytid = response.get('errorInfo', {}).get('ytid') + if ytid: + self.report_warning( + 'Video is geoblocked, trying with 
the YouTube video %s' % ytid) + return self.url_result(ytid, 'Youtube', ytid) + if 'statusMessage' in response: raise ExtractorError('%s said: %s' % ( self.IE_NAME, response['statusMessage']), expected=True) raise ExtractorError('Unable to extract videos') - if url.startswith('vevo:'): - raise ExtractorError( - 'Please specify full Vevo URL for downloading', expected=True) - - self._initialize_api(url, video_id) + self._initialize_api(video_id) video_info = self._call_api( 'video/%s' % video_id, video_id, 'Downloading api video info', 'Failed to download video info') - ytid = video_info.get('youTubeId') - if ytid: - return self.url_result( - ytid, 'Youtube', ytid) - video_versions = self._call_api( 'video/%s/streams' % video_id, video_id, 'Downloading video versions info', @@ -215,7 +195,7 @@ class VevoIE(InfoExtractor): version = self._VERSIONS.get(video_version['version']) version_url = video_version.get('url') if not version_url: - continue + continue if '.mpd' in version_url or '.ism' in version_url: continue @@ -261,8 +241,7 @@ class VevoIE(InfoExtractor): for video_version in video_info['videoVersions']: version = self._VERSIONS.get(video_version['version']) if version == 'youtube': - return self.url_result( - video_version['id'], 'Youtube', video_version['id']) + continue else: source_type = self._SOURCE_TYPES.get(video_version['sourceType']) renditions = compat_etree_fromstring(video_version['data']) @@ -287,7 +266,7 @@ class VevoIE(InfoExtractor): note='Downloading %s m3u8 information' % version, errnote='Failed to download %s m3u8 information' % version, fatal=False)) - elif source_type == 'smil' and not smil_parsed: + elif source_type == 'smil' and version == 'level3' and not smil_parsed: formats.extend(self._extract_smil_formats( renditions.find('rendition').attrib['url'], video_id, False)) smil_parsed = True From af9c2a07aea530b3bee560a953e94ac92fcd49c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 26 Jan 
2016 21:29:42 +0600 Subject: [PATCH 038/110] [cspan] Extract from path when no qualities (Closes #8317) --- youtube_dl/extractor/cspan.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index f78cbbd7e..b78edf729 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -112,6 +112,13 @@ class CSpanIE(InfoExtractor): 'height': int_or_none(get_text_attr(quality, 'height')), 'tbr': int_or_none(get_text_attr(quality, 'bitrate')), }) + if not formats: + path = get_text_attr(f, 'path') + if not path: + continue + formats = self._extract_m3u8_formats( + path, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }] self._sort_formats(formats) entries.append({ 'id': '%s_%d' % (video_id, partnum + 1), From fb4b3458000bff87f2b083f9a5e6853c043d032d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 26 Jan 2016 21:46:51 +0600 Subject: [PATCH 039/110] [instagram] Make description optional (Closes #8326) --- youtube_dl/extractor/instagram.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index e5e16ca3b..ed3e07118 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -21,6 +21,18 @@ class InstagramIE(InfoExtractor): 'title': 'Video by naomipq', 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', } + }, { + # missing description + 'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears', + 'info_dict': { + 'id': 'BA-pQFBG8HZ', + 'ext': 'mp4', + 'uploader_id': 'britneyspears', + 'title': 'Video by britneyspears', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://instagram.com/p/-Cmh1cukG2/', 'only_matching': True, @@ -32,8 +44,8 @@ class InstagramIE(InfoExtractor): webpage = self._download_webpage(url, 
video_id) uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', webpage, 'uploader id', fatal=False) - desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description', - fatal=False) + desc = self._search_regex( + r'"caption":"(.+?)"', webpage, 'description', default=None) return { 'id': video_id, From b6c33fd544adddf399a80842eb9a7d2250aa04b6 Mon Sep 17 00:00:00 2001 From: ping <liping.ong@gmail.com> Date: Wed, 27 Jan 2016 12:48:00 +0800 Subject: [PATCH 040/110] [daum.net] Fixes #8331 --- youtube_dl/extractor/daum.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index f08f57157..c84302c43 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_urllib_parse from ..utils import ( @@ -28,6 +30,15 @@ class DaumIE(InfoExtractor): 'comment_count': int, }, }, { + 'url': 'http://m.tvpot.daum.net/v/65139429', + 'info_dict': { + 'id': '65139429', + 'ext': 'mp4', + 'title': 'md5:a100d65d09cec246d8aa9bde7de45aed', + 'description': 'md5:79794514261164ff27e36a21ad229fc5', + 'upload_date': '20150604', + 'duration': 154 + }, }, { 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', 'only_matching': True, }] @@ -42,6 +53,10 @@ class DaumIE(InfoExtractor): 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' 
+ query, video_id, 'Downloading video formats info') + # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid + if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id): + return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id) + formats = [] for format_el in movie_data['output_list']['output_list']: profile = format_el['profile'] @@ -76,7 +91,7 @@ class DaumIE(InfoExtractor): class DaumClipIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)' + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)' IE_NAME = 'daum.net:clip' _TESTS = [{ @@ -90,6 +105,9 @@ class DaumClipIE(InfoExtractor): 'duration': 3868, 'view_count': int, }, + }, { + 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425', + 'only_matching': True, }] def _real_extract(self, url): From fab6f0e65b0084ce8b29c0810b874d132ffcd47c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 27 Jan 2016 08:32:03 +0100 Subject: [PATCH 041/110] release 2016.01.27 --- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e86467cfa..eb68c23b5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -180,6 +180,7 @@ - **ExpoTV** - **ExtremeTube** - **facebook** + - **facebook:post** - **faz.net** - **fc2** - **Fczenit** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d5bf73815..4ac7f9e93 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.01.23' +__version__ = '2016.01.27' From b0d21deda95e7e49e162c12aa305f27e477e0ce9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 27 Jan 
2016 21:11:17 +0600 Subject: [PATCH 042/110] [extractor/common] Auto calculate tbr when missing --- youtube_dl/extractor/common.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2f574054d..f5a358388 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -825,6 +825,12 @@ class InfoExtractor(object): if not formats: raise ExtractorError('No video formats found') + for f in formats: + # Automatically determine tbr when missing based on abr and vbr (improves + # formats sorting in some cases) + if 'tbr' not in f and 'abr' in f and 'vbr' in f: + f['tbr'] = f['abr'] + f['vbr'] + def _formats_key(f): # TODO remove the following workaround from ..utils import determine_ext From 9339774af274b04a054b0fa151347619e40b5b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 27 Jan 2016 21:49:39 +0600 Subject: [PATCH 043/110] [spankbang] Fix formats extraction --- youtube_dl/extractor/spankbang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 7f060b15b..a67db5905 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -34,7 +34,7 @@ class SpankBangIE(InfoExtractor): 'ext': 'mp4', 'format_id': '%sp' % height, 'height': int(height), - } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)] + } for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)] self._sort_formats(formats) title = self._html_search_regex( From c3111ab34fed3af16f645c6ee4dcd6e5d35b69dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 27 Jan 2016 21:49:56 +0600 Subject: [PATCH 044/110] [spankbang] Fix title extraction (Closes #8329) --- youtube_dl/extractor/spankbang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py 
b/youtube_dl/extractor/spankbang.py index a67db5905..3cfa671ed 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -38,7 +38,7 @@ class SpankBangIE(InfoExtractor): self._sort_formats(formats) title = self._html_search_regex( - r'(?s)<h1>(.+?)</h1>', webpage, 'title') + r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title') description = self._search_regex( r'class="desc"[^>]*>([^<]+)', webpage, 'description', default=None) From 7f32e5dc350e2a641e6304855727623f95bef31c Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 26 Jan 2016 17:44:44 +0100 Subject: [PATCH 045/110] [extractor/common] detect media playlist in _extract_m3u8_formats --- youtube_dl/extractor/common.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2f574054d..11191c173 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1014,6 +1014,18 @@ class InfoExtractor(object): return [] m3u8_doc, urlh = res m3u8_url = urlh.geturl() + # A Media Playlist Tag MUST NOT appear in a Master Playlist + # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3 + # The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists + # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1 + if '#EXT-X-TARGETDURATION' in m3u8_doc: + return [{ + 'url': m3u8_url, + 'format_id': m3u8_id, + 'ext': ext, + 'protocol': entry_protocol, + 'preference': preference, + }] last_info = None last_media = None kv_rex = re.compile( @@ -1164,6 +1176,7 @@ class InfoExtractor(object): formats = [] rtmp_count = 0 http_count = 0 + m3u8_count = 0 videos = smil.findall(self._xpath_ns('.//video', namespace)) for video in videos: @@ -1203,8 +1216,17 @@ class InfoExtractor(object): src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) if proto == 'm3u8' or src_ext == 
'm3u8': - formats.extend(self._extract_m3u8_formats( - src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)) + m3u8_formats = self._extract_m3u8_formats( + src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False) + if len(m3u8_formats) == 1: + m3u8_count += 1 + m3u8_formats[0].update({ + 'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate), + 'tbr': bitrate, + 'width': width, + 'height': height, + }) + formats.extend(m3u8_formats) continue if src_ext == 'f4m': From f125d9115b262ccead54a31c4e64b0c71ad65721 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 27 Jan 2016 19:10:34 +0100 Subject: [PATCH 046/110] [cbsnews] extract all formats --- youtube_dl/extractor/cbsnews.py | 46 +++++++++++---------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 480435e26..6db66e886 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import re import json -from .common import InfoExtractor +from .theplatform import ThePlatformIE from ..utils import remove_start -class CBSNewsIE(InfoExtractor): +class CBSNewsIE(ThePlatformIE): IE_DESC = 'CBS News' _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)' @@ -31,7 +31,7 @@ class CBSNewsIE(InfoExtractor): 'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', 'info_dict': { 'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 205, @@ -42,7 +42,7 @@ class CBSNewsIE(InfoExtractor): }, }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, }, }, @@ -63,33 +63,6 @@ class CBSNewsIE(InfoExtractor): duration = item.get('duration') 
thumbnail = item.get('mediaImage') or item.get('thumbnail') - formats = [] - for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']: - uri = item.get('media' + format_id + 'URI') - if not uri: - continue - uri = remove_start(uri, '{manifest:none}') - fmt = { - 'url': uri, - 'format_id': format_id, - } - if uri.startswith('rtmp'): - play_path = re.sub( - r'{slistFilePath}', '', - uri.split('<break>')[-1].split('{break}')[-1]) - play_path = re.sub( - r'{manifest:.+}.*$', '', play_path) - fmt.update({ - 'app': 'ondemand?auth=cbs', - 'play_path': 'mp4:' + play_path, - 'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf', - 'page_url': 'http://www.cbsnews.com', - 'ext': 'flv', - }) - elif uri.endswith('.m3u8'): - fmt['ext'] = 'mp4' - formats.append(fmt) - subtitles = {} if 'mpxRefId' in video_info: subtitles['en'] = [{ @@ -97,6 +70,17 @@ class CBSNewsIE(InfoExtractor): 'url': 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'], }] + formats = [] + for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']: + pid = item.get('media' + format_id) + if not pid: + continue + release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid + tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid) + formats.extend(tp_formats) + subtitles = self._merge_subtitles(subtitles, tp_subtitles) + self._sort_formats(formats) + return { 'id': video_id, 'title': title, From ed7cd1e859cf97e975a28a5e8c58a1d1aca819fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Jan 2016 00:42:04 +0600 Subject: [PATCH 047/110] [cbsnews] Remove unused import --- youtube_dl/extractor/cbsnews.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 6db66e886..cabf7e73b 100644 --- 
a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -5,7 +5,6 @@ import re import json from .theplatform import ThePlatformIE -from ..utils import remove_start class CBSNewsIE(ThePlatformIE): From b0df5223be5429da0378841dc33eecb532f6898f Mon Sep 17 00:00:00 2001 From: dyn888 <dyn.8.8.8+github@gmail.com> Date: Thu, 28 Jan 2016 12:07:15 +0100 Subject: [PATCH 048/110] Update YoutubeDL.py --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 09d2b18f2..e1bd40843 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -906,7 +906,7 @@ class YoutubeDL(object): str_operator_rex = re.compile(r'''(?x) \s*(?P<key>ext|acodec|vcodec|container|protocol) \s*(?P<op>%s)(?P<none_inclusive>\s*\?)? - \s*(?P<value>[a-zA-Z0-9_-]+) + \s*(?P<value>[a-zA-Z0-9._-]+) \s*$ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) m = str_operator_rex.search(filter_spec) From b913348d5fa1130ab1c65576e458407f262df6e1 Mon Sep 17 00:00:00 2001 From: dyn888 <dyn.8.8.8+github@gmail.com> Date: Thu, 28 Jan 2016 15:07:33 +0100 Subject: [PATCH 049/110] Test codec with a dot '.' in name selection. 
--- test/test_YoutubeDL.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 0caa43843..b53cfbe78 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -221,6 +221,16 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'dash-video-low') + formats = [ + {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot') + def test_youtube_format_selection(self): order = [ '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13', From 29f46c2bee3641efcf2c06c3d173baf7a41010af Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 28 Jan 2016 22:56:00 +0800 Subject: [PATCH 050/110] Credit @dyn888 for improving format selection [ci skip] --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index bb1f2d8d9..a46799506 100644 --- a/AUTHORS +++ b/AUTHORS @@ -155,3 +155,4 @@ Vignesh Venkat Tom Gijselinck Founder Fang Andrew Alexeyew +Saso Bezlaj From 38c84acae512f1b218e5146f4b2ddcac6b1a5e13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Jan 2016 22:50:18 +0600 Subject: [PATCH 051/110] [ndr:embed:base] Add missing ext for m3u8 --- youtube_dl/extractor/ndr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 894c51399..0cded6b5c 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -193,7 +193,7 @@ class NDREmbedBaseIE(InfoExtractor): src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 
f4m_id='hds')) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - src, video_id, m3u8_id='hls', entry_protocol='m3u8_native')) + src, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native')) else: quality = f.get('quality') ff = { From 10e6ed93417fb51c606af2e3e47b9a5a094dd6ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Jan 2016 22:56:49 +0600 Subject: [PATCH 052/110] [ok] Add support for mobile URLs (Closes #8345) --- youtube_dl/extractor/odnoklassniki.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 184c7a323..f9e064a60 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -13,7 +13,7 @@ from ..utils import ( class OdnoklassnikiIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)' + _VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)' _TESTS = [{ # metadata in JSON 'url': 'http://ok.ru/video/20079905452', @@ -69,6 +69,12 @@ class OdnoklassnikiIE(InfoExtractor): }, { 'url': 'http://www.ok.ru/videoembed/20648036891', 'only_matching': True, + }, { + 'url': 'http://m.ok.ru/video/20079905452', + 'only_matching': True, + }, { + 'url': 'http://mobile.ok.ru/video/20079905452', + 'only_matching': True, }] def _real_extract(self, url): From 50e989e2636fc59ed896cc021b1b594bd10e9e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Jan 2016 23:19:53 +0600 Subject: [PATCH 053/110] [bbc] Add another title regex (Closes #8340) --- youtube_dl/extractor/bbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 1c493b72d..210ce568b 100644 --- 
a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -482,7 +482,8 @@ class BBCCoUkIE(InfoExtractor): if programme_id: formats, subtitles = self._download_media_selector(programme_id) title = self._og_search_title(webpage, default=None) or self._html_search_regex( - r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', webpage, 'title') + (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', + r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title') description = self._search_regex( r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>', webpage, 'description', default=None) From a853427427269549593a0217db203305332ac983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Jan 2016 23:23:13 +0600 Subject: [PATCH 054/110] [bbc] Add another description regex --- youtube_dl/extractor/bbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 210ce568b..5be0ff476 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -485,7 +485,8 @@ class BBCCoUkIE(InfoExtractor): (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title') description = self._search_regex( - r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>', + (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>', + r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'), webpage, 'description', default=None) if not description: description = self._html_search_meta('description', webpage) From 1ac6e794cb36af612db97007006fc7cf1468e049 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Jan 2016 23:27:48 +0600 Subject: [PATCH 055/110] [bbc] Add test for #8147 --- youtube_dl/extractor/bbc.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 5be0ff476..6ddee686c 100644 --- 
a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -193,6 +193,19 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, + }, { + # compact player (https://github.com/rg3/youtube-dl/issues/8147) + 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', + 'info_dict': { + 'id': 'p028bfkj', + 'ext': 'flv', + 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', + 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', 'only_matching': True, From 70029bc348f27294b7f3e369f953167c1893c2bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 29 Jan 2016 11:27:11 +0100 Subject: [PATCH 056/110] [youtube:user] Require 'https?://' in the url (fixes #8356) It was matching www.youtube.com/embed/WpfukLMe1TM. The generic extractor automatically adds http:// if it's missing. 
--- test/test_all_urls.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index a0c11e6c1..f5af184e6 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -56,7 +56,7 @@ class TestAllURLsMatching(unittest.TestCase): assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') def test_youtube_user_matching(self): - self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) + self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user']) def test_youtube_feeds(self): self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater']) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 92b9f3ae4..a24c73584 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1846,7 +1846,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' IE_NAME = 'youtube:user' From 055f4172781dd2a43d60f17a91a1d0c1a5f3e6b9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 29 Jan 2016 12:20:08 +0100 Subject: [PATCH 057/110] release 2016.01.29 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4ac7f9e93..d9f1e22b0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ 
= '2016.01.27' +__version__ = '2016.01.29' From 2b4f5e68d1517bcadac4b25ecbac3b143104b1c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 29 Jan 2016 15:36:33 +0100 Subject: [PATCH 058/110] [azubu] Add extractor for live streams (closes #8343) --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/azubu.py | 40 +++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 532be7e4c..5e0d7d3dc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -50,7 +50,7 @@ from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE from .audimedia import AudiMediaIE from .audiomack import AudiomackIE, AudiomackAlbumIE -from .azubu import AzubuIE +from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 0961d339f..011edf128 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -3,7 +3,11 @@ from __future__ import unicode_literals import json from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + ExtractorError, + float_or_none, + sanitized_Request, +) class AzubuIE(InfoExtractor): @@ -91,3 +95,37 @@ class AzubuIE(InfoExtractor): 'view_count': view_count, 'formats': formats, } + + +class AzubuLiveIE(InfoExtractor): + _VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$' + + _TEST = { + 'url': 'http://www.azubu.tv/MarsTVMDLen', + 'only_matching': True, + } + + def _real_extract(self, url): + user = self._match_id(url) + + info = self._download_json( + 'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user), + user)['data'] + if info['type'] != 'STREAM': 
+ raise ExtractorError('{0} is not streaming live'.format(user), expected=True) + + req = sanitized_Request( + 'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id']) + req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV') + bc_info = self._download_json(req, user) + m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS') + formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4') + + return { + 'id': info['id'], + 'title': self._live_title(info['title']), + 'uploader_id': user, + 'formats': formats, + 'is_live': True, + 'thumbnail': bc_info['poster'], + } From 68a0ea15b4c20ed0174a82ee79a6d3c3474b0f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 00:26:33 +0600 Subject: [PATCH 059/110] [cspan] Unescape path (Closes #8365) --- youtube_dl/extractor/cspan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index b78edf729..b8b9d058d 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -113,7 +113,7 @@ class CSpanIE(InfoExtractor): 'tbr': int_or_none(get_text_attr(quality, 'bitrate')), }) if not formats: - path = get_text_attr(f, 'path') + path = unescapeHTML(get_text_attr(f, 'path')) if not path: continue formats = self._extract_m3u8_formats( From 350cf045d8f86497e8d79ae193b40cc44c8e670c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 01:47:46 +0600 Subject: [PATCH 060/110] [extractor/common] Restrict checks when auto calculating tbr --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 
33290fd74..b3d57dfce 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -828,7 +828,7 @@ class InfoExtractor(object): for f in formats: # Automatically determine tbr when missing based on abr and vbr (improves # formats sorting in some cases) - if 'tbr' not in f and 'abr' in f and 'vbr' in f: + if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None: f['tbr'] = f['abr'] + f['vbr'] def _formats_key(f): From 83ab8a79ccc3b6ef143f7d636c0118f7c3e5777b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 01:48:54 +0600 Subject: [PATCH 061/110] [espn] Improve video id extraction (Closes #8368) --- youtube_dl/extractor/espn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 3762d8748..db4b263bc 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -53,8 +53,8 @@ class ESPNIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_id = self._search_regex( - r'class="video-play-button"[^>]+data-id="(\d+)', - webpage, 'video id') + r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)', + webpage, 'video id', group='id') cms = 'espn' if 'data-source="intl"' in webpage: From e047922be01ca346cee8f53b7972773d64eed6f4 Mon Sep 17 00:00:00 2001 From: ping <liping.ong@gmail.com> Date: Sat, 30 Jan 2016 11:04:11 +0800 Subject: [PATCH 062/110] [daum] Fix copy-paste mistake --- youtube_dl/extractor/daum.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index c84302c43..0d74e5207 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -38,7 +38,8 @@ class DaumIE(InfoExtractor): 'description': 'md5:79794514261164ff27e36a21ad229fc5', 'upload_date': '20150604', 'duration': 154 - }, }, { + }, + }, { 'url': 
'http://tvpot.daum.net/v/07dXWRka62Y%24', 'only_matching': True, }] From f5d30d521c63b4eec4aa4386365054222e354231 Mon Sep 17 00:00:00 2001 From: ping <liping.ong@gmail.com> Date: Sat, 30 Jan 2016 11:09:30 +0800 Subject: [PATCH 063/110] [daum] Fix add view_count, comment_count to test --- youtube_dl/extractor/daum.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 0d74e5207..567abdc39 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -37,7 +37,9 @@ class DaumIE(InfoExtractor): 'title': 'md5:a100d65d09cec246d8aa9bde7de45aed', 'description': 'md5:79794514261164ff27e36a21ad229fc5', 'upload_date': '20150604', - 'duration': 154 + 'duration': 154, + 'view_count': int, + 'comment_count': int, }, }, { 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', From 06ffa3348590f9aa6ddca7ca8f78424c49314560 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 16:23:37 +0800 Subject: [PATCH 064/110] [daum.net] Move the request to ClipInfoXml.do To reduce the number of wasted requests --- youtube_dl/extractor/daum.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 567abdc39..543eb5c3a 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -49,9 +49,6 @@ class DaumIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) query = compat_urllib_parse.urlencode({'vid': video_id}) - info = self._download_xml( - 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, - 'Downloading video info') movie_data = self._download_json( 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' 
+ query, video_id, 'Downloading video formats info') @@ -60,6 +57,10 @@ class DaumIE(InfoExtractor): if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id): return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id) + info = self._download_xml( + 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, + 'Downloading video info') + formats = [] for format_el in movie_data['output_list']['output_list']: profile = format_el['profile'] From 830afe85dc79e034ffbe7f3ec1d049482ed651de Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 16:50:13 +0800 Subject: [PATCH 065/110] [daum.net] Support VodPlayer.swf URLs (closes #8173) --- youtube_dl/extractor/daum.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 543eb5c3a..ea6a9d848 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -5,7 +5,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import ( + compat_urllib_parse, + compat_urllib_parse_unquote, +) from ..utils import ( int_or_none, str_to_int, @@ -14,7 +17,7 @@ from ..utils import ( class DaumIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P<id>[^?#&]+)' + _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)' IE_NAME = 'daum.net' _TESTS = [{ @@ -44,10 +47,23 @@ class DaumIE(InfoExtractor): }, { 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', 'only_matching': True, + }, { + 'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=', + 'info_dict': { + 'id': 'vwIpVpCQsT8$', + 'ext': 'flv', + 'title': '01-Korean War ( Trouble on the horizon )', + 'description': '\nKorean War 01\nTrouble on the 
horizon\n전쟁의 먹구름', + 'upload_date': '20080223', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 249, + 'view_count': int, + 'comment_count': int, + }, }] def _real_extract(self, url): - video_id = self._match_id(url) + video_id = compat_urllib_parse_unquote(self._match_id(url)) query = compat_urllib_parse.urlencode({'vid': video_id}) movie_data = self._download_json( 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query, From 0179f6a8301e8dee1f435289f89cf1c748a13b16 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 16:54:14 +0800 Subject: [PATCH 066/110] [daum] Add 'thumbnail' to all _TESTS --- youtube_dl/extractor/daum.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index ea6a9d848..9bc345f60 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -28,6 +28,7 @@ class DaumIE(InfoExtractor): 'title': '마크 헌트 vs 안토니오 실바', 'description': 'Mark Hunt vs Antonio Silva', 'upload_date': '20131217', + 'thumbnail': 're:^https?://.*\.(?:jpg|png)', 'duration': 2117, 'view_count': int, 'comment_count': int, @@ -40,6 +41,7 @@ class DaumIE(InfoExtractor): 'title': 'md5:a100d65d09cec246d8aa9bde7de45aed', 'description': 'md5:79794514261164ff27e36a21ad229fc5', 'upload_date': '20150604', + 'thumbnail': 're:^https?://.*\.(?:jpg|png)', 'duration': 154, 'view_count': int, 'comment_count': int, @@ -55,7 +57,7 @@ class DaumIE(InfoExtractor): 'title': '01-Korean War ( Trouble on the horizon )', 'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름', 'upload_date': '20080223', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': 're:^https?://.*\.(?:jpg|png)', 'duration': 249, 'view_count': int, 'comment_count': int, @@ -122,6 +124,7 @@ class DaumClipIE(InfoExtractor): 'title': 'DOTA 2GETHER 시즌2 6회 - 2부', 'description': 'DOTA 2GETHER 시즌2 6회 - 2부', 'upload_date': '20130831', + 'thumbnail': 
're:^https?://.*\.(?:jpg|png)', 'duration': 3868, 'view_count': int, }, From 7d106a65ca30cc3201f147cb96aa98a15a48d6d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 30 Jan 2016 12:26:40 +0100 Subject: [PATCH 067/110] Add --hls-use-mpegts option When using the mpegts container hls videos can be played while being downloaded (useful if you are recording a live stream). VLC and mpv play them fine, but QuickTime doesn't. --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 1 + youtube_dl/downloader/common.py | 1 + youtube_dl/downloader/hls.py | 6 +++++- youtube_dl/options.py | 5 +++++ 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e1bd40843..2a3d6cd4a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -263,7 +263,7 @@ class YoutubeDL(object): the downloader (see youtube_dl/downloader/common.py): nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize, external_downloader_args. + xattr_set_filesize, external_downloader_args, hls_use_mpegts. 
The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9f131f5db..f5f064241 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -369,6 +369,7 @@ def _real_main(argv=None): 'no_color': opts.no_color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, + 'hls_use_mpegts': opts.hls_use_mpegts, 'external_downloader_args': external_downloader_args, 'postprocessor_args': postprocessor_args, 'cn_verification_proxy': opts.cn_verification_proxy, diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index fc7521598..de815612c 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -45,6 +45,7 @@ class FileDownloader(object): (experimental) external_downloader_args: A list of additional command-line arguments for the external downloader. + hls_use_mpegts: Use the mpegts container for HLS videos. Subclasses of this one must re-define the real_download method. 
""" diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 10b83c6b2..cb34dc4ab 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -39,7 +39,11 @@ class HlsFD(FileDownloader): '-headers', ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] - args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc'] + args += ['-i', url, '-c', 'copy'] + if self.params.get('hls_use_mpegts', False): + args += ['-f', 'mpegts'] + else: + args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] args = [encodeArgument(opt) for opt in args] args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 433245f00..39fc4306a 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -415,6 +415,11 @@ def parseOpts(overrideArguments=None): '--hls-prefer-native', dest='hls_prefer_native', action='store_true', help='Use the native HLS downloader instead of ffmpeg (experimental)') + downloader.add_option( + '--hls-use-mpegts', + dest='hls_use_mpegts', action='store_true', + help='Use the mpegts container for HLS videos, allowing to play the ' + 'video while downloading (some players may not be able to play it') downloader.add_option( '--external-downloader', dest='external_downloader', metavar='COMMAND', From c1406299959b780e7b86b567bd4eeecc8556dce0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 19:30:39 +0800 Subject: [PATCH 068/110] [facebook] Support alternative webpage form Fixes #8371 --- youtube_dl/extractor/facebook.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index cb5dd57fb..899b0896b 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -150,10 +150,32 @@ class FacebookIE(InfoExtractor): url = 
'https://www.facebook.com/video/video.php?v=%s' % video_id webpage = self._download_webpage(url, video_id) + video_data = None + BEFORE = '{swf.addParam(param[0], param[1]);});\n' AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) - if not m: + if m: + data = dict(json.loads(m.group(1))) + params_raw = compat_urllib_parse_unquote(data['params']) + video_data = json.loads(params_raw)['video_data'] + + def video_data_list2dict(video_data): + ret = {} + for item in video_data: + format_id = item['stream_type'] + ret.setdefault(format_id, []).append(item) + return ret + + if not video_data: + server_js_data = self._parse_json(self._search_regex( + r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id) + for item in server_js_data['instances']: + if item[1][0] == 'VideoConfig': + video_data = video_data_list2dict(item[2][0]['videoData']) + break + + if not video_data: m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) if m_msg is not None: raise ExtractorError( @@ -161,12 +183,9 @@ class FacebookIE(InfoExtractor): expected=True) else: raise ExtractorError('Cannot parse data') - data = dict(json.loads(m.group(1))) - params_raw = compat_urllib_parse_unquote(data['params']) - params = json.loads(params_raw) formats = [] - for format_id, f in params['video_data'].items(): + for format_id, f in video_data.items(): if not f or not isinstance(f, list): continue for quality in ('sd', 'hd'): From 8ab3fe81d84cb541eac3af73404e71d979967dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 18:28:38 +0600 Subject: [PATCH 069/110] [downloader/f4m] Prefer bootstrap url attribute over inline bootstrap info --- youtube_dl/downloader/f4m.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 
aaf0c49c8..f8da04f69 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -273,15 +273,21 @@ class F4mFD(FragmentFD): return fragments_list def _parse_bootstrap_node(self, node, base_url): - if node.text is None: + # Sometimes non empty inline bootstrap info can be specified along + # with bootstrap url attribute (e.g. dummy inline bootstrap info + # contains whitespace characters in [1]). We will prefer bootstrap + # url over inline bootstrap info when present. + # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m + bootstrap_url = node.get('url') + if bootstrap_url: bootstrap_url = compat_urlparse.urljoin( - base_url, node.attrib['url']) + base_url, bootstrap_url) boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None bootstrap = base64.b64decode(node.text.encode('ascii')) boot_info = read_bootstrap_info(bootstrap) - return (boot_info, bootstrap_url) + return boot_info, bootstrap_url def real_download(self, filename, info_dict): man_url = info_dict['url'] From c3deacd5621d9686a32614e550eb4726e66c3899 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 18:30:27 +0600 Subject: [PATCH 070/110] [matchtv] Add extractor (Closes #8313) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/matchtv.py | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/matchtv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5e0d7d3dc..e15495ec8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -372,6 +372,7 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from .makertv import MakerTVIE from .malemotion import MalemotionIE +from .matchtv import MatchTVIE from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import 
MetacriticIE diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py new file mode 100644 index 000000000..28e0dfe63 --- /dev/null +++ b/youtube_dl/extractor/matchtv.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random + +from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import ( + sanitized_Request, + xpath_text, +) + + +class MatchTVIE(InfoExtractor): + _VALID_URL = r'https?://matchtv\.ru/?#live-player' + _TEST = { + 'url': 'http://matchtv.ru/#live-player', + 'info_dict': { + 'id': 'matchtv-live', + 'ext': 'flv', + 'title': 're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = 'matchtv-live' + request = sanitized_Request( + 'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse.urlencode({ + 'ts': '', + 'quality': 'SD', + 'contentId': '561d2c0df7159b37178b4567', + 'sign': '', + 'includeHighlights': '0', + 'userId': '', + 'sessionId': random.randint(1, 1000000000), + 'contentType': 'channel', + 'timeShift': '0', + 'platform': 'portal', + }), + headers={ + 'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf', + }) + video_url = self._download_json(request, video_id)['data']['videoUrl'] + f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') + formats = self._extract_f4m_formats(f4m_url, video_id) + return { + 'id': video_id, + 'title': self._live_title('Матч ТВ - Прямой эфир'), + 'is_live': True, + 'formats': formats, + } From 53be8894e476f6e3a49373fbd34ecff157440607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 18:44:22 +0600 Subject: [PATCH 071/110] [options] Add missing closing parenthesis --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py 
b/youtube_dl/options.py index 39fc4306a..2137dfb3f 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -419,7 +419,7 @@ def parseOpts(overrideArguments=None): '--hls-use-mpegts', dest='hls_use_mpegts', action='store_true', help='Use the mpegts container for HLS videos, allowing to play the ' - 'video while downloading (some players may not be able to play it') + 'video while downloading (some players may not be able to play it)') downloader.add_option( '--external-downloader', dest='external_downloader', metavar='COMMAND', From 5fa1702ca6d8a57b4f0ff108c6fe7488d09b448c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 19:20:52 +0600 Subject: [PATCH 072/110] [downloader/fragment] Do not report total bytes estimation and eta for live streams --- youtube_dl/downloader/fragment.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 0c9113d0f..e2f34eec6 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -26,7 +26,11 @@ class FragmentFD(FileDownloader): self._start_frag_download(ctx) def _prepare_frag_download(self, ctx): - self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags'])) + if 'live' not in ctx: + ctx['live'] = False + self.to_screen( + '[%s] Total fragments: %s' + % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)')) self.report_destination(ctx['filename']) dl = HttpQuietDownloader( self.ydl, @@ -74,13 +78,15 @@ class FragmentFD(FileDownloader): if s['status'] not in ('downloading', 'finished'): return + time_now = time.time() + frag_total_bytes = s.get('total_bytes') or 0 - estimated_size = ( - (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / - (state['frag_index'] + 1) * total_frags) - time_now = time.time() - state['total_bytes_estimate'] = estimated_size + if not ctx['live']: + 
estimated_size = ( + (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / + (state['frag_index'] + 1) * total_frags) + state['total_bytes_estimate'] = estimated_size state['elapsed'] = time_now - start if s['status'] == 'finished': @@ -91,9 +97,10 @@ class FragmentFD(FileDownloader): else: frag_downloaded_bytes = s['downloaded_bytes'] state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] - state['eta'] = self.calc_eta( - start, time_now, estimated_size, - state['downloaded_bytes']) + if not ctx['live']: + state['eta'] = self.calc_eta( + start, time_now, estimated_size, + state['downloaded_bytes']) state['speed'] = s.get('speed') ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state) From 09104e9930cff6cc5e9d3ab1951fdbcbb5e96840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 19:22:15 +0600 Subject: [PATCH 073/110] [downloader/f4m] Add live stream flag to context Now download progress for f4m livestreams is reported correctly --- youtube_dl/downloader/f4m.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index f8da04f69..581fa7b71 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -332,6 +332,7 @@ class F4mFD(FragmentFD): ctx = { 'filename': filename, 'total_frags': total_frags, + 'live': live, } self._prepare_frag_download(ctx) From 2c2f1efdcd37f88196bb2b546e6d8bc7c22f3b6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 19:30:31 +0600 Subject: [PATCH 074/110] [downloader/fragment] Remove superfluous whitespace --- youtube_dl/downloader/fragment.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index e2f34eec6..8b96eceb9 100644 --- a/youtube_dl/downloader/fragment.py +++ 
b/youtube_dl/downloader/fragment.py @@ -79,15 +79,13 @@ class FragmentFD(FileDownloader): return time_now = time.time() - + state['elapsed'] = time_now - start frag_total_bytes = s.get('total_bytes') or 0 - if not ctx['live']: estimated_size = ( (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / (state['frag_index'] + 1) * total_frags) state['total_bytes_estimate'] = estimated_size - state['elapsed'] = time_now - start if s['status'] == 'finished': state['frag_index'] += 1 From b8c9926c0a9d68fb636bf99e873d6a88de41f76b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 Jan 2016 19:43:25 +0600 Subject: [PATCH 075/110] [downloader/f4m] Do not update fragment list while test --- youtube_dl/downloader/f4m.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 581fa7b71..fc9642905 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -322,7 +322,8 @@ class F4mFD(FragmentFD): metadata = None fragments_list = build_fragments_list(boot_info) - if self.params.get('test', False): + test = self.params.get('test', False) + if test: # We only download the first fragment fragments_list = fragments_list[:1] total_frags = len(fragments_list) @@ -387,7 +388,7 @@ class F4mFD(FragmentFD): else: raise - if not fragments_list and live and bootstrap_url: + if not fragments_list and not test and live and bootstrap_url: fragments_list = self._update_live_fragments(bootstrap_url, frag_i) total_frags += len(fragments_list) if fragments_list and (fragments_list[0][1] > frag_i + 1): From 1bf996fa5cd996c5f586089388853e324dc3850d Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sat, 30 Jan 2016 20:45:56 +0100 Subject: [PATCH 076/110] [generic] Add support for Limelight API --- youtube_dl/extractor/generic.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/generic.py 
b/youtube_dl/extractor/generic.py index 26d3698c8..b18e734c4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1819,6 +1819,17 @@ class GenericIE(InfoExtractor): if digiteka_url: return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key()) + # Look for Limelight embeds + mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) + if mobj: + lm = { + 'Media': 'media', + 'Channel': 'channel', + 'ChannelList': 'channel_list', + } + return self.url_result('limelight:%s:%s' % ( + lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + # Look for AdobeTVVideo embeds mobj = re.search( r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', From 8f1fddc816dac7c0e9b216e85474aa8eb0d847bf Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sat, 30 Jan 2016 20:51:47 +0100 Subject: [PATCH 077/110] [limelight] fix format sorting and make m3u8 and f4m extraction non fatal --- youtube_dl/extractor/limelight.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index fb03dd527..1a0625ac3 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -40,7 +40,8 @@ class LimelightBaseIE(InfoExtractor): if not stream_url: continue if '.f4m' in stream_url: - formats.extend(self._extract_f4m_formats(stream_url, video_id)) + formats.extend(self._extract_f4m_formats( + stream_url, video_id, fatal=False)) else: fmt = { 'url': stream_url, @@ -72,8 +73,8 @@ class LimelightBaseIE(InfoExtractor): format_id = mobile_url.get('targetMediaPlatform') if determine_ext(media_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( - media_url, video_id, 'mp4', entry_protocol='m3u8_native', - preference=-1, m3u8_id=format_id)) + media_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=format_id, fatal=False)) 
else: formats.append({ 'url': media_url, From a7685b3a6b02be62ebd53e2978ab43b1cde3ec4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 Jan 2016 02:38:28 +0600 Subject: [PATCH 078/110] [npo] Add extension for m3u8 --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index eb12fb810..7cd5c9303 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -189,7 +189,7 @@ class NPOIE(NPOBaseIE): if not video_url: continue if format_id == 'adaptive': - formats.extend(self._extract_m3u8_formats(video_url, video_id)) + formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4')) else: formats.append({ 'url': video_url, From 60ad3eb9706861d4182ab44ee19d64350ca2e36e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 03:21:33 +0800 Subject: [PATCH 079/110] [viidea] Skip download for the test case requiring ffmpeg --- youtube_dl/extractor/viidea.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py index 525e303d4..315984bf9 100644 --- a/youtube_dl/extractor/viidea.py +++ b/youtube_dl/extractor/viidea.py @@ -45,6 +45,10 @@ class ViideaIE(InfoExtractor): 'upload_date': '20130627', 'duration': 565, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { # video with invalid direct format links (HTTP 403) 'url': 'http://videolectures.net/russir2010_filippova_nlp/', From 7c38af48b91d050a5694e878a789a028fbcc0a3b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 03:31:03 +0800 Subject: [PATCH 080/110] [vgtv] Fix test_VGTV_2 --- youtube_dl/extractor/vgtv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 86ba70ed9..14e945d49 100644 --- 
a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -86,10 +86,9 @@ class VGTVIE(XstreamIE): { # streamType: wasLive 'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla', - 'md5': '458f4841239dab414343b50e5af8869c', 'info_dict': { 'id': '113063', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'V75 fra Solvalla 30.05.15', 'description': 'md5:b3743425765355855f88e096acc93231', 'thumbnail': 're:^https?://.*\.jpg', @@ -98,6 +97,10 @@ class VGTVIE(XstreamIE): 'upload_date': '20150530', 'view_count': int, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more', From 809e1857c5155fb41d033b3c0215708eb41ddb55 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 03:38:20 +0800 Subject: [PATCH 081/110] [screenwavemedia] Fix HLS extension and test_TeamFour --- youtube_dl/extractor/screenwavemedia.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 05f93904c..e5d62a139 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -71,7 +71,7 @@ class ScreenwaveMediaIE(InfoExtractor): formats = [] for source in sources: if source['type'] == 'hls': - formats.extend(self._extract_m3u8_formats(source['file'], video_id)) + formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4')) else: file_ = source.get('file') if not file_: @@ -107,7 +107,11 @@ class TeamFourIE(InfoExtractor): 'upload_date': '20130401', 'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar', 'title': 'A Moment With TFS Episode 4', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } 
def _real_extract(self, url): From ed1a390583f7adfcf38c4af5432b4b2e611004ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 03:42:34 +0800 Subject: [PATCH 082/110] [tv2] Fix test_TV2 --- youtube_dl/extractor/tv2.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index fa338b936..1457e524e 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -17,18 +17,21 @@ class TV2IE(InfoExtractor): _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)' _TEST = { 'url': 'http://www.tv2.no/v/916509/', - 'md5': '9cb9e3410b18b515d71892f27856e9b1', 'info_dict': { 'id': '916509', - 'ext': 'flv', - 'title': 'Se Gryttens hyllest av Steven Gerrard', + 'ext': 'mp4', + 'title': 'Se Frode Gryttens hyllest av Steven Gerrard', 'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.', 'timestamp': 1431715610, 'upload_date': '20150515', 'duration': 156.967, 'view_count': int, 'categories': list, - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): From eb6fc7d32a6f909496cc1a4b46fdcbf2cc6f344e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 03:45:09 +0800 Subject: [PATCH 083/110] [senateisvp] Fix test_SenateISVP and test_SenateISVP_1 --- youtube_dl/extractor/senateisvp.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index 474ebb49b..990ea0fa8 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -53,17 +53,25 @@ class SenateISVPIE(InfoExtractor): 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 
'info_dict': { 'id': 'judiciary031715', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Integrated Senate Video Player', 'thumbnail': 're:^https?://.*\.(?:jpg|png)$', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', 'info_dict': { 'id': 'commerce011514', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Integrated Senate Video Player' - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi', # checksum differs each time From 670ad51ade60141a7948aa68686db49dd2e30145 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 04:01:45 +0800 Subject: [PATCH 084/110] [nrktv] Fix _TESTS --- youtube_dl/extractor/nrk.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 6ff13050d..a126f5054 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -133,26 +133,32 @@ class NRKTVIE(InfoExtractor): _TESTS = [ { 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', - 'md5': 'adf2c5454fa2bf032f47a9f8fb351342', 'info_dict': { 'id': 'MUHH48000314', - 'ext': 'flv', + 'ext': 'mp4', 'title': '20 spørsmål', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 'upload_date': '20140523', 'duration': 1741.52, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', - 'md5': '383650ece2b25ecec996ad7b5bb2a384', 'info_dict': { 'id': 'mdfp15000514', - 'ext': 'flv', - 'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting', + 'ext': 'mp4', + 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', 'upload_date': '20140524', - 'duration': 4605.0, + 'duration': 
4605.08, + }, + 'params': { + # m3u8 download + 'skip_download': True, }, }, { From e881c4bcabb64f84e382a75bd9c03379859105d0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 04:34:46 +0800 Subject: [PATCH 085/110] [nbc] Use NBC's id and fix _TESTS ThePlatform URL gives the same ID for all _TESTS --- youtube_dl/extractor/nbc.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 1dd54c2f1..18d01f423 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -19,32 +19,39 @@ class NBCIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.nbc.com/the-tonight-show/segments/112966', - # md5 checksum is not stable 'info_dict': { - 'id': 'c9xnCo0YPOPH', - 'ext': 'flv', + 'id': '112966', + 'ext': 'mp4', 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.nbc.com/the-tonight-show/episodes/176', 'info_dict': { - 'id': 'XwU9KZkp98TH', + 'id': '176', 'ext': 'flv', 'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen', 'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.', }, - 'skip': 'Only works from US', + 'skip': '404 Not Found', }, { 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821', 'info_dict': { - 'id': '8iUuyzWDdYUZ', - 'ext': 'flv', + 'id': '2832821', + 'ext': 'mp4', 'title': 'Star Wars Teaser', 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'skip': 'Only works from US', }, { @@ -66,7 +73,11 @@ class NBCIE(InfoExtractor): webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/'))) if 
theplatform_url.startswith('//'): theplatform_url = 'http:' + theplatform_url - return self.url_result(smuggle_url(theplatform_url, {'source_url': url})) + return { + '_type': 'url_transparent', + 'url': smuggle_url(theplatform_url, {'source_url': url}), + 'id': video_id, + } class NBCSportsVPlayerIE(InfoExtractor): From db9b1dbcd918bee41b3b38f1f1106ad5dec07bb7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 04:49:04 +0800 Subject: [PATCH 086/110] [nba] Add ext for hls formats and fix test_NBA --- youtube_dl/extractor/nba.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 9d26030d3..a071378b6 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -18,13 +18,17 @@ class NBAIE(InfoExtractor): 'md5': '9e7729d3010a9c71506fd1248f74e4f4', 'info_dict': { 'id': '0021200253-okc-bkn-recap', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Thunder vs. Nets', 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', 'duration': 181, 'timestamp': 1354638466, 'upload_date': '20121204', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/', 'only_matching': True, @@ -68,7 +72,7 @@ class NBAIE(InfoExtractor): if video_url.startswith('/'): continue if video_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) elif video_url.endswith('.f4m'): formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False)) else: From 89f2602880198b94f98bc41109771a52105e3cf8 Mon Sep 17 00:00:00 2001 From: rrooij <rderooij685@gmail.com> Date: Sat, 30 Jan 2016 20:31:31 +0100 Subject: 
[PATCH 087/110] [schooltv] Add extractor for SchoolTV playlists This closes #8163 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/npo.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e15495ec8..dbdfb86c0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -483,6 +483,7 @@ from .npo import ( NPOLiveIE, NPORadioIE, NPORadioFragmentIE, + SchoolTVIE, VPROIE, WNLIE ) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index eb12fb810..ab418edf4 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -406,6 +406,37 @@ class NPORadioFragmentIE(InfoExtractor): } +class SchoolTVIE(InfoExtractor): + IE_NAME = 'schooltv' + _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)' + + _TEST = { + 'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/', + 'info_dict': { + 'id': 'WO_NTR_429477', + 'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam', + 'title': 'Ademhaling: De hele dag haal je adem. 
Maar wat gebeurt er dan eigenlijk in je lichaam?', + 'ext': 'mp4', + 'description': 'md5:abfa0ff690adb73fd0297fd033aaa631' + }, + 'params': { + # Skip because of m3u8 download + 'skip_download': True + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex(r'data-mid="([^"]+)"', webpage, 'video_id') + return { + '_type': 'url_transparent', + 'ie_key': 'NPO', + 'url': 'npo:%s' % video_id, + 'display_id': display_id + } + + class VPROIE(NPOIE): IE_NAME = 'vpro' _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html' From 673fb82e65e28935ad16d85771cefe15013a12bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 Jan 2016 04:41:18 +0600 Subject: [PATCH 088/110] [schooltv] Improve video id regex --- youtube_dl/extractor/npo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index a4363c16e..87f5675c7 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -428,7 +428,8 @@ class SchoolTVIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(r'data-mid="([^"]+)"', webpage, 'video_id') + video_id = self._search_regex( + r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id') return { '_type': 'url_transparent', 'ie_key': 'NPO', From 14823decf3f27d5e7f17abf85444575f7600f4e4 Mon Sep 17 00:00:00 2001 From: rrooij <rderooij685@gmail.com> Date: Sun, 31 Jan 2016 00:03:23 +0100 Subject: [PATCH 089/110] [Gamekings] Fix url from .tv to .nl Gamekings doesn't use the .tv top level domain anymore, but the regular domain for Dutch sites. 
--- youtube_dl/extractor/gamekings.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index 027f55eb2..e7747a3ae 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -9,9 +9,9 @@ from ..utils import ( class GamekingsIE(InfoExtractor): - _VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)' + _VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)' _TESTS = [{ - 'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', + 'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/', # MD5 is flaky, seems to change regularly # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', 'info_dict': { @@ -23,7 +23,7 @@ class GamekingsIE(InfoExtractor): }, }, { # vimeo video - 'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/', + 'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/', 'md5': '12bf04dfd238e70058046937657ea68d', 'info_dict': { 'id': 'the-legend-of-zelda-majoras-mask', @@ -33,7 +33,7 @@ class GamekingsIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', }, }, { - 'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/', + 'url': 'http://www.gamekings.nl/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/', 'only_matching': True, }] From 7b7507d6e166bcbad4d42b7fac7608b2db276e3f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 07:13:21 +0800 Subject: [PATCH 090/110] [letv] Fix LetvCloud extraction --- youtube_dl/extractor/letv.py | 61 ++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 08bdae8a2..9665ece89 100644 --- a/youtube_dl/extractor/letv.py +++ 
b/youtube_dl/extractor/letv.py @@ -5,11 +5,13 @@ import datetime import re import time import base64 +import hashlib from .common import InfoExtractor from ..compat import ( compat_urllib_parse, compat_ord, + compat_str, ) from ..utils import ( determine_ext, @@ -258,6 +260,7 @@ class LetvCloudIE(InfoExtractor): }, }, { 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360', + 'md5': 'e03d9cc8d9c13191e1caf277e42dbd31', 'info_dict': { 'id': 'p7jnfw5hw9_ec93197892', 'ext': 'mp4', @@ -265,6 +268,7 @@ class LetvCloudIE(InfoExtractor): }, }, { 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd', + 'md5': 'cb988699a776b22d4a41b9d43acfb3ac', 'info_dict': { 'id': 'p7jnfw5hw9_187060b6fd', 'ext': 'mp4', @@ -272,21 +276,37 @@ class LetvCloudIE(InfoExtractor): }, }] - def _real_extract(self, url): - uu_mobj = re.search('uu=([\w]+)', url) - vu_mobj = re.search('vu=([\w]+)', url) + @staticmethod + def sign_data(obj): + if obj['cf'] == 'flash': + salt = '2f9d6924b33a165a6d8b5d3d42f4f987' + items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu'] + elif obj['cf'] == 'html5': + salt = 'fbeh5player12c43eccf2bec3300344' + items = ['cf', 'ran', 'uu', 'bver', 'vu'] + input_data = ''.join([item + obj[item] for item in items]) + salt + obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest() - if not uu_mobj or not vu_mobj: - raise ExtractorError('Invalid URL: %s' % url, expected=True) + def _get_formats(self, cf, uu, vu, media_id): + def get_play_json(cf, timestamp): + data = { + 'cf': cf, + 'ver': '2.2', + 'bver': 'firefox44.0', + 'format': 'json', + 'uu': uu, + 'vu': vu, + 'ran': compat_str(timestamp), + } + self.sign_data(data) + return self._download_json( + 'http://api.letvcloud.com/gpc.php?' 
+ compat_urllib_parse.urlencode(data), + media_id, 'Downloading playJson data for type %s' % cf) - uu = uu_mobj.group(1) - vu = vu_mobj.group(1) - media_id = uu + '_' + vu - - play_json_req = sanitized_Request( - 'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' + - 'uu=' + uu + '&vu=' + vu) - play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data') + play_json = get_play_json(cf, time.time()) + # The server time may be different from local time + if play_json.get('code') == 10071: + play_json = get_play_json(cf, play_json['timestamp']) if not play_json.get('data'): if play_json.get('message'): @@ -312,6 +332,21 @@ class LetvCloudIE(InfoExtractor): 'width': int_or_none(play_url.get('vwidth')), 'height': int_or_none(play_url.get('vheight')), }) + + return formats + + def _real_extract(self, url): + uu_mobj = re.search('uu=([\w]+)', url) + vu_mobj = re.search('vu=([\w]+)', url) + + if not uu_mobj or not vu_mobj: + raise ExtractorError('Invalid URL: %s' % url, expected=True) + + uu = uu_mobj.group(1) + vu = vu_mobj.group(1) + media_id = uu + '_' + vu + + formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id) self._sort_formats(formats) return { From ce5879fa1475d9b0a74bd4e644ea944083177b2d Mon Sep 17 00:00:00 2001 From: rrooij <rderooij685@gmail.com> Date: Sun, 31 Jan 2016 00:12:45 +0100 Subject: [PATCH 091/110] [Gamekings] Fix viewing of old videos Some old videos that aren't on Vimeo are being uploaded to YouTube under the 'Gamekings Vault' channel. They use YouTube now for some videos as video hosting instead of Vimeo or their own hosting. The first test failed to succeed under the existing code, but works now by using the YouTube extractor. The Regex is changed to find the new gogoVideo JavaScript line with the YouTube embed. Checking if there is a YouTube embed is done by a String find, which is probably not the best method of checking this. 
--- youtube_dl/extractor/gamekings.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index e7747a3ae..efe31c10d 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -15,11 +15,14 @@ class GamekingsIE(InfoExtractor): # MD5 is flaky, seems to change regularly # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', 'info_dict': { - 'id': 'phoenix-wright-ace-attorney-dual-destinies-review', + 'id': 'HkSQKetlGOU', 'ext': 'mp4', - 'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', - 'description': 'md5:36fd701e57e8c15ac8682a2374c99731', + 'title': 'Phoenix Wright: Ace Attorney - Dual Destinies Review', + 'description': 'md5:db88c0e7f47e9ea50df3271b9dc72e1d', 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader_id': 'UCJugRGo4STYMeFr5RoOShtQ', + 'uploader': 'Gamekings Vault', + 'upload_date': '20151123', }, }, { # vimeo video @@ -43,7 +46,11 @@ class GamekingsIE(InfoExtractor): webpage = self._download_webpage(url, video_id) playlist_id = self._search_regex( - r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id') + r'gogoVideo\(.*,\s*"([^"]+)', webpage, 'playlist id') + + # Check if a YouTube embed is used + if playlist_id.find('youtube') != -1: + return self.url_result(playlist_id, ie='Youtube') playlist = self._download_xml( 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id, From 8e7aad20756efffa649cf073d89f22a57f93048c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 Jan 2016 17:49:59 +0600 Subject: [PATCH 092/110] [youtube] Use authentication for entry list base extractor (Closes #8380) --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a24c73584..2941da467 100644 --- a/youtube_dl/extractor/youtube.py +++ 
b/youtube_dl/extractor/youtube.py @@ -181,7 +181,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return -class YoutubeEntryListBaseInfoExtractor(InfoExtractor): +class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): # Extract entries from page with "Load more" button def _entries(self, page, playlist_id): more_widget_html = content_html = page @@ -1602,7 +1602,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } -class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor): +class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: (?:https?://)? From 9acd33094d36acde60a42837ed775c68ad3a327d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 Jan 2016 17:52:02 +0600 Subject: [PATCH 093/110] [youtube] Filter duplicates in playlists base extractor --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2941da467..9a64c1d11 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -233,7 +233,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): def _process_page(self, content): - for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content): + for playlist_id in set(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)): yield self.url_result( 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist') From a69bee4762044b657e1fd8bd4cf0baa58d950bc3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 31 Jan 2016 12:57:18 +0100 Subject: [PATCH 094/110] release 2016.01.31 --- README.md | 4 ++++ docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git 
a/README.md b/README.md index 724fb17d1..7c582511f 100644 --- a/README.md +++ b/README.md @@ -173,6 +173,10 @@ which means you can modify it, redistribute it or use it however you like. expected filesize (experimental) --hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental) + --hls-use-mpegts Use the mpegts container for HLS videos, + allowing to play the video while + downloading (some players may not be able + to play it) --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,axel,curl,httpie,wget diff --git a/docs/supportedsites.md b/docs/supportedsites.md index eb68c23b5..0644436a8 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -55,6 +55,7 @@ - **audiomack** - **audiomack:album** - **Azubu** + - **AzubuLive** - **BaiduVideo**: 百度视频 - **bambuser** - **bambuser:channel** @@ -315,6 +316,7 @@ - **mailru**: Видео@Mail.Ru - **MakerTV** - **Malemotion** + - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** - **metacafe** @@ -507,6 +509,7 @@ - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au + - **schooltv** - **SciVee** - **screen.yahoo:search**: Yahoo screen search - **Screencast** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d9f1e22b0..006b960b3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.01.29' +__version__ = '2016.01.31' From a7aab0c23e537d9df3d0946c0b960cc92e35d2ff Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 19:49:54 +0800 Subject: [PATCH 095/110] [test_youtube_lists] Fix TestYoutubeLists.test_youtube_course Youtube entries are now generators --- test/test_youtube_lists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 26aadb34f..47df0f348 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ 
-34,7 +34,7 @@ class TestYoutubeLists(unittest.TestCase): ie = YoutubePlaylistIE(dl) # TODO find a > 100 (paginating?) videos course result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') - entries = result['entries'] + entries = list(result['entries']) self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs') self.assertEqual(len(entries), 25) self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') From 189d72d5fd16176a8990a9447c0165478a384676 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 19:53:22 +0800 Subject: [PATCH 096/110] [test_subtitles] Fix TestRaiSubtitles RaiIE is renamed to RaiTVIE in 06d5556dface3901a86419b6b125ef377116448f --- test/test_subtitles.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 9ed9fe622..9a695c4e8 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -21,7 +21,7 @@ from youtube_dl.extractor import ( NPOIE, ComedyCentralIE, NRKTVIE, - RaiIE, + RaiTVIE, VikiIE, ThePlatformIE, ThePlatformFeedIE, @@ -260,7 +260,7 @@ class TestNRKSubtitles(BaseTestSubtitles): class TestRaiSubtitles(BaseTestSubtitles): url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html' - IE = RaiIE + IE = RaiTVIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True From b99d88c6a1d91f57e1804bd512fed1bdbf31a384 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 20:00:07 +0800 Subject: [PATCH 097/110] [youporn] Fix uploader and description --- youtube_dl/extractor/youporn.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index dd724085a..b29baafc4 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -114,15 +114,13 @@ class 
YouPornIE(InfoExtractor): formats.append(f) self._sort_formats(formats) - description = self._html_search_regex( - r'(?s)<div[^>]+class=["\']video-description["\'][^>]*>(.+?)</div>', - webpage, 'description', default=None) + description = self._og_search_description(webpage, default=None) thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail') uploader = self._html_search_regex( - r'(?s)<div[^>]+class=["\']videoInfoBy["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>', + r'(?s)<div[^>]+class=["\']videoInfoBy(?:\s+[^"\']+)?["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>', webpage, 'uploader', fatal=False) upload_date = unified_strdate(self._html_search_regex( r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>', From 231ea2a3bbc6324fbd0e5dddd224646c91b0f035 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 Jan 2016 20:21:57 +0800 Subject: [PATCH 098/110] [xuite] Replace the test case with my uploaded one --- youtube_dl/extractor/xuite.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 8bbac54e2..2466410fa 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -34,19 +34,20 @@ class XuiteIE(InfoExtractor): }, }, { # Video with only one format - 'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2', - 'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e', + 'url': 'http://vlog.xuite.net/play/WUxxR2xCLTI1OTI1MDk5LmZsdg==', + 'md5': '21f7b39c009b5a4615b4463df6eb7a46', 'info_dict': { - 'id': '3441629', + 'id': '25925099', 'ext': 'mp4', - 'title': '孫燕姿 - 眼淚成詩', + 'title': 'BigBuckBunny_320x180', 'thumbnail': 're:^https?://.*\.jpg$', - 'duration': 217.399, - 'timestamp': 1299383640, - 'upload_date': '20110306', - 'uploader': 'Valen', - 'uploader_id': '10400126', - 'categories': ['影視娛樂'], + 'duration': 596.458, + 'timestamp': 
1454242500, + 'upload_date': '20160131', + 'uploader': 'yan12125', + 'uploader_id': '12158353', + 'categories': ['個人短片'], + 'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4', }, }, { # Video with two formats From e04398e397d4a4f7e0d0994355d5a94c22441e7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 31 Jan 2016 14:22:36 +0100 Subject: [PATCH 099/110] [FFmpegSubtitlesConvertorPP] delete old subtitle files (fixes #8382) --- youtube_dl/postprocessor/ffmpeg.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index daca5d814..16a64802a 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -479,6 +479,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert') return [], info self._downloader.to_screen('[ffmpeg] Converting subtitles') + sub_filenames = [] for lang, sub in subs.items(): ext = sub['ext'] if ext == new_ext: @@ -486,6 +487,8 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): '[ffmpeg] Subtitle file for %s is already in the requested' 'format' % new_ext) continue + old_file = subtitles_filename(filename, lang, ext) + sub_filenames.append(old_file) new_file = subtitles_filename(filename, lang, new_ext) if ext == 'dfxp' or ext == 'ttml': @@ -493,7 +496,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): 'You have requested to convert dfxp (TTML) subtitles into another format, ' 'which results in style information loss') - dfxp_file = subtitles_filename(filename, lang, ext) + dfxp_file = old_file srt_file = subtitles_filename(filename, lang, 'srt') with io.open(dfxp_file, 'rt', encoding='utf-8') as f: @@ -511,9 +514,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): if new_ext == 'srt': continue 
- self.run_ffmpeg( - subtitles_filename(filename, lang, ext), - new_file, ['-f', new_format]) + self.run_ffmpeg(old_file, new_file, ['-f', new_format]) with io.open(new_file, 'rt', encoding='utf-8') as f: subs[lang] = { @@ -521,4 +522,4 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): 'data': f.read(), } - return [], info + return sub_filenames, info From 3ccb0655c1f27953ba00ee3f8bc290ca9bfda7ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 31 Jan 2016 15:11:00 +0100 Subject: [PATCH 100/110] [youtube] Use 'orderedSet' instead of 'set' to preserve the order --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9a64c1d11..bd87c75b6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -233,7 +233,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): def _process_page(self, content): - for playlist_id in set(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)): + for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)): yield self.url_result( 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist') From 163da6a484f57c52afdf300d368136d164316803 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 02:13:11 +0800 Subject: [PATCH 101/110] [gamekings] Add MD5 back The test is now a YouTube video, whose MD5 should be stable --- youtube_dl/extractor/gamekings.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index efe31c10d..c011e9d22 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -12,8 
+12,7 @@ class GamekingsIE(InfoExtractor): _VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/', - # MD5 is flaky, seems to change regularly - # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', + 'md5': '5208d3a17adeaef829a7861887cb9029', 'info_dict': { 'id': 'HkSQKetlGOU', 'ext': 'mp4', From eab3c2895c66a8d2f5da181d3ccba35a901b813f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 02:15:25 +0800 Subject: [PATCH 102/110] [gamekings] add_ie --- youtube_dl/extractor/gamekings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index c011e9d22..df385e338 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -11,6 +11,7 @@ from ..utils import ( class GamekingsIE(InfoExtractor): _VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)' _TESTS = [{ + # YouTube embed video 'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/', 'md5': '5208d3a17adeaef829a7861887cb9029', 'info_dict': { @@ -23,6 +24,7 @@ class GamekingsIE(InfoExtractor): 'uploader': 'Gamekings Vault', 'upload_date': '20151123', }, + 'add_ie': ['Youtube'], }, { # vimeo video 'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/', From 0e1b1a011d1772bc1a7069bad8ad71a53798a212 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 02:19:03 +0800 Subject: [PATCH 103/110] [gamekings] Stricter checks --- youtube_dl/extractor/gamekings.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index df385e338..f6b9046f9 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -6,6 +6,7 @@ from ..utils import ( xpath_text, xpath_with_ns, 
) +from .youtube import YoutubeIE class GamekingsIE(InfoExtractor): @@ -47,10 +48,10 @@ class GamekingsIE(InfoExtractor): webpage = self._download_webpage(url, video_id) playlist_id = self._search_regex( - r'gogoVideo\(.*,\s*"([^"]+)', webpage, 'playlist id') + r'gogoVideo\([^,]+,\s*"([^"]+)', webpage, 'playlist id') # Check if a YouTube embed is used - if playlist_id.find('youtube') != -1: + if YoutubeIE.suitable(playlist_id): return self.url_result(playlist_id, ie='Youtube') playlist = self._download_xml( From a8aad210019b50540cf1bfd28390b7e7f2573a31 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 03:07:04 +0800 Subject: [PATCH 104/110] [acast] Fix extraction --- youtube_dl/extractor/acast.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index be7913bc7..f2465f8f8 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -23,14 +23,19 @@ class ACastIE(ACastBaseIE): 'ext': 'mp3', 'title': '"Where Are You?": Taipei 101, Taiwan', 'timestamp': 1196172000000, - 'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e', + 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e', 'duration': 211, } } def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() - cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id) + + embed_page = self._download_webpage( + re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id) + cast_data = self._parse_json(self._search_regex( + r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'), + display_id)['GetAcast/%s/%s' % (channel, display_id)] return { 'id': compat_str(cast_data['id']), From 9934fe76be616cf468038a6b04771b3fcc514765 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 03:08:46 +0800 Subject: [PATCH 105/110] 
[acast] Remove ACastBaseIE No longer necessary as _API_BASE_URL is used by ACastChannelIE only --- youtube_dl/extractor/acast.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index f2465f8f8..92eee8119 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -8,11 +8,7 @@ from ..compat import compat_str from ..utils import int_or_none -class ACastBaseIE(InfoExtractor): - _API_BASE_URL = 'https://www.acast.com/api/' - - -class ACastIE(ACastBaseIE): +class ACastIE(InfoExtractor): IE_NAME = 'acast' _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)' _TEST = { @@ -49,7 +45,7 @@ class ACastIE(ACastBaseIE): } -class ACastChannelIE(ACastBaseIE): +class ACastChannelIE(InfoExtractor): IE_NAME = 'acast:channel' _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)' _TEST = { @@ -61,6 +57,7 @@ class ACastChannelIE(ACastBaseIE): }, 'playlist_mincount': 20, } + _API_BASE_URL = 'https://www.acast.com/api/' @classmethod def suitable(cls, url): From 7a0ed06909f8951f8be3049ab089d61ead626158 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 03:31:58 +0800 Subject: [PATCH 106/110] [allocine] Fix extraction of test_allocine_1 and update tests --- youtube_dl/extractor/allocine.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 7d65b8193..f94da1a05 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -8,6 +8,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( qualities, + unescapeHTML, ) @@ -31,7 +32,7 @@ class AllocineIE(InfoExtractor): 'id': '19540403', 'ext': 'mp4', 'title': 'Planes 2 Bande-annonce VF', - 'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e', + 'description': 'Regardez la bande annonce du film 
Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -41,7 +42,7 @@ class AllocineIE(InfoExtractor): 'id': '19544709', 'ext': 'mp4', 'title': 'Dragons 2 - Bande annonce finale VF', - 'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac', + 'description': 'md5:601d15393ac40f249648ef000720e7e3', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -59,10 +60,14 @@ class AllocineIE(InfoExtractor): if typ == 'film': video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') else: - player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player') - - player_data = json.loads(player) - video_id = compat_str(player_data['refMedia']) + player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None) + if player: + player_data = json.loads(player) + video_id = compat_str(player_data['refMedia']) + else: + model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model') + model_data = self._parse_json(unescapeHTML(model), display_id) + video_id = compat_str(model_data['id']) xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) From f63757ec35bb310d05966de03dcfae72b427eac3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 03:34:02 +0800 Subject: [PATCH 107/110] [allocine] Fix for Python 2.6 Python 2.6 does not support .// syntax in find(). 
Fortunately, the interested node is at the top level --- youtube_dl/extractor/allocine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index f94da1a05..f6cd3a85f 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -71,7 +71,7 @@ class AllocineIE(InfoExtractor): xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) - video = xml.find('.//AcVisionVideo').attrib + video = xml.find('./AcVisionVideo').attrib quality = qualities(['ld', 'md', 'hd']) formats = [] From 566bda51f2d943d58fbe37d88ea0fceb8ddefea8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 05:00:09 +0800 Subject: [PATCH 108/110] [bpb] Fix extraction and update tests --- youtube_dl/extractor/bpb.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index 510813f76..c28e72927 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -1,7 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..utils import ( + js_to_json, + determine_ext, +) class BpbIE(InfoExtractor): @@ -10,7 +16,8 @@ class BpbIE(InfoExtractor): _TEST = { 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', - 'md5': '0792086e8e2bfbac9cdf27835d5f2093', + # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2 + 'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f', 'info_dict': { 'id': '297', 'ext': 'mp4', @@ -25,13 +32,26 @@ class BpbIE(InfoExtractor): title = self._html_search_regex( r'<h2 class="white">(.*?)</h2>', webpage, 'title') - video_url = self._html_search_regex( - r'(http://film\.bpb\.de/player/dokument_[0-9]+\.mp4)', - webpage, 'video URL') + video_info_dicts = re.findall( + 
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage) + + formats = [] + for video_info in video_info_dicts: + video_info = self._parse_json(video_info, video_id, transform_source=js_to_json) + quality = video_info['quality'] + video_url = video_info['src'] + formats.append({ + 'url': video_url, + 'preference': 10 if quality == 'high' else 0, + 'format_note': quality, + 'format_id': '%s-%s' % (quality, determine_ext(video_url)), + }) + + self._sort_formats(formats) return { 'id': video_id, - 'url': video_url, + 'formats': formats, 'title': title, 'description': self._og_search_description(webpage), } From dc4fe5c6d713ac09cd35f8b8dbfbe46a67a67f10 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 05:32:28 +0800 Subject: [PATCH 109/110] [allocine] Use xpath_element --- youtube_dl/extractor/allocine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index f6cd3a85f..190bc2cc8 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -9,6 +9,7 @@ from ..compat import compat_str from ..utils import ( qualities, unescapeHTML, + xpath_element, ) @@ -71,7 +72,7 @@ class AllocineIE(InfoExtractor): xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) - video = xml.find('./AcVisionVideo').attrib + video = xpath_element(xml, './/AcVisionVideo').attrib quality = qualities(['ld', 'md', 'hd']) formats = [] From 92769650fae8827f242a1517565cceef5604eeb0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Feb 2016 15:40:42 +0800 Subject: [PATCH 110/110] [vidzi] Fix extraction Closes #8386. 
Vidzi.tv now uses jwplayer, which can be handled by GenericIE --- youtube_dl/extractor/vidzi.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index 2ba9f31df..210a738a6 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import smuggle_url class VidziIE(InfoExtractor): @@ -20,19 +21,14 @@ class VidziIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_host = self._html_search_regex( - r'id=\'vplayer\'><img src="http://(.*?)/i', webpage, - 'video host') - video_hash = self._html_search_regex( - r'\|([a-z0-9]+)\|hls\|type', webpage, 'video_hash') - ext = self._html_search_regex( - r'\|tracks\|([a-z0-9]+)\|', webpage, 'video ext') - video_url = 'http://' + video_host + '/' + video_hash + '/v.' + ext title = self._html_search_regex( r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title') + # Vidzi now uses jwplayer, which can be handled by GenericIE return { + '_type': 'url_transparent', 'id': video_id, 'title': title, - 'url': video_url, + 'url': smuggle_url(url, {'to_generic': True}), + 'ie_key': 'Generic', }