From 5f432ac8f552b06e715c5e17165328dc76d9c1b5 Mon Sep 17 00:00:00 2001 From: Founder Fang Date: Sun, 20 Dec 2015 19:09:45 +0800 Subject: [PATCH 001/491] [Weiqitv] Add new extractor --- youtube_dl/extractor/__init__.py | 4 ++- youtube_dl/extractor/letv.py | 57 ++++++++++++++++++++++++++++++++ youtube_dl/extractor/weiqitv.py | 54 ++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/weiqitv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e46d73ed7..9dcd252f8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -331,7 +331,8 @@ from .lecture2go import Lecture2GoIE from .letv import ( LetvIE, LetvTvIE, - LetvPlaylistIE + LetvPlaylistIE, + LetvCloudIE, ) from .libsyn import LibsynIE from .lifenews import ( @@ -834,6 +835,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import WeiboIE +from .weiqitv import WeiqitvIE from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index be648000e..c096cb1ab 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import datetime import re import time +import base64 from .common import InfoExtractor from ..compat import ( @@ -16,6 +17,7 @@ from ..utils import ( parse_iso8601, sanitized_Request, int_or_none, + str_or_none, encode_data_uri, ) @@ -239,3 +241,58 @@ class LetvPlaylistIE(LetvTvIE): }, 'playlist_mincount': 7 }] + + +class LetvCloudIE(InfoExtractor): + IE_DESC = '乐视云' + _VALID_URL = r'http://yuntv\.letv\.com/bcloud.html\?.*$' + + _TESTS = [{ + 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf', + 'md5': '26450599afd64c513bc77030ad15db44', + 'info_dict': { + 'id': 'p7jnfw5hw9_467623dedf', + 'ext': 'mp4', + 'title': 'p7jnfw5hw9_467623dedf', + }, + }, { 
+ 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360', + 'info_dict': { + 'id': 'p7jnfw5hw9_ec93197892', + 'ext': 'mp4', + 'title': 'p7jnfw5hw9_ec93197892', + }, + }, { + 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd', + 'info_dict': { + 'id': 'p7jnfw5hw9_187060b6fd', + 'ext': 'mp4', + 'title': 'p7jnfw5hw9_187060b6fd', + }, + }] + + def _real_extract(self, url): + uu = re.search('uu=([\w]+)', url).group(1) + vu = re.search('vu=([\w]+)', url).group(1) + media_id = uu + '_' + vu + + play_json_req = sanitized_Request( + 'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' + + "uu=" + uu + "&vu=" + vu) + play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data') + + formats = [{ + 'url': base64.b64decode(media['play_url']['main_url'].encode('utf-8')).decode("utf-8"), + 'ext': 'mp4', + 'format_id': int_or_none(media.get('play_url', {}).get('vtype')), + 'format_note': str_or_none(media.get('play_url', {}).get('definition')), + 'width': int_or_none(media.get('play_url', {}).get('vwidth')), + 'height': int_or_none(media.get('play_url', {}).get('vheight')), + } for media in play_json['data']['video_info']['media'].values()] + self._sort_formats(formats) + + return { + 'id': media_id, + 'title': media_id, + 'formats': formats, + } diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py new file mode 100644 index 000000000..da3b3d145 --- /dev/null +++ b/youtube_dl/extractor/weiqitv.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class WeiqitvIE(InfoExtractor): + IE_DESC = 'WQTV' + _VALID_URL = r'http://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' + + _TESTS = [{ + 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', + 'md5': '26450599afd64c513bc77030ad15db44', + 'info_dict': { + 'id': 
'53c744f09874f0e76a8b46f3', + 'ext': 'mp4', + 'title': '2013年度盘点', + }, + }, { + 'url': 'http://www.weiqitv.com/index/video_play?videoId=567379a2d4c36cca518b4569', + 'info_dict': { + 'id': '567379a2d4c36cca518b4569', + 'ext': 'mp4', + 'title': '民国围棋史', + }, + }, { + 'url': 'http://www.weiqitv.com/index/video_play?videoId=5430220a9874f088658b4567', + 'info_dict': { + 'id': '5430220a9874f088658b4567', + 'ext': 'mp4', + 'title': '二路托过的手段和运用', + }, + }] + + def _real_extract(self, url): + media_id = self._match_id(url) + page = self._download_webpage(url, media_id) + + info_json_str = self._search_regex( + 'var\s+video\s*=\s*(.+});', + page, 'info_json_str') + info_json = self._parse_json(info_json_str, media_id) + + letvcloud_url = self._search_regex( + 'var\s+letvurl\s*=\s*"([^"]+)', + page, 'letvcloud_url') + + return { + '_type': 'url_transparent', + "ie_key": 'LetvCloud', + 'url': letvcloud_url, + 'title': info_json['name'], + 'id': media_id, + } From e1a0bfdffe25dda494a9da8b02fba0c9ad39f4fe Mon Sep 17 00:00:00 2001 From: dyn888 Date: Sun, 3 Jan 2016 04:11:19 +0100 Subject: [PATCH 002/491] [youtube] added vcodec/acodec/abr for multiple itags Should make downloading with filters more precise and easier, ie. bestvideo[vcodec=h264]. By default a lot of codecs are specified as avc1.xxxxxx and unique for each format, which makes them unusable for bestvideo selection. 
--- youtube_dl/extractor/youtube.py | 120 ++++++++++++++++---------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4aac2cc03..64386f34a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -277,55 +277,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor): $""" _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _formats = { - '5': {'ext': 'flv', 'width': 400, 'height': 240}, - '6': {'ext': 'flv', 'width': 450, 'height': 270}, - '13': {'ext': '3gp'}, - '17': {'ext': '3gp', 'width': 176, 'height': 144}, - '18': {'ext': 'mp4', 'width': 640, 'height': 360}, - '22': {'ext': 'mp4', 'width': 1280, 'height': 720}, - '34': {'ext': 'flv', 'width': 640, 'height': 360}, - '35': {'ext': 'flv', 'width': 854, 'height': 480}, - '36': {'ext': '3gp', 'width': 320, 'height': 240}, - '37': {'ext': 'mp4', 'width': 1920, 'height': 1080}, - '38': {'ext': 'mp4', 'width': 4096, 'height': 3072}, - '43': {'ext': 'webm', 'width': 640, 'height': 360}, - '44': {'ext': 'webm', 'width': 854, 'height': 480}, - '45': {'ext': 'webm', 'width': 1280, 'height': 720}, - '46': {'ext': 'webm', 'width': 1920, 'height': 1080}, - '59': {'ext': 'mp4', 'width': 854, 'height': 480}, - '78': {'ext': 'mp4', 'width': 854, 'height': 480}, + '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, + '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, + '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, + '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '35': {'ext': 'flv', 
'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '36': {'ext': '3gp', 'width': 320, 'height': 240, 'acodec': 'aac', 'abr': 32, 'vcodec': 'mp4v'}, + '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, - # 3d videos - '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20}, - '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20}, - '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20}, - '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20}, - '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20}, - '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20}, - '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20}, + # 3D videos + '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '85': {'ext': 'mp4', 'height': 1080, 
'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20}, + '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, + '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, # Apple HTTP Live Streaming - '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10}, - '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10}, - '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10}, - '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10}, - '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10}, - '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10}, - '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10}, + '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, + '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, + '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, # DASH mp4 video - '133': {'ext': 'mp4', 
'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) - '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, - '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, - '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'}, + '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': 
-40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) + '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, # Dash mp4 audio '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, @@ -339,26 +339,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, - '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'}, - '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '246': 
{'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40}, + '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, # itag 272 videos are either 3840x2160 (e.g. 
RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) - '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, - '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, - '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, - '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'}, - '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, # Dash webm audio - '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, - '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, + '171': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, + '172': {'ext': 'webm', 'acodec': 
'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, # Dash webm audio with opus inside '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50}, From e37afbe0b8a1222cb214ad0bec9a53bb7953531d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 14 Jan 2016 00:16:23 +0100 Subject: [PATCH 003/491] [YoutubeDL] urlopen: disable the 'file:' protocol (#8227) If someone is running youtube-dl on a server to deliver files, the user could input 'file:///some/important/file' and youtube-dl would save that file as a video giving access to sensitive information to the user. 'file:' urls can be filtered, but the user can use an URL to a crafted m3u8 manifest like: #EXTM3U #EXT-X-MEDIA-SEQUENCE:0 #EXTINF:10.0 file:///etc/passwd #EXT-X-ENDLIST With this patch 'file:' URLs raise URLError like for unknown protocols. --- test/test_YoutubeDL.py | 7 ++++++- youtube_dl/YoutubeDL.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 0388c0bf3..0caa43843 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -12,7 +12,7 @@ import copy from test.helper import FakeYDL, assertRegexpMatches from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_str +from youtube_dl.compat import compat_str, compat_urllib_error from youtube_dl.extractor import YoutubeIE from youtube_dl.postprocessor.common import PostProcessor from youtube_dl.utils import ExtractorError, match_filter_func @@ -631,6 +631,11 @@ class TestYoutubeDL(unittest.TestCase): result = get_ids({'playlist_items': '10'}) self.assertEqual(result, []) + def test_urlopen_no_file_protocol(self): + # see https://github.com/rg3/youtube-dl/issues/8227 + ydl = YDL() + self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd') + if __name__ == '__main__': unittest.main() diff 
--git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d50b7cfed..e8ce58604 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1986,8 +1986,14 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) data_handler = compat_urllib_request_DataHandler() - opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, data_handler) + unknown_handler = compat_urllib_request.UnknownHandler() + handlers = (proxy_handler, https_handler, cookie_processor, ydlh, data_handler, unknown_handler) + # we don't use build_opener because it automatically adds FileHandler, + # which can be used for malicious purposes (see + # https://github.com/rg3/youtube-dl/issues/8227) + opener = compat_urllib_request.OpenerDirector() + for handler in handlers: + opener.add_handler(handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play From 6240b0a278781a3b584a9dd6d57191b2472c0fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 14 Jan 2016 08:14:01 +0100 Subject: [PATCH 004/491] [YoutubeDL] urlopen: use build_opener again Otherwise we would need to manually add handlers like HTTPRedirectHandler, instead we add a customized FileHandler instance that raises an error. 
--- youtube_dl/YoutubeDL.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e8ce58604..ccad5f2ea 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1986,14 +1986,19 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) data_handler = compat_urllib_request_DataHandler() - unknown_handler = compat_urllib_request.UnknownHandler() - handlers = (proxy_handler, https_handler, cookie_processor, ydlh, data_handler, unknown_handler) - # we don't use build_opener because it automatically adds FileHandler, - # which can be used for malicious purposes (see + + # When passing our own FileHandler instance, build_opener won't add the + # default FileHandler and allows us to disable the file protocol, which + # can be used for malicious purposes (see # https://github.com/rg3/youtube-dl/issues/8227) - opener = compat_urllib_request.OpenerDirector() - for handler in handlers: - opener.add_handler(handler) + file_handler = compat_urllib_request.FileHandler() + + def file_open(*args, **kwargs): + raise compat_urllib_error.URLError('file protocol is disabled') + file_handler.file_open = file_open + + opener = compat_urllib_request.build_opener( + proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play From 4240d504963bb6d1c7bd7c288a7874f9d8dc042b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 14 Jan 2016 14:07:54 +0100 Subject: [PATCH 005/491] [YoutubeDL] improve error message for file:/// URLs --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ccad5f2ea..4915fbd45 
100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1994,7 +1994,7 @@ class YoutubeDL(object): file_handler = compat_urllib_request.FileHandler() def file_open(*args, **kwargs): - raise compat_urllib_error.URLError('file protocol is disabled') + raise compat_urllib_error.URLError('file:/// protocol is explicitly disabled in youtube-dl for security reasons') file_handler.file_open = file_open opener = compat_urllib_request.build_opener( From 4511c1976d0a06394a000333a020a4d3668072fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 19:57:20 +0600 Subject: [PATCH 006/491] [beeg] Fix extraction (Closes #8225) --- youtube_dl/extractor/beeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c8d921daf..d0174b818 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -60,7 +60,7 @@ class BeegIE(InfoExtractor): def decrypt_url(encrypted_url): encrypted_url = self._proto_relative_url( - encrypted_url.replace('{DATA_MARKERS}', ''), 'http:') + encrypted_url.replace('{DATA_MARKERS}', ''), 'https:') key = self._search_regex( r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None) if not key: From abb893e6e45b0b0c6ec0e3a1d29dbd1746cbee96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 19:57:56 +0600 Subject: [PATCH 007/491] [beeg] Update API URL --- youtube_dl/extractor/beeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index d0174b818..34c2a756f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -34,7 +34,7 @@ class BeegIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://beeg.com/api/v5/video/%s' % video_id, video_id) + 'https://api.beeg.com/api/v5/video/%s' % video_id, video_id) def split(o, e): def cut(s, x): From 
11c60089a8772a2d12288f0ff382866e516f9a4b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 14 Jan 2016 15:43:21 +0100 Subject: [PATCH 008/491] release 2016.01.14 --- docs/supportedsites.md | 11 +++++++---- youtube_dl/version.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8d0c7b97a..eb160bd2f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -65,6 +65,7 @@ - **Beeg** - **BehindKink** - **Bet** + - **Bigflix** - **Bild**: Bild.de - **BiliBili** - **BleacherReport** @@ -251,7 +252,7 @@ - **Instagram** - **instagram:user**: Instagram user profile - **InternetVideoArchive** - - **IPrima** + - **IPrima** (Currently broken) - **iqiyi**: 爱奇艺 - **Ir90Tv** - **ivi**: ivi.ru @@ -602,7 +603,9 @@ - **TruTube** - **Tube8** - **TubiTv** - - **Tudou** + - **tudou** + - **tudou:album** + - **tudou:playlist** - **Tumblr** - **tunein:clip** - **tunein:program** @@ -655,12 +658,12 @@ - **video.mit.edu** - **VideoDetective** - **videofy.me** - - **VideoMega** + - **VideoMega** (Currently broken) - **videomore** - **videomore:season** - **videomore:video** - **VideoPremium** - - **VideoTt**: video.tt - Your True Tube + - **VideoTt**: video.tt - Your True Tube (Currently broken) - **videoweed**: VideoWeed - **Vidme** - **Vidzi** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7030903c0..4d433b667 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.01.09' +__version__ = '2016.01.14' From 30e2f2d76f6dd52803effce14fa14f3a8051c84a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 14 Jan 2016 16:28:46 +0100 Subject: [PATCH 009/491] [YoutubeDL] use a more correct terminology in the error message for file:// URLs --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index 4915fbd45..6b73b8e06 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1994,7 +1994,7 @@ class YoutubeDL(object): file_handler = compat_urllib_request.FileHandler() def file_open(*args, **kwargs): - raise compat_urllib_error.URLError('file:/// protocol is explicitly disabled in youtube-dl for security reasons') + raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') file_handler.file_open = file_open opener = compat_urllib_request.build_opener( From fbd90643cb123011a224da58b4ff1c4ba1c4f8f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 21:48:08 +0600 Subject: [PATCH 010/491] [vodlocker] Fix extraction (Closes #8231) --- youtube_dl/extractor/vodlocker.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index 357594a11..a97995a6d 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -5,12 +5,13 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse from ..utils import ( ExtractorError, + NO_DEFAULT, sanitized_Request, ) class VodlockerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' + _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' 
_TESTS = [{ 'url': 'http://vodlocker.com/e8wvyzz4sl42', @@ -43,16 +44,31 @@ class VodlockerIE(InfoExtractor): webpage = self._download_webpage( req, video_id, 'Downloading video page') + def extract_file_url(html, default=NO_DEFAULT): + return self._search_regex( + r'file:\s*"(http[^\"]+)",', html, 'file url', default=default) + + video_url = extract_file_url(webpage, default=None) + + if not video_url: + embed_url = self._search_regex( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1', + webpage, 'embed url', group='url') + embed_webpage = self._download_webpage( + embed_url, video_id, 'Downloading embed webpage') + video_url = extract_file_url(embed_webpage) + thumbnail_webpage = embed_webpage + else: + thumbnail_webpage = webpage + title = self._search_regex( r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title') thumbnail = self._search_regex( - r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail') - url = self._search_regex( - r'file:\s*"(http[^\"]+)",', webpage, 'file url') + r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False) formats = [{ 'format_id': 'sd', - 'url': url, + 'url': video_url, }] return { From 5cc9c5dfa8f731b6582b092e06f78cccbaefc3c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 21:53:24 +0600 Subject: [PATCH 011/491] [unistra] Fix extraction --- youtube_dl/extractor/unistra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index f70978299..594bee4f9 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -38,7 +38,7 @@ class UnistraIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage)) + files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage)) quality = qualities(['SD', 'HD']) formats = [] From 163e8369b0d2f6b8cc59dd1e93b20a980590648f Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 22:05:04 +0600 Subject: [PATCH 012/491] [ntvde] Fix extraction --- youtube_dl/extractor/ntvde.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py index d2cfe0961..8268eace7 100644 --- a/youtube_dl/extractor/ntvde.py +++ b/youtube_dl/extractor/ntvde.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( int_or_none, js_to_json, @@ -42,18 +43,24 @@ class NTVDeIE(InfoExtractor): webpage, 'player data'), video_id, transform_source=js_to_json) duration = parse_duration(vdata.get('duration')) - formats = [{ - 'format_id': 'flash', - 'url': 'rtmp://fms.n-tv.de/' + vdata['video'], - }, { - 'format_id': 'mobile', - 'url': 'http://video.n-tv.de' + vdata['videoMp4'], - 'tbr': 400, # estimation - }] - m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8'] - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', - entry_protocol='m3u8_native', preference=0)) + + formats = [] + if vdata.get('video'): + formats.append({ + 'format_id': 'flash', + 'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'], + }) + if vdata.get('videoMp4'): + formats.append({ + 'format_id': 'mobile', + 'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']), + 'tbr': 400, # estimation + }) + if vdata.get('videoM3u8'): + m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8']) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + preference=0, m3u8_id='hls', fatal=False)) self._sort_formats(formats) return { From 4654c1d01613e26d782c95b13ce60e5fdd84892a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 22:07:42 +0600 Subject: [PATCH 013/491] [orf:fm4] Extend _VALID_URL (Closes #8234) --- 
youtube_dl/extractor/orf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 2e6c9872b..da598e7f7 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -170,7 +170,7 @@ class ORFOE1IE(InfoExtractor): class ORFFM4IE(InfoExtractor): IE_NAME = 'orf:fm4' IE_DESC = 'radio FM4' - _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)' + _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 986986064ec102b0d97b4ab008ae38ede6358796 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 22:11:33 +0600 Subject: [PATCH 014/491] [orf:fm4] Add test --- youtube_dl/extractor/orf.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index da598e7f7..c54775d54 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -172,6 +172,20 @@ class ORFFM4IE(InfoExtractor): IE_DESC = 'radio FM4' _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)' + _TEST = { + 'url': 'http://fm4.orf.at/player/20160110/IS/', + 'md5': '01e736e8f1cef7e13246e880a59ad298', + 'info_dict': { + 'id': '2016-01-10_2100_tl_54_7DaysSun13_11244', + 'ext': 'mp3', + 'title': 'Im Sumpf', + 'description': 'md5:384c543f866c4e422a55f66a62d669cd', + 'duration': 7173, + 'timestamp': 1452456073, + 'upload_date': '20160110', + }, + } + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) show_date = mobj.group('date') From 6b559c2fbcf70158bd84b3b5892ecd5fc4b03e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 22:12:24 +0600 Subject: [PATCH 015/491] [ntvde] Improve regex --- youtube_dl/extractor/ntvde.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py 
index 8268eace7..a83e85cb8 100644 --- a/youtube_dl/extractor/ntvde.py +++ b/youtube_dl/extractor/ntvde.py @@ -35,7 +35,7 @@ class NTVDeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) info = self._parse_json(self._search_regex( - r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'), + r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'), video_id, transform_source=js_to_json) timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp')) vdata = self._parse_json(self._search_regex( From 0baedd1851692a4b9f94c08b3eae5d57acf07f09 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 13 Jan 2016 16:11:49 +0100 Subject: [PATCH 016/491] [prosiebensat1] add support for 7tv.de --- youtube_dl/extractor/prosiebensat1.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index baa54a3af..953df3efc 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -20,7 +20,7 @@ from ..utils import ( class ProSiebenSat1IE(InfoExtractor): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' _TESTS = [ { @@ -172,6 +172,20 @@ class ProSiebenSat1IE(InfoExtractor): }, 'playlist_count': 2, }, + { + 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge', + 'info_dict': { + 'id': '4187506', + 'ext': 'flv', + 'title': 'Best of Circus HalliGalli', + 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9', + 'upload_date': '20151229', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, ] _CLIPID_REGEXES = [ @@ -186,12 +200,14 @@ class 
ProSiebenSat1IE(InfoExtractor): r'\s*

(.+?)

', r'

\s*(.+?)

', r'
\s*

([^<]+)

\s*
', + r'

\s*(.+?)

', ] _DESCRIPTION_REGEXES = [ r'

\s*(.+?)

', r'
\s*

Beschreibung: (.+?)

', r'
\s*
\s*\s*(.+?)\s*