From 1eba6cdd123f7291f0daebb3a343030caee89229 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Wed, 22 Jul 2015 20:03:05 +0800
Subject: [PATCH 1/3] [utils] Add function_pool

---
 youtube_dl/utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 942f76d24..8bc78d0f2 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -68,6 +68,9 @@ ENGLISH_MONTH_NAMES = [
     'January', 'February', 'March', 'April', 'May', 'June',
     'July', 'August', 'September', 'October', 'November', 'December']
 
+# Registry of callables looked up by string key (e.g. by the HLS downloaders)
+function_pool = {}
+
 
 def preferredencoding():
     """Get preferred encoding.

From a714f57bf2ef2178c1fa0ae3c9eef7484082ce8f Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Wed, 22 Jul 2015 20:03:29 +0800
Subject: [PATCH 2/3] [downloader/hls] Support preprocessing of m3u8 manifests

---
 youtube_dl/downloader/hls.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 8be4f4249..f4424cb6d 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -13,6 +13,7 @@ from ..compat import (
 from ..utils import (
     encodeArgument,
     encodeFilename,
+    function_pool,
 )
 
 
@@ -28,9 +29,21 @@ class HlsFD(FileDownloader):
             return False
         ffpp.check_version()
 
+        input_arg = url
+        # Download and transform the manifest here, then feed ffmpeg a local copy
+        if info_dict.get('hls_transform_source_key'):
+            transform_source = function_pool[info_dict['hls_transform_source_key']]
+            self.to_screen(
+                '[hls] %s: Downloading m3u8 manifest' % info_dict['id'])
+            data = self.ydl.urlopen(url).read()
+            data = transform_source(data)
+            input_arg = '%s.m3u8' % filename
+            with open(input_arg, 'wb') as f:
+                f.write(data)
+
         args = [
             encodeArgument(opt)
-            for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
+            for opt in (ffpp.executable, '-y', '-i', input_arg, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
         args.append(encodeFilename(tmpfilename, True))
 
         retval = subprocess.call(args)
@@ -44,6 +57,11 @@ class HlsFD(FileDownloader):
                 'filename': filename,
                 'status': 'finished',
             })
+            if info_dict.get('hls_transform_source_key'):
+                os.remove(input_arg)
             return True
         else:
+            # Do not leave the temporary manifest behind when ffmpeg fails
+            if info_dict.get('hls_transform_source_key'):
+                os.remove(input_arg)
             self.to_stderr('\n')
@@ -62,6 +80,9 @@ class NativeHlsFD(FileDownloader):
         self.to_screen(
             '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
         data = self.ydl.urlopen(url).read()
+        if info_dict.get('hls_transform_source_key'):
+            transform_source = function_pool[info_dict['hls_transform_source_key']]
+            data = transform_source(data)
         s = data.decode('utf-8', 'ignore')
         segment_urls = []
         for line in s.splitlines():

From fc20ac489648c5e718018d27f96235ce99521116 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Wed, 22 Jul 2015 20:03:50 +0800
Subject: [PATCH 3/3] [letv] Fix extraction (closes #6192)

---
 youtube_dl/extractor/letv.py | 70 ++++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 18 deletions(-)

diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index ba2ae8085..42ca5e00f 100644
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -9,12 +9,13 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
-    compat_urlparse,
+    compat_ord,
 )
 from ..utils import (
     determine_ext,
     ExtractorError,
     parse_iso8601,
+    function_pool,
 )
 
 
@@ -24,15 +25,16 @@ class LetvIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.letv.com/ptv/vplay/22005890.html',
-        'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
+        'md5': 'edadcfe5406976f42f9f266057ee5e40',
         'info_dict': {
             'id': '22005890',
             'ext': 'mp4',
             'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
-            'timestamp': 1424747397,
-            'upload_date': '20150224',
             'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
-        }
+        },
+        'params': {
+            'hls_prefer_native': True,
+        },
     }, {
         'url': 'http://www.letv.com/ptv/vplay/1415246.html',
         'info_dict': {
@@ -41,19 +43,31 @@ class LetvIE(InfoExtractor):
             'title': '美人天下01',
             'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
         },
+        'params': {
+            'hls_prefer_native': True,
+        },
     }, {
         'note': 'This video is available only in Mainland China, thus a proxy is needed',
         'url': 'http://www.letv.com/ptv/vplay/1118082.html',
-        'md5': 'f80936fbe20fb2f58648e81386ff7927',
+        'md5': '2424c74948a62e5f31988438979c5ad1',
         'info_dict': {
             'id': '1118082',
             'ext': 'mp4',
             'title': '与龙共舞 完整版',
             'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
         },
+        'params': {
+            'hls_prefer_native': True,
+        },
         'skip': 'Only available in China',
     }]
 
+    def _real_initialize(self):
+        self.register_functions()
+
+    def register_functions(self):
+        function_pool['LetvIE.decrypt_m3u8'] = self.decrypt_m3u8
+
     @staticmethod
     def urshift(val, n):
         return val >> n if val >= 0 else (val + 0x100000000) >> n
@@ -73,6 +87,27 @@ class LetvIE(InfoExtractor):
             _loc3_ = self.ror(_loc3_, _loc2_ % 17)
         return _loc3_
 
+    # see M3U8Encryption class in KLetvPlayer.swf
+    @staticmethod
+    def decrypt_m3u8(encrypted_data):
+        if encrypted_data[:5].lower() != b'vc_01':
+            return encrypted_data
+        encrypted_data = encrypted_data[5:]
+
+        # Split every byte into its high and low nibble
+        _loc4_ = bytearray()
+        for c in encrypted_data:
+            b = compat_ord(c)
+            _loc4_.extend([b // 16, b & 0x0f])
+        # Move the last 11 nibbles to the front
+        idx = len(_loc4_) - 11
+        _loc4_ = _loc4_[idx:] + _loc4_[:idx]
+        # Pack consecutive nibble pairs back into bytes
+        _loc7_ = bytearray(
+            _loc4_[i] * 16 + _loc4_[i + 1] for i in range(0, len(_loc4_), 2))
+
+        return bytes(_loc7_)
+
     def _real_extract(self, url):
         media_id = self._match_id(url)
         page = self._download_webpage(url, media_id)
@@ -114,23 +149,22 @@ class LetvIE(InfoExtractor):
         for format_id in formats:
             if format_id in dispatch:
                 media_url = playurl['domain'][0] + dispatch[format_id][0]
-
-                # Mimic what flvxz.com do
-                url_parts = list(compat_urlparse.urlparse(media_url))
-                qs = dict(compat_urlparse.parse_qs(url_parts[4]))
-                qs.update({
-                    'platid': '14',
-                    'splatid': '1401',
-                    'tss': 'no',
-                    'retry': 1
+                media_url += '&' + compat_urllib_parse.urlencode({
+                    'm3v': 1,
+                    'format': 1,
+                    'expect': 3,
+                    'rateid': format_id,
                 })
-                url_parts[4] = compat_urllib_parse.urlencode(qs)
-                media_url = compat_urlparse.urlunparse(url_parts)
+
+                nodes_data = self._download_json(
+                    media_url, media_id,
+                    'Download JSON metadata for format %s' % format_id)
 
                 url_info_dict = {
-                    'url': media_url,
+                    'url': nodes_data['nodelist'][0]['location'],
                     'ext': determine_ext(dispatch[format_id][1]),
                     'format_id': format_id,
+                    'hls_transform_source_key': 'LetvIE.decrypt_m3u8',
                 }
 
                 if format_id[-1:] == 'p':