From c9d51b2a20d175abab9ef306ae806f76624f8348 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Tue, 2 Jun 2015 23:59:59 +0200 Subject: [PATCH] [NativeHlsFD] Add support for encrypted media --- youtube_dl/downloader/hls.py | 125 ++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 18 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8be4f4249..87a13b410 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -13,7 +13,10 @@ from ..compat import ( from ..utils import ( encodeArgument, encodeFilename, + bytes_to_intlist, + intlist_to_bytes ) +from ..aes import aes_cbc_decrypt class HlsFD(FileDownloader): @@ -55,6 +58,14 @@ class NativeHlsFD(FileDownloader): """ A more limited implementation that does not require ffmpeg """ def real_download(self, filename, info_dict): + def convert_to_big_endian(value, size): + big_endian = [0] * size + for i in range(size): + block = value % 256 + value //= 256 + big_endian[size - 1 - i] = block + return big_endian + url = info_dict['url'] self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -62,37 +73,115 @@ class NativeHlsFD(FileDownloader): self.to_screen( '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) data = self.ydl.urlopen(url).read() - s = data.decode('utf-8', 'ignore') - segment_urls = [] - for line in s.splitlines(): - line = line.strip() - if line and not line.startswith('#'): - segment_url = ( - line - if re.match(r'^https?://', line) - else compat_urlparse.urljoin(url, line)) - segment_urls.append(segment_url) + m3u8_data = data.decode('utf-8', 'ignore') + + segment_count = 0 + for m3u8_line in m3u8_data.splitlines(): + m3u8_line = m3u8_line.strip() + if m3u8_line and not m3u8_line.startswith('#'): + segment_count += 1 is_test = self.params.get('test', False) remaining_bytes = self._TEST_FILE_SIZE if is_test else None byte_counter = 0 - with open(tmpfilename, 'wb') as outf: - for i, segurl in 
enumerate(segment_urls): - self.to_screen( - '[hlsnative] %s: Downloading segment %d / %d' % - (info_dict['id'], i + 1, len(segment_urls))) - seg_req = compat_urllib_request.Request(segurl) + media_sequence = 0 + # Function to decrypt segment or None + decrypt_fn = None + segment_index = 0 + for m3u8_line in m3u8_data.splitlines(): + m3u8_line = m3u8_line.strip() + mo = re.match(r'^#\s*EXT-X-MEDIA-SEQUENCE\s*:\s*(?P<seq>\d+)$', + m3u8_line, re.IGNORECASE) + if mo: + media_sequence = int(mo.group('seq')) + continue + mo = re.match(r'^#\s*EXT-X-KEY\s*:\s*' + # METHOD + r'METHOD\s*=\s*(?P<type>(?P<AES>AES-128)|NONE)' + r'(?(AES)' # if AES + r'\s*,\s*' + # URI + r'URI\s*=\s*' + r'(?P<uri_delim>["\'])' + r'(?P<uri>.*?)' + r'(?P=uri_delim)' + # IV (optional) + r'(?:' + r'\s*,\s*' + # IV + r'IV\s*=\s*' + r'(?:0X)?' + r'(?P<iv>[0-9a-f]+)' + r')?' + r'|' # else + r'$' + r')', m3u8_line, re.IGNORECASE) + if mo: + _type = mo.group('type').upper() + if _type == 'NONE': + decrypt_fn = None + elif _type == 'AES-128': + self.to_screen( + '[hlsnative] %s: Downloading encryption key' % + (info_dict['id'])) + key = bytes_to_intlist(self.ydl.urlopen(mo.group('uri')).read()) + if len(key) != 16: + self.report_warning('Invalid encryption key') + continue + if mo.group('iv'): + iv = int(mo.group('iv'), 16) + iv_big_endian = convert_to_big_endian(iv, 16) + decrypt_fn = lambda data: aes_cbc_decrypt(bytes_to_intlist(data), key, iv_big_endian) + else: + decrypt_fn = lambda data: aes_cbc_decrypt(bytes_to_intlist(data), key, + convert_to_big_endian(media_sequence, 16)) + continue + + if m3u8_line and not m3u8_line.startswith('#'): + segment_url = ( + m3u8_line + if re.match(r'^https?://', m3u8_line) + else compat_urlparse.urljoin(url, m3u8_line)) + + seg_req = compat_urllib_request.Request(segment_url) if remaining_bytes is not None: seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) - segment = self.ydl.urlopen(seg_req).read() + segment_filename = '%s-%d' % (tmpfilename, segment_index) + if 
os.path.exists(segment_filename): + self.to_screen( + '[hlsnative] %s: Segment content already downloaded %d / %d' % + (info_dict['id'], segment_index + 1, segment_count)) + with open(segment_filename, "rb") as inf: + segment = inf.read() + else: + self.to_screen( + '[hlsnative] %s: Downloading segment %d / %d' % + (info_dict['id'], segment_index + 1, segment_count)) + segment = self.ydl.urlopen(seg_req).read() + if decrypt_fn is not None: + segment = intlist_to_bytes(decrypt_fn(segment)) if remaining_bytes is not None: segment = segment[:remaining_bytes] remaining_bytes -= len(segment) - outf.write(segment) + with open(segment_filename, 'wb') as outf: + outf.write(segment) + + segment_index += 1 + media_sequence += 1 + byte_counter += len(segment) if remaining_bytes is not None and remaining_bytes <= 0: break + segment_count = segment_index + + # Concatenate segments + segment_filenames = ['%s-%d' % (tmpfilename, segment_index) for segment_index in range(segment_count)] + with open(tmpfilename, "wb") as outf: + for segment_filename in segment_filenames: + with open(segment_filename, "rb") as inf: + outf.write(inf.read()) + os.remove(segment_filename) self._hook_progress({ 'downloaded_bytes': byte_counter,