From c9d51b2a20d175abab9ef306ae806f76624f8348 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Tue, 2 Jun 2015 23:59:59 +0200 Subject: [PATCH 1/3] [NativeHlsFD] Add support for encrypted media --- youtube_dl/downloader/hls.py | 125 ++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 18 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8be4f4249..87a13b410 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -13,7 +13,10 @@ from ..compat import ( from ..utils import ( encodeArgument, encodeFilename, + bytes_to_intlist, + intlist_to_bytes ) +from ..aes import aes_cbc_decrypt class HlsFD(FileDownloader): @@ -55,6 +58,14 @@ class NativeHlsFD(FileDownloader): """ A more limited implementation that does not require ffmpeg """ def real_download(self, filename, info_dict): + def convert_to_big_endian(value, size): + big_endian = [0] * size + for i in range(size): + block = value % 256 + value //= 256 + big_endian[size - 1 - i] = block + return big_endian + url = info_dict['url'] self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -62,37 +73,115 @@ class NativeHlsFD(FileDownloader): self.to_screen( '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) data = self.ydl.urlopen(url).read() - s = data.decode('utf-8', 'ignore') - segment_urls = [] - for line in s.splitlines(): - line = line.strip() - if line and not line.startswith('#'): - segment_url = ( - line - if re.match(r'^https?://', line) - else compat_urlparse.urljoin(url, line)) - segment_urls.append(segment_url) + m3u8_data = data.decode('utf-8', 'ignore') + + segment_count = 0 + for m3u8_line in m3u8_data.splitlines(): + m3u8_line = m3u8_line.strip() + if m3u8_line and not m3u8_line.startswith('#'): + segment_count += 1 is_test = self.params.get('test', False) remaining_bytes = self._TEST_FILE_SIZE if is_test else None byte_counter = 0 - with open(tmpfilename, 'wb') as outf: - for i, segurl in enumerate(segment_urls): - self.to_screen( - '[hlsnative] %s: Downloading segment %d / %d' % - (info_dict['id'], i + 1, len(segment_urls))) - seg_req = compat_urllib_request.Request(segurl) + media_sequence = 0 + # Function to decrypt segment or None + decrypt_fn = None + segment_index = 0 + for m3u8_line in m3u8_data.splitlines(): + m3u8_line = m3u8_line.strip() + mo = re.match(r'^#\s*EXT-X-MEDIA-SEQUENCE\s*:\s*(?P\d+)$', + m3u8_line, re.IGNORECASE) + if mo: + media_sequence = int(mo.group('seq')) + continue + mo = re.match(r'^#\s*EXT-X-KEY\s*:\s*' + # METHOD + r'METHOD\s*=\s*(?P(?PAES-128)|NONE)' + r'(?(AES)' # if AES + r'\s*,\s*' + # URI + r'URI\s*=\s*' + r'(?P["\'])' + r'(?P.*?)' + r'(?P=uri_delim)' + # IV (optional) + r'(?:' + r'\s*,\s*' + # IV + r'IV\s*=\s*' + r'(?:0X)?' + r'(?P[0-9a-f]+)' + r')?' + r'|' # else + r'$' + r')', m3u8_line, re.IGNORECASE) + if mo: + _type = mo.group('type').upper() + if _type == 'NONE': + decrypt_fn = None + elif _type == 'AES-128': + self.to_screen( + '[hlsnative] %s: Downloading encryption key' % + (info_dict['id'])) + key = bytes_to_intlist(self.ydl.urlopen(mo.group('uri')).read()) + if len(key) != 16: + self.report_warning('Invalid encryption key') + continue + if mo.group('iv'): + iv = int(mo.group('iv'), 16) + iv_big_endian = convert_to_big_endian(iv, 16) + decrypt_fn = lambda data: aes_cbc_decrypt(bytes_to_intlist(data), key, iv_big_endian) + else: + decrypt_fn = lambda data: aes_cbc_decrypt(bytes_to_intlist(data), key, + convert_to_big_endian(media_sequence, 16)) + continue + + if m3u8_line and not m3u8_line.startswith('#'): + segment_url = ( + m3u8_line + if re.match(r'^https?://', m3u8_line) + else compat_urlparse.urljoin(url, m3u8_line)) + + seg_req = compat_urllib_request.Request(segment_url) if remaining_bytes is not None: seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) - segment = self.ydl.urlopen(seg_req).read() + segment_filename = '%s-%d' % (tmpfilename, segment_index) + if os.path.exists(segment_filename): + self.to_screen( + '[hlsnative] %s: Segment content already downloaded %d / %d' % + (info_dict['id'], segment_index + 1, segment_count)) + with open(segment_filename, "rb") as inf: + segment = inf.read() + else: + self.to_screen( + '[hlsnative] %s: Downloading segment %d / %d' % + (info_dict['id'], segment_index + 1, segment_count)) + segment = self.ydl.urlopen(seg_req).read() + if decrypt_fn is not None: + segment = intlist_to_bytes(decrypt_fn(segment)) if remaining_bytes is not None: segment = segment[:remaining_bytes] remaining_bytes -= len(segment) - outf.write(segment) + with open(segment_filename, 'wb') as outf: + outf.write(segment) + + segment_index += 1 + media_sequence += 1 + byte_counter += len(segment) if remaining_bytes is not None and remaining_bytes <= 0: break + segment_count = segment_index + + # Concatenate segments + segment_filenames = ['%s-%d' % (tmpfilename, segment_index) for segment_index in range(segment_count)] + with open(tmpfilename, "wb") as outf: + for segment_filename in segment_filenames: + with open(segment_filename, "rb") as inf: + outf.write(inf.read()) + os.remove(segment_filename) self._hook_progress({ 'downloaded_bytes': byte_counter, From b4b1d4be3ed9e9da8217921e9be8b6c8c3cecc91 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Wed, 3 Jun 2015 00:00:29 +0200 Subject: [PATCH 2/3] [aes.py] Use Crypto.Cipher.AES if available --- youtube_dl/aes.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 7817adcfd..459a76457 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -2,6 +2,10 @@ from __future__ import unicode_literals import base64 from math import ceil +try: + from Crypto.Cipher import AES +except: + pass from .utils import bytes_to_intlist, intlist_to_bytes @@ -18,6 +22,12 @@ def aes_ctr_decrypt(data, key, counter): returns the next counter block @returns {int[]} decrypted data """ + if 'AES' in globals(): + obj = AES.new(intlist_to_bytes(key), AES.MODE_CTR, + counter = lambda: intlist_to_bytes(counter.next_value())) + decrypted_data = obj.decrypt(intlist_to_bytes(data)) + return bytes_to_intlist(decrypted_data) + expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) @@ -43,15 +53,20 @@ def aes_cbc_decrypt(data, key, iv): @param {int[]} iv 16-Byte IV @returns {int[]} decrypted data """ - expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + data += [0] * (block_count * BLOCK_SIZE_BYTES - len(data)) + + if 'AES' in globals(): + obj = AES.new(intlist_to_bytes(key), AES.MODE_CBC, intlist_to_bytes(iv)) + decrypted_data = obj.decrypt(intlist_to_bytes(data)) + return bytes_to_intlist(decrypted_data) + + expanded_key = key_expansion(key) decrypted_data = [] previous_cipher_block = iv for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - block += [0] * (BLOCK_SIZE_BYTES - len(block)) - decrypted_block = aes_decrypt(block, expanded_key) decrypted_data += xor(decrypted_block, previous_cipher_block) previous_cipher_block = block From 2d129b7d4b7da6354e3b0708cebde42fcb7f9a43 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Wed, 3 Jun 2015 16:42:44 +0200 Subject: [PATCH 3/3] [aes.py] Optimize --- youtube_dl/aes.py | 128 +++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 459a76457..e323b9a32 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -35,11 +35,9 @@ def aes_ctr_decrypt(data, key, counter): for i in range(block_count): counter_block = counter.next_value() block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - block += [0] * (BLOCK_SIZE_BYTES - len(block)) cipher_counter_block = aes_encrypt(counter_block, expanded_key) decrypted_data += xor(block, cipher_counter_block) - decrypted_data = decrypted_data[:len(data)] return decrypted_data @@ -118,15 +116,31 @@ def aes_encrypt(data, expanded_key): @param {int[]} expanded_key 176/208/240-Byte expanded key @returns {int[]} 16-Byte cipher """ + precompute_rijndael_mul() rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 - - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) - for i in range(1, rounds + 1): - data = sub_bytes(data) - data = shift_rows(data) - if i != rounds: - data = mix_columns(data) - data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) + # xor + data = [x ^ y for x, y in zip(data, expanded_key[:BLOCK_SIZE_BYTES])] + for _round in range(1, rounds + 1): + # sub bytes + data = [SBOX[x] for x in data] + # shift rows + data_shifted = [0] * 16 + for column in range(4): + for row in range(4): + data_shifted[column*4 + row] = data[((column + row) & 0b11) * 4 + row] + data = data_shifted + if _round != rounds: + # mix columns + for j in range(0,16,4): + column_data = data[j:j + 4] + for row in range(4): + mixed = 0 + for column in range(4): + # xor is (+) and (-) + mixed ^= rijndael_mul_precomputed[column_data[column]][MIX_COLUMN_MATRIX[row][column]] + data[j+row] = mixed + # xor + data = [x ^ y for x, y in zip(data, expanded_key[_round * BLOCK_SIZE_BYTES: (_round + 1) * BLOCK_SIZE_BYTES])] return data @@ -139,15 +153,31 @@ def aes_decrypt(data, expanded_key): @param {int[]} expanded_key 176/208/240-Byte expanded key @returns {int[]} 16-Byte state """ + precompute_rijndael_mul() rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 - - for i in range(rounds, 0, -1): - data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) - if i != rounds: - data = mix_columns_inv(data) - data = shift_rows_inv(data) - data = sub_bytes_inv(data) - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + for _round in range(rounds, 0, -1): + # xor + data = [x ^ y for x, y in zip(data, expanded_key[_round * BLOCK_SIZE_BYTES: (_round + 1) * BLOCK_SIZE_BYTES])] + if _round != rounds: + # mix columns + for j in range(0,16,4): + column_data = data[j:j + 4] + for row in range(4): + mixed = 0 + for column in range(4): + # xor is (+) and (-) + mixed ^= rijndael_mul_precomputed[column_data[column]][MIX_COLUMN_MATRIX_INV[row][column]] + data[j+row] = mixed + # shift rows inv + data_shifted = [0] * 16 + for column in range(4): + for row in range(4): + data_shifted[column*4 + row] = data[((column - row) & 0b11) * 4 + row] + data = data_shifted + # sub bytes + data = [SBOX_INV[x] for x in data] + # xor + data = [x ^ y for x, y in zip(data, expanded_key[:BLOCK_SIZE_BYTES])] return data @@ -262,6 +292,23 @@ RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07) +rijndael_mul_precomputed = None + + +def precompute_rijndael_mul(): + global rijndael_mul_precomputed + if rijndael_mul_precomputed is not None: + return + rijndael_mul_precomputed = [[0] * 256 for _ in range(256)] + for i in range(256): + for j in range(256): + rijndael_mul_precomputed[i][j] = rijndael_mul(i, j) + + +def rijndael_mul(a, b): + if(a == 0 or b == 0): + return 0 + return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF] def sub_bytes(data): @@ -288,51 +335,6 @@ def xor(data1, data2): return [x ^ y for x, y in zip(data1, data2)] -def rijndael_mul(a, b): - if(a == 0 or b == 0): - return 0 - return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF] - - -def mix_column(data, matrix): - data_mixed = [] - for row in range(4): - mixed = 0 - for column in range(4): - # xor is (+) and (-) - mixed ^= rijndael_mul(data[column], matrix[row][column]) - data_mixed.append(mixed) - return data_mixed - - -def mix_columns(data, matrix=MIX_COLUMN_MATRIX): - data_mixed = [] - for i in range(4): - column = data[i * 4: (i + 1) * 4] - data_mixed += mix_column(column, matrix) - return data_mixed - - -def mix_columns_inv(data): - return mix_columns(data, MIX_COLUMN_MATRIX_INV) - - -def shift_rows(data): - data_shifted = [] - for column in range(4): - for row in range(4): - data_shifted.append(data[((column + row) & 0b11) * 4 + row]) - return data_shifted - - -def shift_rows_inv(data): - data_shifted = [] - for column in range(4): - for row in range(4): - data_shifted.append(data[((column - row) & 0b11) * 4 + row]) - return data_shifted - - def inc(data): data = data[:] # copy for i in range(len(data) - 1, -1, -1):