From 6cbb20bb090845898fcc368beed45708f05bf908 Mon Sep 17 00:00:00 2001 From: DarkstaIkers Date: Tue, 29 Mar 2016 14:26:24 -0300 Subject: [PATCH 01/18] Update crunchyroll.py --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8ae3f2890..44c720aaa 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -184,7 +184,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ScaledBorderAndShadow: yes + output += """ScaledBorderAndShadow: no [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding From 189935f15960300d316e8b07108b076ac6c2186a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sat, 5 Nov 2016 06:11:51 +0100 Subject: [PATCH 02/18] [jsinterp] Fix function calls without arguments. 
--- test/test_jsinterp.py | 7 +++++++ youtube_dl/jsinterp.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 63c350b8f..a9abae5f5 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -104,6 +104,13 @@ class TestJSInterpreter(unittest.TestCase): }''') self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) + def test_call(self): + jsi = JSInterpreter(''' + function x() { return 2; } + function y(a) { return x() + a; } + function z() { return y(3); } + ''') + self.assertEqual(jsi.call_function('z'), 5) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 9737f7002..a8df4aef0 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -198,12 +198,12 @@ class JSInterpreter(object): return opfunc(x, y) m = re.match( - r'^(?P%s)\((?P[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr) + r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) if m: fname = m.group('func') argvals = tuple([ int(v) if v.isdigit() else local_vars[v] - for v in m.group('args').split(',')]) + for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple() if fname not in self._functions: self._functions[fname] = self.extract_function(fname) return self._functions[fname](argvals) From 95ad9ce573033006b08c4f1a440f3ff04c20d8b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sat, 5 Nov 2016 05:00:09 +0100 Subject: [PATCH 03/18] [openload] Fix extraction. aadecode code was restored from commit c1decda58c812b3d0a3d4dfa998e7d8bd8f99203 with some optimizations (2x faster). 
Fixes #10408 --- youtube_dl/extractor/openload.py | 65 ++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index d3d4101de..7f19b1ba5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals, division +import re + from .common import InfoExtractor from ..compat import ( compat_chr, @@ -10,6 +12,10 @@ from ..utils import ( determine_ext, ExtractorError, ) +from ..jsinterp import ( + JSInterpreter, + _NAME_RE +) class OpenloadIE(InfoExtractor): @@ -56,6 +62,44 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] + def openload_decode(self, txt): + symbol_dict = { + '(゚Д゚) [゚Θ゚]': '_', + '(゚Д゚) [゚ω゚ノ]': 'a', + '(゚Д゚) [゚Θ゚ノ]': 'b', + '(゚Д゚) [\'c\']': 'c', + '(゚Д゚) [゚ー゚ノ]': 'd', + '(゚Д゚) [゚Д゚ノ]': 'e', + '(゚Д゚) [1]': 'f', + '(゚Д゚) [\'o\']': 'o', + '(o゚ー゚o)': 'u', + '(゚Д゚) [\'c\']': 'c', + '((゚ー゚) + (o^_^o))': '7', + '((o^_^o) +(o^_^o) +(c^_^o))': '6', + '((゚ー゚) + (゚Θ゚))': '5', + '(-~3)': '4', + '(-~-~1)': '3', + '(-~1)': '2', + '(-~0)': '1', + '((c^_^o)-(c^_^o))': '0', + } + delim = '(゚Д゚)[゚ε゚]+' + end_token = '(゚Д゚)[゚o゚]' + symbols = '|'.join(map(re.escape, symbol_dict.keys())) + txt = re.sub('(%s)\+\s?' 
% symbols, lambda m: symbol_dict[m.group(1)], txt) + ret = '' + for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt): + for aachar in aacode.split(delim): + if aachar.isdigit(): + ret += compat_chr(int(aachar, 8)) + else: + m = re.match(r'^u([\da-f]{4})$', aachar) + if m: + ret += compat_chr(int(m.group(1), 16)) + else: + self.report_warning("Cannot decode: %s" % aachar) + return ret + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) @@ -70,19 +114,26 @@ class OpenloadIE(InfoExtractor): r']*>([^<]+)\s*]*>[^<]+\s*]+id="streamurl"', webpage, 'encrypted data') - magic = compat_ord(enc_data[-1]) + enc_code = self._html_search_regex(r']+>(゚ω゚[^<]+)', + webpage, 'encrypted code') + + js_code = self.openload_decode(enc_code) + jsi = JSInterpreter(js_code) + + m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function') + m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function') + + offset = jsi.call_function(m_offset_fun) + diff = jsi.call_function(m_diff_fun) + video_url_chars = [] for idx, c in enumerate(enc_data): j = compat_ord(c) - if j == magic: - j -= 1 - elif j == magic - 1: - j += 1 if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 - if idx == len(enc_data) - 1: - j += 3 + if idx == len(enc_data) - offset: + j += diff video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From d7553968047d1fe8221905ea6ce7c006881ec2e3 Mon Sep 17 00:00:00 2001 From: MAA Date: Wed, 16 Nov 2016 09:00:30 +0300 Subject: [PATCH 04/18] Strip only args urls --- youtube_dl/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 643393558..ae8b83694 100644 --- a/youtube_dl/__init__.py +++ 
b/youtube_dl/__init__.py @@ -95,8 +95,7 @@ def _real_main(argv=None): write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: sys.exit('ERROR: batch file could not be read') - all_urls = batch_urls + args - all_urls = [url.strip() for url in all_urls] + all_urls = batch_urls + [url.strip() for url in args] _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] From d76767c90ec8d0edfabfaf51b7ab28182196d9dd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 16 Nov 2016 20:47:15 +0800 Subject: [PATCH 05/18] [ChangeLog] Update after #11122 landed --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 577709c44..9e9f5cfc9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [openload] Fix extraction (#10408, #11122) + + version 2016.11.14.1 Core From b2fc1c4fb965c08067cf42e5a7aaab45df8c2d5f Mon Sep 17 00:00:00 2001 From: FooBarQuaxx Date: Wed, 16 Nov 2016 18:18:54 +0300 Subject: [PATCH 06/18] Add explanatory comment --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ae8b83694..af99cf1c0 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -95,7 +95,7 @@ def _real_main(argv=None): write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: sys.exit('ERROR: batch file could not be read') - all_urls = batch_urls + [url.strip() for url in args] + all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] From 073d5bf583e8ff8ae71efca5fc6fae0743ac8961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 16 Nov 2016 23:14:06 +0700 Subject: [PATCH 07/18] [youtube:live] Relax _VALID_URL 
(closes #11164) --- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 545246bcd..7ccb875a5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2175,7 +2175,7 @@ class YoutubeUserIE(YoutubeChannelIE): class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com live streams' - _VALID_URL = r'(?Phttps?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P[^/]+))/live' + _VALID_URL = r'(?Phttps?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P[^/]+))/live' IE_NAME = 'youtube:live' _TESTS = [{ @@ -2204,6 +2204,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/TheYoungTurks/live', + 'only_matching': True, }] def _real_extract(self, url): From 582be3584761030bfbee13b0c6ea9e6ce2c8a790 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Nov 2016 19:42:56 +0800 Subject: [PATCH 08/18] Update coding style after pycodestyle 2.1.0 In pycodestyle 2.1.0, E305 was introduced, which requires two blank lines after top level declarations, too. 
See https://github.com/PyCQA/pycodestyle/issues/400 See also #10689; thanks @stepshal for first mentioning this issue and initial patches --- devscripts/bash-completion.py | 1 + devscripts/fish-completion.py | 1 + devscripts/generate_aes_testdata.py | 1 + devscripts/gh-pages/update-sites.py | 1 + devscripts/make_contributing.py | 1 + devscripts/make_lazy_extractors.py | 1 + devscripts/make_supportedsites.py | 1 + devscripts/prepare_manpage.py | 1 + devscripts/zsh-completion.py | 1 + test/test_InfoExtractor.py | 1 + test/test_aes.py | 1 + test/test_download.py | 2 ++ test/test_execution.py | 1 + test/test_http.py | 1 + test/test_iqiyi_sdk_interpreter.py | 1 + test/test_jsinterp.py | 1 + test/test_utils.py | 1 + test/test_verbose_output.py | 1 + test/test_write_annotations.py | 2 ++ test/test_youtube_lists.py | 1 + test/test_youtube_signature.py | 1 + youtube_dl/__init__.py | 1 + youtube_dl/aes.py | 2 ++ youtube_dl/compat.py | 2 ++ youtube_dl/downloader/external.py | 1 + youtube_dl/socks.py | 1 + youtube_dl/swfinterp.py | 3 +++ 27 files changed, 33 insertions(+) diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index ce68f26f9..3d1391334 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -25,5 +25,6 @@ def build_completion(opt_parser): filled_template = template.replace("{{flags}}", " ".join(opts_flag)) f.write(filled_template) + parser = youtube_dl.parseOpts()[0] build_completion(parser) diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index 41629d87d..51d19dd33 100755 --- a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -44,5 +44,6 @@ def build_completion(opt_parser): with open(FISH_COMPLETION_FILE, 'w') as f: f.write(filled_template) + parser = youtube_dl.parseOpts()[0] build_completion(parser) diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py index 2e389fc8e..e3df42cc2 100644 --- a/devscripts/generate_aes_testdata.py +++ 
b/devscripts/generate_aes_testdata.py @@ -23,6 +23,7 @@ def openssl_encode(algo, key, iv): out, _ = prog.communicate(secret_msg) return out + iv = key = [0x20, 0x15] + 14 * [0] r = openssl_encode('aes-128-cbc', key, iv) diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index 503c1372f..531c93c70 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -32,5 +32,6 @@ def main(): with open('supportedsites.html', 'w', encoding='utf-8') as sitesf: sitesf.write(template) + if __name__ == '__main__': main() diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py index 5e454a429..226d1a5d6 100755 --- a/devscripts/make_contributing.py +++ b/devscripts/make_contributing.py @@ -28,5 +28,6 @@ def main(): with io.open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) + if __name__ == '__main__': main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 9a79c2bc5..19114d30d 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -59,6 +59,7 @@ def build_lazy_ie(ie, name): s += make_valid_template.format(valid_url=ie._make_valid_url()) return s + # find the correct sorting and add the required base classes so that sublcasses # can be correctly created classes = _ALL_CLASSES[:-1] diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 8cb4a4638..764795bc5 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -41,5 +41,6 @@ def main(): with io.open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) + if __name__ == '__main__': main() diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index ce548739f..f9fe63f1f 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -74,5 +74,6 @@ def filter_options(readme): return ret + if __name__ == '__main__': main() diff --git 
a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 04728e8e2..60aaf76cc 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -44,5 +44,6 @@ def build_completion(opt_parser): with open(ZSH_COMPLETION_FILE, "w") as f: f.write(template) + parser = youtube_dl.parseOpts()[0] build_completion(parser) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index a98305c74..437c7270e 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -84,5 +84,6 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(ExtractorError, self.ie._download_json, uri, None) self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_aes.py b/test/test_aes.py index 315a3f5ae..54078a66d 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -51,5 +51,6 @@ class TestAES(unittest.TestCase): decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) + if __name__ == '__main__': unittest.main() diff --git a/test/test_download.py b/test/test_download.py index a3f1c0644..463952989 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -60,6 +60,7 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() + defs = gettestcases() @@ -217,6 +218,7 @@ def generator(test_case): return test_template + # And add them to TestDownload for n, test_case in enumerate(defs): test_method = generator(test_case) diff --git a/test/test_execution.py b/test/test_execution.py index 620db080e..11661bb68 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -39,5 +39,6 @@ class TestExecution(unittest.TestCase): _, stderr = p.communicate() self.assertFalse(stderr) + if __name__ == '__main__': unittest.main() diff --git a/test/test_http.py b/test/test_http.py index bb0a098e4..7a7a3510f 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -169,5 +169,6 @@ class 
TestProxy(unittest.TestCase): # b'xn--fiq228c' is '中文'.encode('idna') self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') + if __name__ == '__main__': unittest.main() diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py index 9d95cb606..789059dbe 100644 --- a/test/test_iqiyi_sdk_interpreter.py +++ b/test/test_iqiyi_sdk_interpreter.py @@ -43,5 +43,6 @@ class TestIqiyiSDKInterpreter(unittest.TestCase): ie._login() self.assertTrue('unable to log in:' in logger.messages[0]) + if __name__ == '__main__': unittest.main() diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index a9abae5f5..c24b8ca74 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -112,5 +112,6 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('z'), 5) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index cb75ca53e..2e3cd0179 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1075,5 +1075,6 @@ The first line self.assertEqual(get_element_by_class('foo', html), 'nice') self.assertEqual(get_element_by_class('no-such-class', html), None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py index 96a66f7a0..c1465fe8c 100644 --- a/test/test_verbose_output.py +++ b/test/test_verbose_output.py @@ -66,5 +66,6 @@ class TestVerboseOutput(unittest.TestCase): self.assertTrue(b'-p' in serr) self.assertTrue(b'secret' not in serr) + if __name__ == '__main__': unittest.main() diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py index 8de08f2d6..41abdfe3b 100644 --- a/test/test_write_annotations.py +++ b/test/test_write_annotations.py @@ -24,6 +24,7 @@ class YoutubeDL(youtube_dl.YoutubeDL): super(YoutubeDL, self).__init__(*args, **kwargs) self.to_stderr = self.to_screen + params = get_params({ 'writeannotations': True, 'skip_download': True, @@ -74,5 +75,6 @@ class 
TestAnnotations(unittest.TestCase): def tearDown(self): try_rm(ANNOTATIONS_FILE) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index af1c45421..7a33dbf88 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -66,5 +66,6 @@ class TestYoutubeLists(unittest.TestCase): for entry in result['entries']: self.assertTrue(entry.get('title')) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 060864434..f0c370eee 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -114,6 +114,7 @@ def make_tfunc(url, stype, sig_input, expected_sig): test_func.__name__ = str('test_signature_' + stype + '_' + test_id) setattr(TestSignature, test_func.__name__, test_func) + for test_spec in _TESTS: make_tfunc(*test_spec) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index af99cf1c0..6850d95e1 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -449,4 +449,5 @@ def main(argv=None): except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') + __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index a01c367de..b8ff45481 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -174,6 +174,7 @@ def aes_decrypt_text(data, password, key_size_bytes): return plaintext + RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, @@ -328,4 +329,5 @@ def inc(data): break return data + __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b8aaf5a46..83ee7e257 100644 --- 
a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2491,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder): def doctype(self, name, pubid, system): pass + if sys.version_info[0] >= 3: def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) @@ -2787,6 +2788,7 @@ def workaround_optparse_bug9161(): return real_add_option(self, *bargs, **bkwargs) optparse.OptionGroup.add_option = _compat_add_option + if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 compat_get_terminal_size = shutil.get_terminal_size else: diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 0aeae3b8f..5d3e5d8d3 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -293,6 +293,7 @@ class FFmpegFD(ExternalFD): class AVconvFD(FFmpegFD): pass + _BY_NAME = dict( (klass.get_basename(), klass) for name, klass in globals().items() diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 104807242..63d19b3a5 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -103,6 +103,7 @@ class ProxyType(object): SOCKS4A = 1 SOCKS5 = 2 + Proxy = collections.namedtuple('Proxy', ( 'type', 'host', 'port', 'username', 'password', 'remote_dns')) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 7cf490aa4..0c7158575 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -115,6 +115,8 @@ def _u30(reader): res = _read_int(reader) assert res & 0xf0000000 == 0 return res + + _u32 = _read_int @@ -176,6 +178,7 @@ class _Undefined(object): return 'undefined' __repr__ = __str__ + undefined = _Undefined() From 689f31fde52fc49c01cc8a8acd20464a3527501d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Nov 2016 00:17:46 +0700 Subject: [PATCH 09/18] [devscripts/create-github-release] Fill release body from ChangeLog (closes #11094) --- devscripts/create-github-release.py | 17 +++++++++++++---- devscripts/release.sh | 2 +- 2 files changed, 
14 insertions(+), 5 deletions(-) diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 3b8021e74..30716ad8e 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -2,11 +2,13 @@ from __future__ import unicode_literals import base64 +import io import json import mimetypes import netrc import optparse import os +import re import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -90,16 +92,23 @@ class GitHubReleaser(object): def main(): - parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH') + parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH') options, args = parser.parse_args() - if len(args) != 2: + if len(args) != 3: parser.error('Expected a version and a build directory') - version, build_path = args + changelog_file, version, build_path = args + + with io.open(changelog_file, encoding='utf-8') as inf: + changelog = inf.read() + + mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog) + body = mobj.group(1) if mobj else '' releaser = GitHubReleaser() - new_release = releaser.create_release(version, name='youtube-dl %s' % version) + new_release = releaser.create_release( + version, name='youtube-dl %s' % version, body=body) release_id = new_release['id'] for asset in os.listdir(build_path): diff --git a/devscripts/release.sh b/devscripts/release.sh index 1af61aa0b..4db5def5d 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -110,7 +110,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done ROOT=$(pwd) -python devscripts/create-github-release.py $version "$ROOT/build/$version" +python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version" ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" From 5f75c4a4ad5f406f9dc01bd872f3be54de87b23d Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Nov 2016 00:19:55 +0700 Subject: [PATCH 10/18] [ChangeLog] Actualize --- ChangeLog | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9e9f5cfc9..99c9c1cf3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,9 @@ version Extractors +* [youtube:live] Relax _VALID_URL (#11164) * [openload] Fix extraction (#10408, #11122) +* [vlive] Prefer locale over language for subtitles id (#11203) version 2016.11.14.1 From b25459b88ae8faf072046eb9c23af0dcda7c1408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Nov 2016 00:25:24 +0700 Subject: [PATCH 11/18] release 2016.11.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fef9fc7a2..85ac137a1 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.14.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.14.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.14.1 +[debug] youtube-dl version 2016.11.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 99c9c1cf3..874230f42 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.18 Extractors * [youtube:live] Relax _VALID_URL (#11164) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9557b2000..ef9ccc08a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.14.1' +__version__ = '2016.11.18' From c131fc3372c4fc69434dbb7c79935c1587beff20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Nov 2016 01:16:33 +0700 Subject: [PATCH 12/18] [tvanouvelles] Add extractor (closes #10616) --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/tvanouvelles.py | 65 ++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 youtube_dl/extractor/tvanouvelles.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 578359a5e..9107f0b96 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -965,6 +965,10 @@ from .tv2 import ( ) from .tv3 import TV3IE from .tv4 import TV4IE +from 
.tvanouvelles import ( + TVANouvellesIE, + TVANouvellesArticleIE, +) from .tvc import ( TVCIE, TVCArticleIE, diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py new file mode 100644 index 000000000..1086176a2 --- /dev/null +++ b/youtube_dl/extractor/tvanouvelles.py @@ -0,0 +1,65 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .brightcove import BrightcoveNewIE + + +class TVANouvellesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/videos/(?P\d+)' + _TEST = { + 'url': 'http://www.tvanouvelles.ca/videos/5117035533001', + 'info_dict': { + 'id': '5117035533001', + 'ext': 'mp4', + 'title': 'L’industrie du taxi dénonce l’entente entre Québec et Uber: explications', + 'description': 'md5:479653b7c8cf115747bf5118066bd8b3', + 'uploader_id': '1741764581', + 'timestamp': 1473352030, + 'upload_date': '20160908', + }, + 'add_ie': ['BrightcoveNew'], + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + brightcove_id = self._match_id(url) + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + BrightcoveNewIE.ie_key(), brightcove_id) + + +class TVANouvellesArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/(?:[^/]+/)+(?P[^/?#&]+)' + _TEST = { + 'url': 'http://www.tvanouvelles.ca/2016/11/17/des-policiers-qui-ont-la-meche-un-peu-courte', + 'info_dict': { + 'id': 'des-policiers-qui-ont-la-meche-un-peu-courte', + 'title': 'Des policiers qui ont «la mèche un peu courte»?', + 'description': 'md5:92d363c8eb0f0f030de9a4a84a90a3a0', + }, + 'playlist_mincount': 4, + } + + @classmethod + def suitable(cls, url): + return False if TVANouvellesIE.suitable(url) else super(TVANouvellesArticleIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, 
display_id) + + entries = [ + self.url_result( + 'http://www.tvanouvelles.ca/videos/%s' % mobj.group('id'), + ie=TVANouvellesIE.ie_key(), video_id=mobj.group('id')) + for mobj in re.finditer( + r'data-video-id=(["\'])?(?P\d+)', webpage)] + + title = self._og_search_title(webpage, fatal=False) + description = self._og_search_description(webpage) + + return self.playlist_result(entries, display_id, title, description) From 748a462fbecc9c006d8e9ed6b3f596ff1893cf39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Nov 2016 01:49:13 +0700 Subject: [PATCH 13/18] [twitter:card] Relax _VALID_URL (closes #11225) --- youtube_dl/extractor/twitter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 3411fcf7e..ac0b221b4 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -25,7 +25,7 @@ class TwitterBaseIE(InfoExtractor): class TwitterCardIE(TwitterBaseIE): IE_NAME = 'twitter:card' - _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P\d+)' _TESTS = [ { 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', @@ -84,6 +84,9 @@ class TwitterCardIE(TwitterBaseIE): 'title': 'Twitter web player', 'thumbnail': 're:^https?://.*\.jpg', }, + }, { + 'url': 'https://twitter.com/i/videos/752274308186120192', + 'only_matching': True, }, ] From df46b19cb82b90807693d0d25ac5d817546dd63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Nov 2016 01:56:31 +0700 Subject: [PATCH 14/18] [toutv] Fix login form regex (closes #11223) --- youtube_dl/extractor/toutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 573f2ff6b..26d770992 100644 --- a/youtube_dl/extractor/toutv.py +++ 
b/youtube_dl/extractor/toutv.py @@ -56,7 +56,7 @@ class TouTvIE(InfoExtractor): 'state': state, }) login_form = self._search_regex( - r'(?s)(]+id="Form-login".+?)', login_webpage, 'login form') + r'(?s)(]+(?:id|name)="Form-login".+?)', login_webpage, 'login form') form_data = self._hidden_inputs(login_form) form_data.update({ 'login-email': email, From 08ec95a6dba54aeec398c99f422abb2a5b59a7e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Nov 2016 03:10:20 +0700 Subject: [PATCH 15/18] [ChangeLog] Actualize --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 874230f42..15129419c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ version 2016.11.18 Extractors -* [youtube:live] Relax _VALID_URL (#11164) +* [youtube:live] Relax URL regular expression (#11164) * [openload] Fix extraction (#10408, #11122) * [vlive] Prefer locale over language for subtitles id (#11203) From 0aacd2deb1075e0d4d4b8b23b9a65b3967a1d658 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Nov 2016 04:18:21 +0700 Subject: [PATCH 16/18] [bandcamp] Fix free downloads extraction and extract all formats (closes #11067) --- youtube_dl/extractor/bandcamp.py | 86 +++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 249c3d956..88c590e98 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -1,7 +1,9 @@ from __future__ import unicode_literals import json +import random import re +import time from .common import InfoExtractor from ..compat import ( @@ -12,6 +14,9 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + parse_filesize, + unescapeHTML, + update_url_query, ) @@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor): r'(?ms)var TralbumData = .*?[{,]\s*id: (?P\d+),?$', webpage, 'video id') - download_webpage = 
self._download_webpage(download_link, video_id, 'Downloading free downloads page') - # We get the dictionary of the track from some javascript code - all_info = self._parse_json(self._search_regex( - r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) - info = all_info[0] - # We pick mp3-320 for now, until format selection can be easily implemented. - mp3_info = info['downloads']['mp3-320'] - # If we try to use this url it says the link has expired - initial_url = mp3_info['url'] - m_url = re.match( - r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$', - initial_url) - # We build the url we will use to get the final track url - # This url is build in Bandcamp in the script download_bunde_*.js - request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) - final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') - # If we could correctly generate the .rand field the url would be - # in the "download_url" key - final_url = self._proto_relative_url(self._search_regex( - r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:') + download_webpage = self._download_webpage( + download_link, video_id, 'Downloading free downloads page') + + blob = self._parse_json( + self._search_regex( + r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage, + 'blob', group='blob'), + video_id, transform_source=unescapeHTML) + + info = blob['digital_items'][0] + + downloads = info['downloads'] + track = info['title'] + + artist = info.get('artist') + title = '%s - %s' % (artist, track) if artist else track + + download_formats = {} + for f in blob['download_formats']: + name, ext = f.get('name'), f.get('file_extension') + if all(isinstance(x, compat_str) for x in (name, ext)): + download_formats[name] = ext.strip('.') + + formats = [] + for format_id, f in downloads.items(): + 
format_url = f.get('url') + if not format_url: + continue + # Stat URL generation algorithm is reverse engineered from + # download_*_bundle_*.js + stat_url = update_url_query( + format_url.replace('/download/', '/statdownload/'), { + '.rand': int(time.time() * 1000 * random.random()), + }) + format_id = f.get('encoding_name') or format_id + stat = self._download_json( + stat_url, video_id, 'Downloading %s JSON' % format_id, + transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], + fatal=False) + if not stat: + continue + retry_url = stat.get('retry_url') + if not isinstance(retry_url, compat_str): + continue + formats.append({ + 'url': self._proto_relative_url(retry_url, 'http:'), + 'ext': download_formats.get(format_id), + 'format_id': format_id, + 'format_note': f.get('description'), + 'filesize': parse_filesize(f.get('size_mb')), + 'vcodec': 'none', + }) + self._sort_formats(formats) return { 'id': video_id, - 'title': info['title'], - 'ext': 'mp3', - 'vcodec': 'none', - 'url': final_url, + 'title': title, 'thumbnail': info.get('thumb_url'), 'uploader': info.get('artist'), + 'artist': artist, + 'track': track, + 'formats': formats, } From 303b38fa84eee94a51961e5273b4dbe174266d26 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 20 Nov 2016 00:06:44 +0800 Subject: [PATCH 17/18] [ChangeLog] Update for #9028 --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 15129419c..0d8174408 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +- [Crunchyroll] ScaledBorderAndShadow are removed from ASS subtitles + (#8207, #9028) + version 2016.11.18 Extractors From 8f8f182d0baf04c31e95582aa7eedea940e0cdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Nov 2016 02:13:21 +0700 Subject: [PATCH 18/18] [extractor/generic] Improve limelight embeds support --- youtube_dl/extractor/generic.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git 
a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bde65fa27..f9707c155 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2232,6 +2232,16 @@ class GenericIE(InfoExtractor): return self.url_result('limelight:%s:%s' % ( lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + mobj = re.search( + r'''(?sx) + <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*? + <param[^>]+ + name=(["\'])flashVars\2[^>]+ + value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32}) + ''', webpage) + if mobj: + return self.url_result('limelight:media:%s' % mobj.group('id')) + # Look for AdobeTVVideo embeds mobj = re.search( r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',