From 6cbb20bb090845898fcc368beed45708f05bf908 Mon Sep 17 00:00:00 2001 From: DarkstaIkers Date: Tue, 29 Mar 2016 14:26:24 -0300 Subject: [PATCH 01/80] Update crunchyroll.py --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8ae3f2890..44c720aaa 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -184,7 +184,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ScaledBorderAndShadow: yes + output += """ScaledBorderAndShadow: no [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding From 189935f15960300d316e8b07108b076ac6c2186a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sat, 5 Nov 2016 06:11:51 +0100 Subject: [PATCH 02/80] [jsinterp] Fix function calls without arguments. 
--- test/test_jsinterp.py | 7 +++++++ youtube_dl/jsinterp.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 63c350b8f..a9abae5f5 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -104,6 +104,13 @@ class TestJSInterpreter(unittest.TestCase): }''') self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) + def test_call(self): + jsi = JSInterpreter(''' + function x() { return 2; } + function y(a) { return x() + a; } + function z() { return y(3); } + ''') + self.assertEqual(jsi.call_function('z'), 5) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 9737f7002..a8df4aef0 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -198,12 +198,12 @@ class JSInterpreter(object): return opfunc(x, y) m = re.match( - r'^(?P%s)\((?P[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr) + r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) if m: fname = m.group('func') argvals = tuple([ int(v) if v.isdigit() else local_vars[v] - for v in m.group('args').split(',')]) + for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple() if fname not in self._functions: self._functions[fname] = self.extract_function(fname) return self._functions[fname](argvals) From 95ad9ce573033006b08c4f1a440f3ff04c20d8b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sat, 5 Nov 2016 05:00:09 +0100 Subject: [PATCH 03/80] [openload] Fix extraction. aadecode code was restored from commit c1decda58c812b3d0a3d4dfa998e7d8bd8f99203 with some optimizations (2x faster). 
Fixes #10408 --- youtube_dl/extractor/openload.py | 65 ++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index d3d4101de..7f19b1ba5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals, division +import re + from .common import InfoExtractor from ..compat import ( compat_chr, @@ -10,6 +12,10 @@ from ..utils import ( determine_ext, ExtractorError, ) +from ..jsinterp import ( + JSInterpreter, + _NAME_RE +) class OpenloadIE(InfoExtractor): @@ -56,6 +62,44 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] + def openload_decode(self, txt): + symbol_dict = { + '(゚Д゚) [゚Θ゚]': '_', + '(゚Д゚) [゚ω゚ノ]': 'a', + '(゚Д゚) [゚Θ゚ノ]': 'b', + '(゚Д゚) [\'c\']': 'c', + '(゚Д゚) [゚ー゚ノ]': 'd', + '(゚Д゚) [゚Д゚ノ]': 'e', + '(゚Д゚) [1]': 'f', + '(゚Д゚) [\'o\']': 'o', + '(o゚ー゚o)': 'u', + '(゚Д゚) [\'c\']': 'c', + '((゚ー゚) + (o^_^o))': '7', + '((o^_^o) +(o^_^o) +(c^_^o))': '6', + '((゚ー゚) + (゚Θ゚))': '5', + '(-~3)': '4', + '(-~-~1)': '3', + '(-~1)': '2', + '(-~0)': '1', + '((c^_^o)-(c^_^o))': '0', + } + delim = '(゚Д゚)[゚ε゚]+' + end_token = '(゚Д゚)[゚o゚]' + symbols = '|'.join(map(re.escape, symbol_dict.keys())) + txt = re.sub('(%s)\+\s?' 
% symbols, lambda m: symbol_dict[m.group(1)], txt) + ret = '' + for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt): + for aachar in aacode.split(delim): + if aachar.isdigit(): + ret += compat_chr(int(aachar, 8)) + else: + m = re.match(r'^u([\da-f]{4})$', aachar) + if m: + ret += compat_chr(int(m.group(1), 16)) + else: + self.report_warning("Cannot decode: %s" % aachar) + return ret + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) @@ -70,19 +114,26 @@ class OpenloadIE(InfoExtractor): r']*>([^<]+)\s*]*>[^<]+\s*]+id="streamurl"', webpage, 'encrypted data') - magic = compat_ord(enc_data[-1]) + enc_code = self._html_search_regex(r']+>(゚ω゚[^<]+)', + webpage, 'encrypted code') + + js_code = self.openload_decode(enc_code) + jsi = JSInterpreter(js_code) + + m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function') + m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function') + + offset = jsi.call_function(m_offset_fun) + diff = jsi.call_function(m_diff_fun) + video_url_chars = [] for idx, c in enumerate(enc_data): j = compat_ord(c) - if j == magic: - j -= 1 - elif j == magic - 1: - j += 1 if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 - if idx == len(enc_data) - 1: - j += 3 + if idx == len(enc_data) - offset: + j += diff video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From 8b1aeadc33cdb1eef8079e67d522d8a39676bb53 Mon Sep 17 00:00:00 2001 From: cpm Date: Fri, 11 Nov 2016 15:48:19 -0500 Subject: [PATCH 04/80] [plays] Fix extraction --- youtube_dl/extractor/plays.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py index c3c38cf4a..5ac0292fa 100644 --- 
a/youtube_dl/extractor/plays.py +++ b/youtube_dl/extractor/plays.py @@ -10,12 +10,12 @@ from ..utils import int_or_none class PlaysTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P[0-9a-f]{18})' _TEST = { - 'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', + 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', 'md5': 'dfeac1198506652b5257a62762cec7bc', 'info_dict': { 'id': '56af17f56c95335490', 'ext': 'mp4', - 'title': 'When you outplay the Azir wall', + 'title': 'Bjergsen - When you outplay the Azir wall', 'description': 'Posted by Bjergsen', } } @@ -24,14 +24,11 @@ class PlaysTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) - content = self._parse_json( - self._search_regex( - r'R\.bindContent\(({.+?})\);', webpage, - 'content'), video_id)['content'] + content = self._search_json_ld(webpage, video_id) + title = content['title'] mpd_url, sources = re.search( r'(?s)]+data-mpd="([^"]+)"[^>]*>(.+?)', - content).groups() + webpage).groups() formats = self._extract_mpd_formats( self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') for format_id, height, format_url in re.findall(r' Date: Sat, 12 Nov 2016 23:01:05 +0700 Subject: [PATCH 05/80] [extractor/common] Improve thumbnail extraction from JSON-LD --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5f4c984a9..bc5d6a4c3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -886,7 +886,7 @@ class InfoExtractor(object): 'url': e.get('contentUrl'), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnail': e.get('thumbnailUrl'), + 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), 'duration': parse_duration(e.get('duration')), 
'timestamp': unified_timestamp(e.get('uploadDate')), 'filesize': float_or_none(e.get('contentSize')), From 3d2729514f432ac4d80b8dffbacb893b603f6d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Nov 2016 23:08:05 +0700 Subject: [PATCH 06/80] [plays] Improve extraction and add support for embed URLs --- youtube_dl/extractor/plays.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py index 5ac0292fa..ddfc6f148 100644 --- a/youtube_dl/extractor/plays.py +++ b/youtube_dl/extractor/plays.py @@ -8,8 +8,8 @@ from ..utils import int_or_none class PlaysTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P[0-9a-f]{18})' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P[0-9a-f]{18})' + _TESTS = [{ 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', 'md5': 'dfeac1198506652b5257a62762cec7bc', 'info_dict': { @@ -18,14 +18,18 @@ class PlaysTVIE(InfoExtractor): 'title': 'Bjergsen - When you outplay the Azir wall', 'description': 'Posted by Bjergsen', } - } + }, { + 'url': 'https://plays.tv/embeds/56af17f56c95335490', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'https://plays.tv/video/%s' % video_id, video_id) + + info = self._search_json_ld(webpage, video_id,) - content = self._search_json_ld(webpage, video_id) - title = content['title'] mpd_url, sources = re.search( r'(?s)]+data-mpd="([^"]+)"[^>]*>(.+?)', webpage).groups() @@ -39,10 +43,11 @@ class PlaysTVIE(InfoExtractor): }) self._sort_formats(formats) - return { + info.update({ 'id': video_id, - 'title': title, 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': info.get('thumbnail') or 
self._og_search_thumbnail(webpage), 'formats': formats, - } + }) + + return info From 577281b0c6b2fc774e03abf3465d47d909ba31e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sun, 16 Oct 2016 03:04:17 +0200 Subject: [PATCH 07/80] [cda] Fix and improve extraction Fixes #10929 --- youtube_dl/extractor/cda.py | 65 ++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 8af318703..e00bdaf66 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -5,14 +5,16 @@ import re from .common import InfoExtractor from ..utils import ( - decode_packed_codes, ExtractorError, - parse_duration + float_or_none, + int_or_none, + parse_duration, ) class CDAIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P[0-9a-z]+)' + _BASE_URL = 'http://www.cda.pl/' _TESTS = [{ 'url': 'http://www.cda.pl/video/5749950c', 'md5': '6f844bf51b15f31fae165365707ae970', @@ -21,6 +23,9 @@ class CDAIE(InfoExtractor): 'ext': 'mp4', 'height': 720, 'title': 'Oto dlaczego przed zakrętem należy zwolnić.', + 'description': 'md5:269ccd135d550da90d1662651fcb9772', + 'thumbnail': 're:^https?://.*\.jpg$', + 'average_rating': float, 'duration': 39 } }, { @@ -30,6 +35,11 @@ class CDAIE(InfoExtractor): 'id': '57413289', 'ext': 'mp4', 'title': 'Lądowanie na lotnisku na Maderze', + 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'crash404', + 'view_count': int, + 'average_rating': float, 'duration': 137 } }, { @@ -39,31 +49,55 @@ class CDAIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id) + self._set_cookie('cda.pl', 'cda.player', 'html5') + webpage = self._download_webpage( + self._BASE_URL + '/video/' + video_id, video_id) if 'Ten film jest dostępny 
dla użytkowników premium' in webpage: raise ExtractorError('This video is only available for premium users.', expected=True) - title = self._html_search_regex(r'(.+?)', webpage, 'title') - formats = [] + uploader = self._search_regex(r'''(?x) + <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*> + (?:<\1[^>]*>[^<]*|(?!)(?:.|\n))*? + <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P[^<]+) + ''', webpage, 'uploader', default=None, group='uploader') + view_count = self._search_regex( + r'Odsłony:(?:\s| )*([0-9]+)', webpage, + 'view_count', default=None) + average_rating = self._search_regex( + r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P[0-9.]+)', + webpage, 'rating', fatal=False, group='rating_value') + info_dict = { 'id': video_id, - 'title': title, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'uploader': uploader, + 'view_count': int_or_none(view_count), + 'average_rating': float_or_none(average_rating), + 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'duration': None, } def extract_format(page, version): - unpacked = decode_packed_codes(page) - format_url = self._search_regex( - r"(?:file|url)\s*:\s*(\\?[\"'])(?Phttp.+?)\1", unpacked, - '%s url' % version, fatal=False, group='url') - if not format_url: + json_str = self._search_regex( + r'player_data=(\\?["\'])(?P.+?)\1', page, + '%s player_json' % version, fatal=False, group='player_data') + if not json_str: + return + player_data = self._parse_json( + json_str, '%s player_data' % version, fatal=False) + if not player_data: + return + video = player_data.get('video') + if not video or 'file' not in video: + self.report_warning('Unable to extract %s version information' % version) return f = { - 'url': format_url, + 'url': video['file'], } m = re.search( r']+data-quality="(?P[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P[0-9]+)p', @@ -75,9 +109,7 @@ class CDAIE(InfoExtractor): }) 
info_dict['formats'].append(f) if not info_dict['duration']: - info_dict['duration'] = parse_duration(self._search_regex( - r"duration\s*:\s*(\\?[\"'])(?P.+?)\1", - unpacked, 'duration', fatal=False, group='duration')) + info_dict['duration'] = parse_duration(video.get('duration')) extract_format(webpage, 'default') @@ -85,7 +117,8 @@ class CDAIE(InfoExtractor): r']+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', webpage): webpage = self._download_webpage( - href, video_id, 'Downloading %s version information' % resolution, fatal=False) + self._BASE_URL + href, video_id, + 'Downloading %s version information' % resolution, fatal=False) if not webpage: # Manually report warning because empty page is returned when # invalid version is requested. From 4ea4c0bb2248b4de7efc152a4ee91d104ac79bc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 05:43:34 +0700 Subject: [PATCH 08/80] [extractor/common] Fix Bandwidth substitution in media template (closes #11175) --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index bc5d6a4c3..05c51fac9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1703,7 +1703,7 @@ class InfoExtractor(object): representation_ms_info['fragments'] = [{ 'url': media_template % { 'Number': segment_number, - 'Bandwidth': representation_attrib.get('bandwidth'), + 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), }, 'duration': segment_duration, } for segment_number in range( @@ -1721,7 +1721,7 @@ class InfoExtractor(object): def add_segment_url(): segment_url = media_template % { 'Time': segment_time, - 'Bandwidth': representation_attrib.get('bandwidth'), + 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), 'Number': segment_number, } representation_ms_info['fragments'].append({ From 
e58609b22ccf51859376ec20d657cba5365631d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 06:02:26 +0700 Subject: [PATCH 09/80] [afreecatv] Add support for vod.afreecatv.com (closes #11174) --- youtube_dl/extractor/afreecatv.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 518c61f67..75b366993 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -11,6 +11,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + update_url_query, xpath_element, xpath_text, ) @@ -18,12 +19,18 @@ from ..utils import ( class AfreecaTVIE(InfoExtractor): IE_DESC = 'afreecatv.com' - _VALID_URL = r'''(?x)^ - https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? - (?: - /app/(?:index|read_ucc_bbs)\.cgi| - /player/[Pp]layer\.(?:swf|html)) - \?.*?\bnTitleNo=(?P\d+)''' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? 
+ (?: + /app/(?:index|read_ucc_bbs)\.cgi| + /player/[Pp]layer\.(?:swf|html) + )\?.*?\bnTitleNo=| + vod\.afreecatv\.com/PLAYER/STATION/ + ) + (?P\d+) + ''' _TESTS = [{ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', @@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor): }, { 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', 'only_matching': True, + }, { + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030', + 'only_matching': True, }] @staticmethod @@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor): info_url = compat_urlparse.urlunparse(parsed_url._replace( netloc='afbbs.afreecatv.com:8080', path='/api/video/get_video_info.php')) - video_xml = self._download_xml(info_url, video_id) + + video_xml = self._download_xml( + update_url_query(info_url, {'nTitleNo': video_id}), video_id) if xpath_element(video_xml, './track/video/file') is None: raise ExtractorError('Specified AfreecaTV video does not exist', From 754e6c8322705cd9953c5f1032ed9dae35c38b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 20:54:34 +0700 Subject: [PATCH 10/80] [nrk] Workaround geo restriction and improve error messages --- youtube_dl/extractor/nrk.py | 39 +++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3700b7ab2..aed98141b 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import random import re from .common import InfoExtractor @@ -14,6 +15,24 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): + _faked_ip = None + + def _download_webpage(self, *args, **kwargs): + # NRK checks X-Forwarded-For HTTP header in order 
to figure out the + # origin of the client behind proxy. This allows to bypass geo + # restriction by faking this header's value to some Norway IP. + # We will do so once we encounter any geo restriction error. + if self._faked_ip: + kwargs.setdefault('headers', {})['X-Forwarded-For'] = self._faked_ip + return super(NRKBaseIE, self)._download_webpage(*args, **kwargs) + + def _fake_ip(self): + # Use fake IP from 37.191.128.0/17 in order to workaround geo + # restriction + def octet(lb=0, ub=255): + return random.randint(lb, ub) + self._faked_ip = '37.191.%d.%d' % (octet(128), octet()) + def _real_extract(self, url): video_id = self._match_id(url) @@ -70,10 +89,22 @@ class NRKBaseIE(InfoExtractor): }] if not entries: - if data.get('usageRights', {}).get('isGeoBlocked'): - raise ExtractorError( - 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', - expected=True) + message_type = data.get('messageType') + if message_type == 'ProgramIsGeoBlocked' and not self._faked_ip: + self.report_warning( + 'Video is geo restricted, trying to fake IP') + self._fake_ip() + return self._real_extract(url) + + MESSAGES = { + 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', + 'ProgramRightsHasExpired': 'Programmet har gått ut', + 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', + } + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, MESSAGES.get( + message_type, message_type)), + expected=True) conviva = data.get('convivaStatistics') or {} series = conviva.get('seriesName') or data.get('seriesTitle') From 690355551c084a942db9820b4c83b65f73fb2d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 22:06:16 +0700 Subject: [PATCH 11/80] [downoader/fragment,f4m,hls] Add internal support for custom HTTP headers --- youtube_dl/downloader/f4m.py | 8 ++++++-- youtube_dl/downloader/fragment.py | 5 +++++ youtube_dl/downloader/hls.py | 8 ++++++-- 3 files changed, 17 
insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 80c21d40b..688e086eb 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -314,7 +314,8 @@ class F4mFD(FragmentFD): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) - urlh = self.ydl.urlopen(man_url) + + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.geturl() # Some manifests may be malformed, e.g. prosiebensat1 generated manifests # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 @@ -387,7 +388,10 @@ class F4mFD(FragmentFD): url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) frag_filename = '%s-%s' % (ctx['tmpfilename'], name) try: - success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()}) + success = ctx['dl'].download(frag_filename, { + 'url': url_parsed.geturl(), + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False (down, frag_sanitized) = sanitize_open(frag_filename, 'rb') diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 84aacf7db..60df627a6 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -9,6 +9,7 @@ from ..utils import ( error_to_compat_str, encodeFilename, sanitize_open, + sanitized_Request, ) @@ -37,6 +38,10 @@ class FragmentFD(FileDownloader): def report_skip_fragment(self, fragment_name): self.to_screen('[download] Skipping fragment %s...' 
% fragment_name) + def _prepare_url(self, info_dict, url): + headers = info_dict.get('http_headers') + return sanitized_Request(url, None, headers) if headers else url + def _prepare_and_start_frag_download(self, ctx): self._prepare_frag_download(ctx) self._start_frag_download(ctx) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 541b92ee1..7373ec05f 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -59,7 +59,8 @@ class HlsFD(FragmentFD): def real_download(self, filename, info_dict): man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) - manifest = self.ydl.urlopen(man_url).read() + + manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read() s = manifest.decode('utf-8', 'ignore') @@ -112,7 +113,10 @@ class HlsFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(frag_filename, {'url': frag_url}) + success = ctx['dl'].download(frag_filename, { + 'url': frag_url, + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False down, frag_sanitized = sanitize_open(frag_filename, 'rb') From 7e08e2cab02b0284e72171b3ba6b946b49f12331 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 22:28:29 +0700 Subject: [PATCH 12/80] [nrk] Add X-Forwarded-For HTTP header in info dict --- youtube_dl/extractor/nrk.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index aed98141b..1f2204833 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -17,14 +17,15 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): _faked_ip = None - def _download_webpage(self, *args, **kwargs): + def _download_webpage_handle(self, *args, **kwargs): # NRK checks X-Forwarded-For HTTP header in order to figure out the # origin of the client behind proxy. 
This allows to bypass geo # restriction by faking this header's value to some Norway IP. # We will do so once we encounter any geo restriction error. if self._faked_ip: - kwargs.setdefault('headers', {})['X-Forwarded-For'] = self._faked_ip - return super(NRKBaseIE, self)._download_webpage(*args, **kwargs) + # NB: str is intentional + kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip + return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs) def _fake_ip(self): # Use fake IP from 37.191.128.0/17 in order to workaround geo @@ -43,6 +44,8 @@ class NRKBaseIE(InfoExtractor): title = data.get('fullTitle') or data.get('mainTitle') or data['title'] video_id = data.get('id') or video_id + http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {} + entries = [] media_assets = data.get('mediaAssets') @@ -73,6 +76,7 @@ class NRKBaseIE(InfoExtractor): 'duration': duration, 'subtitles': subtitles, 'formats': formats, + 'http_headers': http_headers, }) if not entries: From 50913b82414488bbf625a00f1844ca84dec094dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 22:29:36 +0700 Subject: [PATCH 13/80] [nrk] Improve geo restriction detection --- youtube_dl/extractor/nrk.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 1f2204833..c89aac63e 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -93,8 +93,9 @@ class NRKBaseIE(InfoExtractor): }] if not entries: - message_type = data.get('messageType') - if message_type == 'ProgramIsGeoBlocked' and not self._faked_ip: + message_type = data.get('messageType', '') + # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* + if 'IsGeoBlocked' in message_type and not self._faked_ip: self.report_warning( 'Video is geo restricted, trying to fake IP') self._fake_ip() From dbffd00ba940164df6f144577902d2cd8cf27e71 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Nov 2016 02:37:21 +0700 Subject: [PATCH 14/80] [ChangeLog] Actualize --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index d97156e20..1b3962d2c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,18 @@ version +Core ++ [downoader/fragment,f4m,hls] Respect HTTP headers from info dict +* [extractor/common] Fix media templates with Bandwidth substitution pattern in + MPD manifests (#11175) +* [extractor/common] Improve thumbnail extraction from JSON-LD + Extractors ++ [nrk] Workaround geo restriction ++ [nrk] Improve error detection and messages ++ [afreecatv] Add support for vod.afreecatv.com (#11174) +* [cda] Fix and improve extraction (#10929, #10936) +* [plays] Fix extraction (#11165) +* [eagleplatform] Fix extraction (#11160) + [audioboom] Recognize /posts/ URLs (#11149) From b3d4bd05f9c3fff52efe669d7930b1647d6e2612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Nov 2016 02:39:50 +0700 Subject: [PATCH 15/80] release 2016.11.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index bfae97ddd..f96b5672d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.14*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.08.1 +[debug] youtube-dl version 2016.11.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1b3962d2c..791ffb7b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.14 Core + [downoader/fragment,f4m,hls] Respect HTTP headers from info dict diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 69df88c6e..1f2b246e4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.08.1' +__version__ = '2016.11.14' From 9f60134a9dd06f89961c34da1e6611d599ea6102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Nov 2016 02:46:12 +0700 Subject: [PATCH 16/80] [ChangeLog] Actualize --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 791ffb7b6..1b3962d2c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version 2016.11.14 +version Core + [downoader/fragment,f4m,hls] Respect HTTP headers from info dict From 
6b4dfa28197af9939ffa1cff90124300c46742f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Nov 2016 02:48:15 +0700 Subject: [PATCH 17/80] release 2016.11.14.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f96b5672d..fef9fc7a2 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.14.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.14.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.14 +[debug] youtube-dl version 2016.11.14.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1b3962d2c..577709c44 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.14.1 Core + [downoader/fragment,f4m,hls] Respect HTTP headers from info dict diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1f2b246e4..9557b2000 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.14' +__version__ = '2016.11.14.1' From 49b69ad91c4a638eb8b9bdb9846819fa9274a38e Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 15 Nov 2016 23:07:17 +0800 Subject: [PATCH 18/80] [vlive] Prefer locale over language for subtitles id --- youtube_dl/extractor/vlive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 8d671cca7..c3aa57cd6 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -116,7 +116,7 @@ class VLiveIE(InfoExtractor): subtitles = {} for caption in playinfo.get('captions', {}).get('list', []): - lang = dict_get(caption, ('language', 'locale', 'country', 'label')) + lang = dict_get(caption, 
('locale', 'language', 'country', 'label')) if lang and caption.get('source'): subtitles[lang] = [{ 'ext': 'vtt', From 58355a3bf163349831b076c0fac4c09a286d5aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 15 Nov 2016 22:11:47 +0700 Subject: [PATCH 19/80] [vlive] Add test for #11203 --- youtube_dl/extractor/vlive.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index c3aa57cd6..acf9fda48 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -17,7 +17,7 @@ from ..compat import compat_urllib_parse_urlencode class VLiveIE(InfoExtractor): IE_NAME = 'vlive' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.vlive.tv/video/1326', 'md5': 'cc7314812855ce56de70a06a27314983', 'info_dict': { @@ -27,7 +27,20 @@ class VLiveIE(InfoExtractor): 'creator': "Girl's Day", 'view_count': int, }, - } + }, { + 'url': 'http://www.vlive.tv/video/16937', + 'info_dict': { + 'id': '16937', + 'ext': 'mp4', + 'title': '[V LIVE] 첸백시 걍방', + 'creator': 'EXO', + 'view_count': int, + 'subtitles': 'mincount:12', + }, + 'params': { + 'skip_download': True, + }, + }] def _real_extract(self, url): video_id = self._match_id(url) From d7553968047d1fe8221905ea6ce7c006881ec2e3 Mon Sep 17 00:00:00 2001 From: MAA Date: Wed, 16 Nov 2016 09:00:30 +0300 Subject: [PATCH 20/80] Strip only args urls --- youtube_dl/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 643393558..ae8b83694 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -95,8 +95,7 @@ def _real_main(argv=None): write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: sys.exit('ERROR: batch file could not be read') - all_urls = batch_urls + args - all_urls = [url.strip() for url in all_urls] + all_urls = 
batch_urls + [url.strip() for url in args] _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] From d76767c90ec8d0edfabfaf51b7ab28182196d9dd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 16 Nov 2016 20:47:15 +0800 Subject: [PATCH 21/80] [ChangeLog] Update after #11122 landed --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 577709c44..9e9f5cfc9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [openload] Fix extraction (#10408, #11122) + + version 2016.11.14.1 Core From b2fc1c4fb965c08067cf42e5a7aaab45df8c2d5f Mon Sep 17 00:00:00 2001 From: FooBarQuaxx Date: Wed, 16 Nov 2016 18:18:54 +0300 Subject: [PATCH 22/80] Add explanatory comment --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ae8b83694..af99cf1c0 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -95,7 +95,7 @@ def _real_main(argv=None): write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: sys.exit('ERROR: batch file could not be read') - all_urls = batch_urls + [url.strip() for url in args] + all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] From 073d5bf583e8ff8ae71efca5fc6fae0743ac8961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 16 Nov 2016 23:14:06 +0700 Subject: [PATCH 23/80] [youtube:live] Relax _VALID_URL (closes #11164) --- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 545246bcd..7ccb875a5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py 
@@ -2175,7 +2175,7 @@ class YoutubeUserIE(YoutubeChannelIE): class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com live streams' - _VALID_URL = r'(?Phttps?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P[^/]+))/live' + _VALID_URL = r'(?Phttps?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P[^/]+))/live' IE_NAME = 'youtube:live' _TESTS = [{ @@ -2204,6 +2204,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/TheYoungTurks/live', + 'only_matching': True, }] def _real_extract(self, url): From 582be3584761030bfbee13b0c6ea9e6ce2c8a790 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Nov 2016 19:42:56 +0800 Subject: [PATCH 24/80] Update coding style after pycodestyle 2.1.0 In pycodestyle 2.1.0, E305 was introduced, which requires two blank lines after top level declarations, too. See https://github.com/PyCQA/pycodestyle/issues/400 See also #10689; thanks @stepshal for first mentioning this issue and initial patches --- devscripts/bash-completion.py | 1 + devscripts/fish-completion.py | 1 + devscripts/generate_aes_testdata.py | 1 + devscripts/gh-pages/update-sites.py | 1 + devscripts/make_contributing.py | 1 + devscripts/make_lazy_extractors.py | 1 + devscripts/make_supportedsites.py | 1 + devscripts/prepare_manpage.py | 1 + devscripts/zsh-completion.py | 1 + test/test_InfoExtractor.py | 1 + test/test_aes.py | 1 + test/test_download.py | 2 ++ test/test_execution.py | 1 + test/test_http.py | 1 + test/test_iqiyi_sdk_interpreter.py | 1 + test/test_jsinterp.py | 1 + test/test_utils.py | 1 + test/test_verbose_output.py | 1 + test/test_write_annotations.py | 2 ++ test/test_youtube_lists.py | 1 + test/test_youtube_signature.py | 1 + youtube_dl/__init__.py | 1 + youtube_dl/aes.py | 2 ++ youtube_dl/compat.py | 2 ++ youtube_dl/downloader/external.py | 1 + youtube_dl/socks.py | 1 + youtube_dl/swfinterp.py | 3 +++ 27 files 
changed, 33 insertions(+) diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index ce68f26f9..3d1391334 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -25,5 +25,6 @@ def build_completion(opt_parser): filled_template = template.replace("{{flags}}", " ".join(opts_flag)) f.write(filled_template) + parser = youtube_dl.parseOpts()[0] build_completion(parser) diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index 41629d87d..51d19dd33 100755 --- a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -44,5 +44,6 @@ def build_completion(opt_parser): with open(FISH_COMPLETION_FILE, 'w') as f: f.write(filled_template) + parser = youtube_dl.parseOpts()[0] build_completion(parser) diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py index 2e389fc8e..e3df42cc2 100644 --- a/devscripts/generate_aes_testdata.py +++ b/devscripts/generate_aes_testdata.py @@ -23,6 +23,7 @@ def openssl_encode(algo, key, iv): out, _ = prog.communicate(secret_msg) return out + iv = key = [0x20, 0x15] + 14 * [0] r = openssl_encode('aes-128-cbc', key, iv) diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index 503c1372f..531c93c70 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -32,5 +32,6 @@ def main(): with open('supportedsites.html', 'w', encoding='utf-8') as sitesf: sitesf.write(template) + if __name__ == '__main__': main() diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py index 5e454a429..226d1a5d6 100755 --- a/devscripts/make_contributing.py +++ b/devscripts/make_contributing.py @@ -28,5 +28,6 @@ def main(): with io.open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) + if __name__ == '__main__': main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 9a79c2bc5..19114d30d 100644 --- 
a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -59,6 +59,7 @@ def build_lazy_ie(ie, name): s += make_valid_template.format(valid_url=ie._make_valid_url()) return s + # find the correct sorting and add the required base classes so that sublcasses # can be correctly created classes = _ALL_CLASSES[:-1] diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 8cb4a4638..764795bc5 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -41,5 +41,6 @@ def main(): with io.open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) + if __name__ == '__main__': main() diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index ce548739f..f9fe63f1f 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -74,5 +74,6 @@ def filter_options(readme): return ret + if __name__ == '__main__': main() diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 04728e8e2..60aaf76cc 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -44,5 +44,6 @@ def build_completion(opt_parser): with open(ZSH_COMPLETION_FILE, "w") as f: f.write(template) + parser = youtube_dl.parseOpts()[0] build_completion(parser) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index a98305c74..437c7270e 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -84,5 +84,6 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(ExtractorError, self.ie._download_json, uri, None) self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_aes.py b/test/test_aes.py index 315a3f5ae..54078a66d 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -51,5 +51,6 @@ class TestAES(unittest.TestCase): decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) + if 
__name__ == '__main__': unittest.main() diff --git a/test/test_download.py b/test/test_download.py index a3f1c0644..463952989 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -60,6 +60,7 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() + defs = gettestcases() @@ -217,6 +218,7 @@ def generator(test_case): return test_template + # And add them to TestDownload for n, test_case in enumerate(defs): test_method = generator(test_case) diff --git a/test/test_execution.py b/test/test_execution.py index 620db080e..11661bb68 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -39,5 +39,6 @@ class TestExecution(unittest.TestCase): _, stderr = p.communicate() self.assertFalse(stderr) + if __name__ == '__main__': unittest.main() diff --git a/test/test_http.py b/test/test_http.py index bb0a098e4..7a7a3510f 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -169,5 +169,6 @@ class TestProxy(unittest.TestCase): # b'xn--fiq228c' is '中文'.encode('idna') self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') + if __name__ == '__main__': unittest.main() diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py index 9d95cb606..789059dbe 100644 --- a/test/test_iqiyi_sdk_interpreter.py +++ b/test/test_iqiyi_sdk_interpreter.py @@ -43,5 +43,6 @@ class TestIqiyiSDKInterpreter(unittest.TestCase): ie._login() self.assertTrue('unable to log in:' in logger.messages[0]) + if __name__ == '__main__': unittest.main() diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index a9abae5f5..c24b8ca74 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -112,5 +112,6 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('z'), 5) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index cb75ca53e..2e3cd0179 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1075,5 +1075,6 @@ The first line 
self.assertEqual(get_element_by_class('foo', html), 'nice') self.assertEqual(get_element_by_class('no-such-class', html), None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py index 96a66f7a0..c1465fe8c 100644 --- a/test/test_verbose_output.py +++ b/test/test_verbose_output.py @@ -66,5 +66,6 @@ class TestVerboseOutput(unittest.TestCase): self.assertTrue(b'-p' in serr) self.assertTrue(b'secret' not in serr) + if __name__ == '__main__': unittest.main() diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py index 8de08f2d6..41abdfe3b 100644 --- a/test/test_write_annotations.py +++ b/test/test_write_annotations.py @@ -24,6 +24,7 @@ class YoutubeDL(youtube_dl.YoutubeDL): super(YoutubeDL, self).__init__(*args, **kwargs) self.to_stderr = self.to_screen + params = get_params({ 'writeannotations': True, 'skip_download': True, @@ -74,5 +75,6 @@ class TestAnnotations(unittest.TestCase): def tearDown(self): try_rm(ANNOTATIONS_FILE) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index af1c45421..7a33dbf88 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -66,5 +66,6 @@ class TestYoutubeLists(unittest.TestCase): for entry in result['entries']: self.assertTrue(entry.get('title')) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 060864434..f0c370eee 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -114,6 +114,7 @@ def make_tfunc(url, stype, sig_input, expected_sig): test_func.__name__ = str('test_signature_' + stype + '_' + test_id) setattr(TestSignature, test_func.__name__, test_func) + for test_spec in _TESTS: make_tfunc(*test_spec) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index af99cf1c0..6850d95e1 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ 
-449,4 +449,5 @@ def main(argv=None): except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') + __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index a01c367de..b8ff45481 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -174,6 +174,7 @@ def aes_decrypt_text(data, password, key_size_bytes): return plaintext + RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, @@ -328,4 +329,5 @@ def inc(data): break return data + __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b8aaf5a46..83ee7e257 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2491,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder): def doctype(self, name, pubid, system): pass + if sys.version_info[0] >= 3: def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) @@ -2787,6 +2788,7 @@ def workaround_optparse_bug9161(): return real_add_option(self, *bargs, **bkwargs) optparse.OptionGroup.add_option = _compat_add_option + if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 compat_get_terminal_size = shutil.get_terminal_size else: diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 0aeae3b8f..5d3e5d8d3 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -293,6 +293,7 @@ class FFmpegFD(ExternalFD): class AVconvFD(FFmpegFD): pass + _BY_NAME = dict( (klass.get_basename(), klass) for name, klass in globals().items() diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 104807242..63d19b3a5 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ 
-103,6 +103,7 @@ class ProxyType(object): SOCKS4A = 1 SOCKS5 = 2 + Proxy = collections.namedtuple('Proxy', ( 'type', 'host', 'port', 'username', 'password', 'remote_dns')) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 7cf490aa4..0c7158575 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -115,6 +115,8 @@ def _u30(reader): res = _read_int(reader) assert res & 0xf0000000 == 0 return res + + _u32 = _read_int @@ -176,6 +178,7 @@ class _Undefined(object): return 'undefined' __repr__ = __str__ + undefined = _Undefined() From 689f31fde52fc49c01cc8a8acd20464a3527501d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Nov 2016 00:17:46 +0700 Subject: [PATCH 25/80] [devscripts/create-github-release] Fill release body from ChangeLog (closes #11094) --- devscripts/create-github-release.py | 17 +++++++++++++---- devscripts/release.sh | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 3b8021e74..30716ad8e 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -2,11 +2,13 @@ from __future__ import unicode_literals import base64 +import io import json import mimetypes import netrc import optparse import os +import re import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -90,16 +92,23 @@ class GitHubReleaser(object): def main(): - parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH') + parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH') options, args = parser.parse_args() - if len(args) != 2: + if len(args) != 3: parser.error('Expected a version and a build directory') - version, build_path = args + changelog_file, version, build_path = args + + with io.open(changelog_file, encoding='utf-8') as inf: + changelog = inf.read() + + mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog) + body = 
mobj.group(1) if mobj else '' releaser = GitHubReleaser() - new_release = releaser.create_release(version, name='youtube-dl %s' % version) + new_release = releaser.create_release( + version, name='youtube-dl %s' % version, body=body) release_id = new_release['id'] for asset in os.listdir(build_path): diff --git a/devscripts/release.sh b/devscripts/release.sh index 1af61aa0b..4db5def5d 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -110,7 +110,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done ROOT=$(pwd) -python devscripts/create-github-release.py $version "$ROOT/build/$version" +python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version" ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" From 5f75c4a4ad5f406f9dc01bd872f3be54de87b23d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Nov 2016 00:19:55 +0700 Subject: [PATCH 26/80] [ChangeLog] Actualize --- ChangeLog | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9e9f5cfc9..99c9c1cf3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,9 @@ version Extractors +* [youtube:live] Relax _VALID_URL (#11164) * [openload] Fix extraction (#10408, #11122) +* [vlive] Prefer locale over language for subtitles id (#11203) version 2016.11.14.1 From b25459b88ae8faf072046eb9c23af0dcda7c1408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Nov 2016 00:25:24 +0700 Subject: [PATCH 27/80] release 2016.11.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fef9fc7a2..85ac137a1 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl 
--version` and ensure your version is *2016.11.14.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.14.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.14.1 +[debug] youtube-dl version 2016.11.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 99c9c1cf3..874230f42 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.18 Extractors * [youtube:live] Relax _VALID_URL (#11164) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9557b2000..ef9ccc08a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.14.1' +__version__ = '2016.11.18' From c131fc3372c4fc69434dbb7c79935c1587beff20 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Nov 2016 01:16:33 +0700 Subject: [PATCH 28/80] [tvanouvelles] Add extractor (closes #10616) --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/tvanouvelles.py | 65 ++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 youtube_dl/extractor/tvanouvelles.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 578359a5e..9107f0b96 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -965,6 +965,10 @@ from .tv2 import ( ) from .tv3 import TV3IE from .tv4 import TV4IE +from .tvanouvelles import ( + TVANouvellesIE, + TVANouvellesArticleIE, +) from .tvc import ( TVCIE, TVCArticleIE, diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py new file mode 100644 index 000000000..1086176a2 --- /dev/null +++ b/youtube_dl/extractor/tvanouvelles.py @@ -0,0 +1,65 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .brightcove import BrightcoveNewIE + + +class TVANouvellesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/videos/(?P\d+)' + _TEST = { + 'url': 'http://www.tvanouvelles.ca/videos/5117035533001', + 'info_dict': { + 'id': '5117035533001', + 'ext': 'mp4', + 'title': 'L’industrie du taxi dénonce l’entente entre Québec et Uber: explications', + 'description': 'md5:479653b7c8cf115747bf5118066bd8b3', + 'uploader_id': '1741764581', + 'timestamp': 1473352030, + 'upload_date': '20160908', + }, + 'add_ie': ['BrightcoveNew'], + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + brightcove_id = self._match_id(url) + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + BrightcoveNewIE.ie_key(), brightcove_id) + + +class TVANouvellesArticleIE(InfoExtractor): + _VALID_URL = 
r'https?://(?:www\.)?tvanouvelles\.ca/(?:[^/]+/)+(?P[^/?#&]+)' + _TEST = { + 'url': 'http://www.tvanouvelles.ca/2016/11/17/des-policiers-qui-ont-la-meche-un-peu-courte', + 'info_dict': { + 'id': 'des-policiers-qui-ont-la-meche-un-peu-courte', + 'title': 'Des policiers qui ont «la mèche un peu courte»?', + 'description': 'md5:92d363c8eb0f0f030de9a4a84a90a3a0', + }, + 'playlist_mincount': 4, + } + + @classmethod + def suitable(cls, url): + return False if TVANouvellesIE.suitable(url) else super(TVANouvellesArticleIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [ + self.url_result( + 'http://www.tvanouvelles.ca/videos/%s' % mobj.group('id'), + ie=TVANouvellesIE.ie_key(), video_id=mobj.group('id')) + for mobj in re.finditer( + r'data-video-id=(["\'])?(?P\d+)', webpage)] + + title = self._og_search_title(webpage, fatal=False) + description = self._og_search_description(webpage) + + return self.playlist_result(entries, display_id, title, description) From 748a462fbecc9c006d8e9ed6b3f596ff1893cf39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Nov 2016 01:49:13 +0700 Subject: [PATCH 29/80] [twitter:card] Relax _VALID_URL (closes #11225) --- youtube_dl/extractor/twitter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 3411fcf7e..ac0b221b4 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -25,7 +25,7 @@ class TwitterBaseIE(InfoExtractor): class TwitterCardIE(TwitterBaseIE): IE_NAME = 'twitter:card' - _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P\d+)' _TESTS = [ { 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', @@ -84,6 +84,9 @@ class 
TwitterCardIE(TwitterBaseIE): 'title': 'Twitter web player', 'thumbnail': 're:^https?://.*\.jpg', }, + }, { + 'url': 'https://twitter.com/i/videos/752274308186120192', + 'only_matching': True, }, ] From df46b19cb82b90807693d0d25ac5d817546dd63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Nov 2016 01:56:31 +0700 Subject: [PATCH 30/80] [toutv] Fix login form regex (closes #11223) --- youtube_dl/extractor/toutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 573f2ff6b..26d770992 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -56,7 +56,7 @@ class TouTvIE(InfoExtractor): 'state': state, }) login_form = self._search_regex( - r'(?s)(]+id="Form-login".+?)', login_webpage, 'login form') + r'(?s)(]+(?:id|name)="Form-login".+?)', login_webpage, 'login form') form_data = self._hidden_inputs(login_form) form_data.update({ 'login-email': email, From 08ec95a6dba54aeec398c99f422abb2a5b59a7e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Nov 2016 03:10:20 +0700 Subject: [PATCH 31/80] [ChangeLog] Actualize --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 874230f42..15129419c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ version 2016.11.18 Extractors -* [youtube:live] Relax _VALID_URL (#11164) +* [youtube:live] Relax URL regular expression (#11164) * [openload] Fix extraction (#10408, #11122) * [vlive] Prefer locale over language for subtitles id (#11203) From 0aacd2deb1075e0d4d4b8b23b9a65b3967a1d658 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Nov 2016 04:18:21 +0700 Subject: [PATCH 32/80] [bandcamp] Fix free downloads extraction and extract all formats (closes #11067) --- youtube_dl/extractor/bandcamp.py | 86 +++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 24 deletions(-) diff 
--git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 249c3d956..88c590e98 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -1,7 +1,9 @@ from __future__ import unicode_literals import json +import random import re +import time from .common import InfoExtractor from ..compat import ( @@ -12,6 +14,9 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + parse_filesize, + unescapeHTML, + update_url_query, ) @@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor): r'(?ms)var TralbumData = .*?[{,]\s*id: (?P\d+),?$', webpage, 'video id') - download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') - # We get the dictionary of the track from some javascript code - all_info = self._parse_json(self._search_regex( - r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) - info = all_info[0] - # We pick mp3-320 for now, until format selection can be easily implemented. - mp3_info = info['downloads']['mp3-320'] - # If we try to use this url it says the link has expired - initial_url = mp3_info['url'] - m_url = re.match( - r'(?Phttp://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P.*?)&id=(?P.*?)&ts=(?P.*)$', - initial_url) - # We build the url we will use to get the final track url - # This url is build in Bandcamp in the script download_bunde_*.js - request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) - final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') - # If we could correctly generate the .rand field the url would be - # in the "download_url" key - final_url = self._proto_relative_url(self._search_regex( - r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:') + download_webpage = self._download_webpage( + download_link, video_id, 'Downloading free downloads 
page') + + blob = self._parse_json( + self._search_regex( + r'data-blob=(["\'])(?P{.+?})\1', download_webpage, + 'blob', group='blob'), + video_id, transform_source=unescapeHTML) + + info = blob['digital_items'][0] + + downloads = info['downloads'] + track = info['title'] + + artist = info.get('artist') + title = '%s - %s' % (artist, track) if artist else track + + download_formats = {} + for f in blob['download_formats']: + name, ext = f.get('name'), f.get('file_extension') + if all(isinstance(x, compat_str) for x in (name, ext)): + download_formats[name] = ext.strip('.') + + formats = [] + for format_id, f in downloads.items(): + format_url = f.get('url') + if not format_url: + continue + # Stat URL generation algorithm is reverse engineered from + # download_*_bundle_*.js + stat_url = update_url_query( + format_url.replace('/download/', '/statdownload/'), { + '.rand': int(time.time() * 1000 * random.random()), + }) + format_id = f.get('encoding_name') or format_id + stat = self._download_json( + stat_url, video_id, 'Downloading %s JSON' % format_id, + transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], + fatal=False) + if not stat: + continue + retry_url = stat.get('retry_url') + if not isinstance(retry_url, compat_str): + continue + formats.append({ + 'url': self._proto_relative_url(retry_url, 'http:'), + 'ext': download_formats.get(format_id), + 'format_id': format_id, + 'format_note': f.get('description'), + 'filesize': parse_filesize(f.get('size_mb')), + 'vcodec': 'none', + }) + self._sort_formats(formats) return { 'id': video_id, - 'title': info['title'], - 'ext': 'mp3', - 'vcodec': 'none', - 'url': final_url, + 'title': title, 'thumbnail': info.get('thumb_url'), 'uploader': info.get('artist'), + 'artist': artist, + 'track': track, + 'formats': formats, } From 303b38fa84eee94a51961e5273b4dbe174266d26 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 20 Nov 2016 00:06:44 +0800 Subject: [PATCH 33/80] [ChangeLog] Update for #9028 --- ChangeLog 
| 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 15129419c..0d8174408 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +- [Crunchyroll] ScaledBorderAndShadow are removed from ASS subtitles + (#8207, #9028) + version 2016.11.18 Extractors From 8f8f182d0baf04c31e95582aa7eedea940e0cdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Nov 2016 02:13:21 +0700 Subject: [PATCH 34/80] [extractor/generic] Improve limelight embeds support --- youtube_dl/extractor/generic.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bde65fa27..f9707c155 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2232,6 +2232,16 @@ class GenericIE(InfoExtractor): return self.url_result('limelight:%s:%s' % ( lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + mobj = re.search( + r'''(?sx) + ]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*? 
+ ]+ + name=(["\'])flashVars\2[^>]+ + value=(["\'])(?:(?!\3).)*mediaId=(?P[a-z0-9]{32}) + ''', webpage) + if mobj: + return self.url_result('limelight:media:%s' % mobj.group('id')) + # Look for AdobeTVVideo embeds mobj = re.search( r']+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', From dc6a9e41955219daa4a353846a2183f6b5c910e5 Mon Sep 17 00:00:00 2001 From: Joseph Frazier <1212jtraceur@gmail.com> Date: Sun, 20 Nov 2016 11:32:00 -0500 Subject: [PATCH 35/80] [README.md] Update link from generated CONTRIBUTING.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 98e374420..ea9131c3a 100644 --- a/README.md +++ b/README.md @@ -930,7 +930,7 @@ If you want to create a build of youtube-dl yourself, you'll need ### Adding support for a new site -If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. +If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. 
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`): From dbcc4a6b32acdb13a872fcead50e6a254960a55e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 21 Nov 2016 12:25:19 +0800 Subject: [PATCH 36/80] [CONTRIBUTING.md] Fix broken links (#11239) --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0b5a5c1f8..495955bb5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -92,7 +92,7 @@ If you want to create a build of youtube-dl yourself, you'll need ### Adding support for a new site -If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. +If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`): From 2574721a81cd52fa4fda65fb0a494064c09308c3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 21 Nov 2016 12:50:13 +0800 Subject: [PATCH 37/80] Clean and ignore more file types ape is another audio codec seen in kuwo. 
See https://en.wikipedia.org/wiki/Monkey's_Audio --- .gitignore | 2 ++ Makefile | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 354505d66..87754f90f 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,8 @@ updates_key.pem *.mp3 *.3gp *.wav +*.ape +*.mkv *.part *.swp test/testdata diff --git a/Makefile b/Makefile index b7cec1666..68bbf5e96 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . 
-name "*.class" -delete From 4eece8ba572dfd009ea2d980bfc36d0adacb16d0 Mon Sep 17 00:00:00 2001 From: Andy Savicki Date: Wed, 16 Nov 2016 02:37:28 +0300 Subject: [PATCH 38/80] [funnyordie] Improve extraction --- youtube_dl/extractor/funnyordie.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 8c5ffc9e8..7664dd584 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -11,7 +11,7 @@ class FunnyOrDieIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?Pembed|articles|videos)/(?P[0-9a-f]+)(?:$|[?#/])' _TESTS = [{ 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', - 'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', + 'md5': 'c26b9ee0e1ca138c12071f59572ba9c7', 'info_dict': { 'id': '0732f586d7', 'ext': 'mp4', @@ -51,10 +51,7 @@ class FunnyOrDieIE(InfoExtractor): formats = [] - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - - bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)] + bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] bitrates.sort() for bitrate in bitrates: @@ -65,6 +62,11 @@ class FunnyOrDieIE(InfoExtractor): 'vbr': bitrate, }) + self._check_formats(formats, video_id) + + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + subtitles = {} for src, src_lang in re.findall(r' Date: Mon, 21 Nov 2016 23:46:55 +0700 Subject: [PATCH 39/80] [funnyordie] Copy formats' metadata from hls and sort formats --- youtube_dl/extractor/funnyordie.py | 47 +++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 7664dd584..f2928b5fe 100644 --- 
a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -11,7 +11,7 @@ class FunnyOrDieIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?Pembed|articles|videos)/(?P[0-9a-f]+)(?:$|[?#/])' _TESTS = [{ 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', - 'md5': 'c26b9ee0e1ca138c12071f59572ba9c7', + 'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', 'info_dict': { 'id': '0732f586d7', 'ext': 'mp4', @@ -28,6 +28,9 @@ class FunnyOrDieIE(InfoExtractor): 'description': 'Please use this to sell something. www.jonlajoie.com', 'thumbnail': 're:^http:.*\.jpg$', }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man', 'only_matching': True, @@ -51,21 +54,45 @@ class FunnyOrDieIE(InfoExtractor): formats = [] + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + source_formats = list(filter( + lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + m3u8_formats)) + bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] bitrates.sort() - for bitrate in bitrates: - for link in links: - formats.append({ - 'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])), - 'format_id': '%s-%d' % (link[1], bitrate), - 'vbr': bitrate, - }) + if source_formats: + self._sort_formats(source_formats) + for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)): + for path, ext in links: + ff = f.copy() + if ff: + if ext != 'mp4': + ff = dict( + [(k, v) for k, v in ff.items() + if k in ('height', 'width', 'format_id')]) + ff.update({ + 'format_id': ff['format_id'].replace('hls', ext), + 'ext': ext, + 'protocol': 'http', + }) + else: + ff.update({ + 'format_id': '%s-%d' % (ext, bitrate), + 'vbr': bitrate, + }) + ff['url'] = self._proto_relative_url( + '%s%d.%s' % (path, bitrate, ext)) + 
formats.append(ff) self._check_formats(formats, video_id) - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(m3u8_formats) + self._sort_formats( + formats, field_preference=('height', 'width', 'tbr', 'format_id')) subtitles = {} for src, src_lang in re.findall(r' Date: Tue, 22 Nov 2016 20:40:57 +0800 Subject: [PATCH 40/80] [amcnetworks] Recognize more BBC America URLs Closes #11263 --- ChangeLog | 1 + youtube_dl/extractor/amcnetworks.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 0d8174408..9ed42315e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [amcnetworks] Recognize more BBC America URLs (#11263) - [Crunchyroll] ScaledBorderAndShadow are removed from ASS subtitles (#8207, #9028) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index d2b03b177..87c803e94 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -10,7 +10,7 @@ from ..utils import ( class AMCNetworksIE(ThePlatformIE): - _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P[^/?#]+)' _TESTS = [{ 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', 'md5': '', @@ -41,6 +41,9 @@ class AMCNetworksIE(ThePlatformIE): }, { 'url': 'http://www.ifc.com/movies/chaos', 'only_matching': True, + }, { + 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version', + 'only_matching': True, }] def _real_extract(self, url): From 868630fbe5843ea9da5fd6fa826516f0dcbed20e Mon Sep 17 00:00:00 2001 From: Andy Savicki Date: Sun, 20 Nov 2016 02:12:22 
+0300 Subject: [PATCH 41/80] [hellporno] Add support for hellporno.net and improve ext extraction --- youtube_dl/extractor/hellporno.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py index 7a1c75b65..10da14067 100644 --- a/youtube_dl/extractor/hellporno.py +++ b/youtube_dl/extractor/hellporno.py @@ -6,12 +6,13 @@ from .common import InfoExtractor from ..utils import ( js_to_json, remove_end, + determine_ext, ) class HellPornoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P[^/]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P[^/]+)' + _TESTS = [{ 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/', 'md5': '1fee339c610d2049699ef2aa699439f1', 'info_dict': { @@ -22,7 +23,10 @@ class HellPornoIE(InfoExtractor): 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, } - } + }, { + 'url': 'http://hellporno.net/v/186271/', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) @@ -38,7 +42,7 @@ class HellPornoIE(InfoExtractor): video_id = flashvars.get('video_id') thumbnail = flashvars.get('preview_url') - ext = flashvars.get('postfix', '.mp4')[1:] + ext = determine_ext(flashvars.get('postfix'), 'mp4') formats = [] for video_url_key in ['video_url', 'video_alt_url']: From c8f56741dd531685e61f0f4418107318663f5ff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Nov 2016 22:29:37 +0700 Subject: [PATCH 42/80] [ChangeLog] Actualize --- ChangeLog | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9ed42315e..4127fd24f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,16 @@ version Extractors +* [hellporno] Fix video extension extraction (#11247) ++ [hellporno] Add support for hellporno.net (#11247) + [amcnetworks] Recognize more BBC America URLs (#11263) -- 
[Crunchyroll] ScaledBorderAndShadow are removed from ASS subtitles - (#8207, #9028) +* [funnyordie] Improve extraction (#11208) +* [extractor/generic] Improve limelight embeds support +- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028) +* [bandcamp] Fix free downloads extraction and extract all formats (#11067) +* [twitter:card] Relax URL regular expression (#11225) ++ [tvanouvelles] Add support for tvanouvelles.ca (#10616) + version 2016.11.18 From 3b5daf07362e401e84a5c32482dc3c9416bdd000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Nov 2016 22:32:16 +0700 Subject: [PATCH 43/80] release 2016.11.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 85ac137a1..b7fa566c8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.18 +[debug] youtube-dl version 2016.11.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 4127fd24f..2b35952fe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.22 Extractors * [hellporno] Fix video extension extraction (#11247) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 77832504a..7c485349d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -771,6 +771,8 @@ - **TV2Article** - **TV3** - **TV4**: tv4.se and tv4play.se + - **TVANouvelles** + - **TVANouvellesArticle** - **TVC** - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ef9ccc08a..3c746baac 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.18' +__version__ = '2016.11.22' From c867adc68c5dda0fafb2535c1a02ea32549b9d10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Nov 2016 23:28:32 +0700 Subject: [PATCH 44/80] [youtube:playlist] Pass disable_polymer in query (closes #11193, closes #11270) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7ccb875a5..bd24a2838 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1796,7 +1796,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): | ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,}) )""" - _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' + _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})&[^"]*?index=(?P\d+)(?:[^>]+>(?P[^<]+))?' IE_NAME = 'youtube:playlist' _TESTS = [{ From 44444f0d3ba8e448cc824d7722d865794fb6d5d3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 24 Nov 2016 20:32:17 +0800 Subject: [PATCH 45/80] [cbslocal] Support newyork.cbslocal.com Closes #11285 --- ChangeLog | 6 +++++ youtube_dl/extractor/cbslocal.py | 39 ++++++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2b35952fe..7e784ed76 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [cbslocal] Recognize New York site (#11285) + + version 2016.11.22 Extractors diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 289709c97..8d5f11dd1 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -4,11 +4,14 @@ from __future__ import unicode_literals from .anvato import AnvatoIE from .sendtonews import SendtoNewsIE from ..compat import compat_urlparse -from ..utils import unified_timestamp +from ..utils import ( + parse_iso8601, + unified_timestamp, +) class CBSLocalIE(AnvatoIE): - _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)' _TESTS = [{ # Anvato backend @@ -49,6 +52,31 @@ class CBSLocalIE(AnvatoIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 
'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/', + 'info_dict': { + 'id': '3580809', + 'ext': 'mp4', + 'title': 'A Very Blue Anniversary', + 'description': 'CBS2’s Cindy Hsu has more.', + 'thumbnail': 're:^https?://.*', + 'timestamp': 1479962220, + 'upload_date': '20161124', + 'uploader': 'CBS', + 'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\WCBSTV', + 'Syndication\\AOL', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\Yahoo', + 'Content\\News', + 'Content\\News\\Local News', + ], + 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'], + }, }] def _real_extract(self, url): @@ -64,8 +92,11 @@ class CBSLocalIE(AnvatoIE): info_dict = self._extract_anvato_videos(webpage, display_id) time_str = self._html_search_regex( - r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) - timestamp = unified_timestamp(time_str) + r'class="entry-date">([^<]+)<', webpage, 'released date', default=None) + if time_str: + timestamp = unified_timestamp(time_str) + else: + timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage)) info_dict.update({ 'display_id': display_id, From b68599ed473c24477cefb3f09580e7a8cbb666d9 Mon Sep 17 00:00:00 2001 From: zurfyx <zurfyx@gmail.com> Date: Sat, 19 Nov 2016 19:23:49 +0100 Subject: [PATCH 46/80] [mitele] Relax _VALID_URL --- youtube_dl/extractor/mitele.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index c41ab1e91..48d94992c 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -75,7 +75,7 @@ class MiTeleBaseIE(InfoExtractor): class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' - _VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player' + _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player' _TESTS = [{ 'url': 
'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', @@ -109,6 +109,9 @@ class MiTeleIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['Ooyala'], + }, { + 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', + 'only_matching': True, }] def _real_extract(self, url): From 8eb7b5c3f170d8791d37ae980cd5024eba1c83c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Nov 2016 22:43:02 +0700 Subject: [PATCH 47/80] [mitele] Modernize and extract more metadata --- youtube_dl/extractor/mitele.py | 90 +++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 48d94992c..f577836be 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -86,7 +86,10 @@ class MiTeleIE(InfoExtractor): 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'series': 'Diario de', 'season': 'La redacción', + 'season_number': 14, + 'season_id': 'diario_de_t14_11981', 'episode': 'Programa 144', + 'episode_number': 3, 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, @@ -101,7 +104,10 @@ class MiTeleIE(InfoExtractor): 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'series': 'Cuarto Milenio', 'season': 'Temporada 6', + 'season_number': 6, + 'season_id': 'cuarto_milenio_t06_12715', 'episode': 'Programa 226', + 'episode_number': 24, 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 7313, }, @@ -118,35 +124,68 @@ class MiTeleIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None) - gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script') + gigya_url = self._search_regex( + 
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>', + webpage, 'gigya', default=None) + gigya_sc = self._download_webpage( + compat_urlparse.urljoin('http://www.mitele.es/', gigya_url), + video_id, 'Downloading gigya script') + # Get a appKey/uuid for getting the session key - appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable') - appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey') - uid = compat_str(uuid.uuid4()) - session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid) - session_json = self._download_json(session_url, video_id, 'Downloading session keys') - sessionKey = compat_str(session_json['sessionKey']) + appKey_var = self._search_regex( + r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)', + gigya_sc, 'appKey variable') + appKey = self._search_regex( + r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey') + + session_json = self._download_json( + 'https://appgrid-api.cloud.accedo.tv/session', + video_id, 'Downloading session keys', query={ + 'appKey': appKey, + 'uuid': compat_str(uuid.uuid4()), + }) + + paths = self._download_json( + 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration', + video_id, 'Downloading paths JSON', + query={'sessionKey': compat_str(session_json['sessionKey'])}) - paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey - paths = self._download_json(paths_url, video_id, 'Downloading paths JSON') ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] - data_p = ( - 'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] + - '/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full') - data = self._download_json(data_p, video_id, 'Downloading data JSON') - source = 
data['hits']['hits'][0]['_source'] - embedCode = source['offers'][0]['embed_codes'][0] + source = self._download_json( + 'http://%s%s%s/docs/%s' % ( + ooyala_s['base_url'], ooyala_s['full_path'], + ooyala_s['provider_id'], video_id), + video_id, 'Downloading data JSON', query={ + 'include_titles': 'Series,Season', + 'product_name': 'test', + 'format': 'full', + })['hits']['hits'][0]['_source'] + embedCode = source['offers'][0]['embed_codes'][0] titles = source['localizable_titles'][0] + title = titles.get('title_medium') or titles['title_long'] - episode = titles['title_sort_name'] - description = titles['summary_long'] - titles_series = source['localizable_titles_series'][0] - series = titles_series['title_long'] - titles_season = source['localizable_titles_season'][0] - season = titles_season['title_medium'] - duration = parse_duration(source['videos'][0]['duration']) + + description = titles.get('summary_long') or titles.get('summary_medium') + + def get(key1, key2): + value1 = source.get(key1) + if not value1 or not isinstance(value1, list): + return + if not isinstance(value1[0], dict): + return + return value1[0].get(key2) + + series = get('localizable_titles_series', 'title_medium') + + season = get('localizable_titles_season', 'title_medium') + season_number = int_or_none(source.get('season_number')) + season_id = source.get('season_id') + + episode = titles.get('title_sort_name') + episode_number = int_or_none(source.get('episode_number')) + + duration = parse_duration(get('videos', 'duration')) return { '_type': 'url_transparent', @@ -157,7 +196,10 @@ class MiTeleIE(InfoExtractor): 'description': description, 'series': series, 'season': season, + 'season_number': season_number, + 'season_id': season_id, 'episode': episode, + 'episode_number': episode_number, 'duration': duration, - 'thumbnail': source['images'][0]['url'], + 'thumbnail': get('images', 'url'), } From 8b27d83e4e07064898c5ec842e916c84cf7a1826 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Erickson" <andy@bolt.me> Date: Wed, 9 Nov 2016 14:54:17 -0800 Subject: [PATCH 48/80] vevo: fixing naming when there are featured artists --- youtube_dl/extractor/vevo.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 783efda7d..ce607945f 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -91,14 +91,30 @@ class VevoIE(VevoBaseIE): 'info_dict': { 'id': 'USUV71503000', 'ext': 'mp4', - 'title': 'K Camp - Till I Die', + 'title': 'K Camp ft. T.I. - Till I Die', 'age_limit': 18, 'timestamp': 1449468000, 'upload_date': '20151207', 'uploader': 'K Camp', 'track': 'Till I Die', 'artist': 'K Camp', - 'genre': 'Rap/Hip-Hop', + 'genre': 'Hip-Hop', + }, + }, { + 'note': 'Featured test', + 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', + 'md5': 'd28675e5e8805035d949dc5cf161071d', + 'info_dict': { + 'id': 'USUV71402190', + 'ext': 'mp4', + 'title': 'Lemaitre ft. 
LoLo - Wait', + 'age_limit': 0, + 'timestamp': 1413432000, + 'upload_date': '20141016', + 'uploader': 'Lemaitre', + 'track': 'Wait', + 'artist': 'Lemaitre', + 'genre': 'Electronic', }, }, { 'note': 'Only available via webpage', @@ -242,8 +258,15 @@ class VevoIE(VevoBaseIE): timestamp = parse_iso8601(video_info.get('releaseDate')) artists = video_info.get('artists') - if artists: - artist = uploader = artists[0]['name'] + for curr_artist in artists: + if 'role' in curr_artist: + if curr_artist['role'] == 'Featured': + featured_artist = curr_artist['name'] + elif curr_artist['role'] == 'Main': + artist = uploader = curr_artist['name'] + else: + artist = uploader = curr_artist['name'] + break view_count = int_or_none(video_info.get('views', {}).get('total')) for video_version in video_versions: From e94eeb1dd3e3171e6409313c619b248da0dd4886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Nov 2016 23:09:35 +0700 Subject: [PATCH 49/80] [vevo] Simplify artists extraction --- youtube_dl/extractor/vevo.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index ce607945f..5aa097885 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -259,14 +259,10 @@ class VevoIE(VevoBaseIE): timestamp = parse_iso8601(video_info.get('releaseDate')) artists = video_info.get('artists') for curr_artist in artists: - if 'role' in curr_artist: - if curr_artist['role'] == 'Featured': - featured_artist = curr_artist['name'] - elif curr_artist['role'] == 'Main': - artist = uploader = curr_artist['name'] + if curr_artist.get('role') == 'Featured': + featured_artist = curr_artist['name'] else: artist = uploader = curr_artist['name'] - break view_count = int_or_none(video_info.get('views', {}).get('total')) for video_version in video_versions: From 1db058466dfa8c0e647dbd57938b63f04a7a84c7 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Nov 2016 23:10:58 +0700 Subject: [PATCH 50/80] [vevo] Allow video info to fail in tests --- youtube_dl/extractor/vevo.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 5aa097885..d82261e5e 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -51,7 +51,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Hurts', 'genre': 'Pop', }, - 'expected_warnings': ['Unable to download SMIL file'], + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'v3 SMIL format', 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', @@ -67,7 +67,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Cassadee Pope', 'genre': 'Country', }, - 'expected_warnings': ['Unable to download SMIL file'], + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'Age-limited video', 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', @@ -83,7 +83,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Justin Timberlake', 'genre': 'Pop', }, - 'expected_warnings': ['Unable to download SMIL file'], + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'No video_info', 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', @@ -100,6 +100,7 @@ class VevoIE(VevoBaseIE): 'artist': 'K Camp', 'genre': 'Hip-Hop', }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'Featured test', 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', @@ -116,6 +117,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Lemaitre', 'genre': 'Electronic', }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'Only available via webpage', 'url': 
'http://www.vevo.com/watch/GBUV71600656', From 74394b5e10c1a681022e99fe1955837fb9078f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Nov 2016 23:37:32 +0700 Subject: [PATCH 51/80] [puls4] Relax _VALID_URL (closes #11267) --- youtube_dl/extractor/puls4.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py index 1c54af002..80091b85f 100644 --- a/youtube_dl/extractor/puls4.py +++ b/youtube_dl/extractor/puls4.py @@ -10,7 +10,7 @@ from ..utils import ( class Puls4IE(ProSiebenSat1BaseIE): - _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>(?:[^/]+/)*?videos/[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)' _TESTS = [{ 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', @@ -22,6 +22,12 @@ class Puls4IE(ProSiebenSat1BaseIE): 'upload_date': '20160830', 'uploader': 'PULS_4', }, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer', + 'only_matching': True, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598', + 'only_matching': True, }] _TOKEN = 'puls4' _SALT = '01!kaNgaiNgah1Ie4AeSha' From 9338a0eae34de9e81bc6b1cee5a000bc6ff9256c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Nov 2016 00:13:46 +0700 Subject: [PATCH 52/80] [viki] Fix rtmp formats extraction (closes #11255) --- youtube_dl/extractor/viki.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 4351ac457..cb8bfb348 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -1,11 +1,12 @@ # 
coding: utf-8 from __future__ import unicode_literals -import json -import time -import hmac import hashlib +import hmac import itertools +import json +import re +import time from .common import InfoExtractor from ..utils import ( @@ -276,9 +277,13 @@ class VikiIE(VikiBaseIE): height = int_or_none(self._search_regex( r'^(\d+)[pP]$', format_id, 'height', default=None)) for protocol, format_dict in stream_dict.items(): + # rtmps URLs does not seem to work + if protocol == 'rtmps': + continue + format_url = format_dict['url'] if format_id == 'm3u8': m3u8_formats = self._extract_m3u8_formats( - format_dict['url'], video_id, 'mp4', + format_url, video_id, 'mp4', entry_protocol='m3u8_native', preference=-1, m3u8_id='m3u8-%s' % protocol, fatal=False) # Despite CODECS metadata in m3u8 all video-only formats @@ -287,9 +292,23 @@ class VikiIE(VikiBaseIE): if f.get('acodec') == 'none' and f.get('vcodec') != 'none': f['acodec'] = None formats.extend(m3u8_formats) + elif format_url.startswith('rtmp'): + mobj = re.search( + r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$', + format_url) + if not mobj: + continue + formats.append({ + 'format_id': 'rtmp-%s' % format_id, + 'ext': 'flv', + 'url': mobj.group('url'), + 'play_path': mobj.group('playpath'), + 'app': mobj.group('app'), + 'page_url': url, + }) else: formats.append({ - 'url': format_dict['url'], + 'url': format_url, 'format_id': '%s-%s' % (format_id, protocol), 'height': height, }) From 560c8c6ec033b7b436c49b708d9d7362e7672aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Nov 2016 00:14:09 +0700 Subject: [PATCH 53/80] [viki] Prefer hls --- youtube_dl/extractor/viki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index cb8bfb348..9c48701c1 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -284,7 +284,7 @@ class VikiIE(VikiBaseIE): if format_id == 'm3u8': 
m3u8_formats = self._extract_m3u8_formats( format_url, video_id, 'mp4', - entry_protocol='m3u8_native', preference=-1, + entry_protocol='m3u8_native', m3u8_id='m3u8-%s' % protocol, fatal=False) # Despite CODECS metadata in m3u8 all video-only formats # are actually video+audio From 69016738688199f95e6f732e4a5c68c99988309c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Nov 2016 15:40:28 +0700 Subject: [PATCH 54/80] [azubu] Add support for azubu.uol.com.br (closes #11305) --- youtube_dl/extractor/azubu.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 72e1bd59d..1eebf5dfd 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -11,7 +11,7 @@ from ..utils import ( class AzubuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)' _TESTS = [ { 'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1', @@ -103,12 +103,15 @@ class AzubuIE(InfoExtractor): class AzubuLiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$' - _TEST = { + _TESTS = [{ 'url': 'http://www.azubu.tv/MarsTVMDLen', 'only_matching': True, - } + }, { + 'url': 'http://azubu.uol.com.br/adolfz', + 'only_matching': True, + }] def _real_extract(self, url): user = self._match_id(url) From f25e1c8d8c145ea4044b56786256cd71f861cf62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Nov 2016 19:54:59 +0700 Subject: [PATCH 55/80] [webcaster] Add support for webcaster.pro --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/webcaster.py | 85 ++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 
youtube_dl/extractor/webcaster.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9107f0b96..d71d01de3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1121,6 +1121,10 @@ from .wdr import ( WDRIE, WDRMobileIE, ) +from .webcaster import ( + WebcasterIE, + WebcasterFeedIE, +) from .webofstories import ( WebOfStoriesIE, WebOfStoriesPlaylistIE, diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dl/extractor/webcaster.py new file mode 100644 index 000000000..d366511a2 --- /dev/null +++ b/youtube_dl/extractor/webcaster.py @@ -0,0 +1,85 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + xpath_text, +) + + +class WebcasterIE(InfoExtractor): + _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)' + _TESTS = [{ + # http://video.khl.ru/quotes/393859 + 'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18', + 'md5': '0c162f67443f30916ff1c89425dcd4cd', + 'info_dict': { + 'id': 'c8cefd240aa593681c8d068cff59f407_hd', + 'ext': 'mp4', + 'title': 'Сибирь - Нефтехимик. 
Лучшие моменты первого периода', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, { + 'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_xml(url, video_id) + + title = xpath_text(video, './/event_name', 'event name', fatal=True) + + def make_id(parts, separator): + return separator.join(filter(None, parts)) + + formats = [] + for format_id in (None, 'noise'): + track_tag = make_id(('track', format_id), '_') + for track in video.findall('.//iphone/%s' % track_tag): + track_url = track.text + if not track_url: + continue + if determine_ext(track_url) == 'm3u8': + m3u8_formats = self._extract_m3u8_formats( + track_url, video_id, 'mp4', + entry_protocol='m3u8_native', + m3u8_id=make_id(('hls', format_id), '-'), fatal=False) + for f in m3u8_formats: + f.update({ + 'source_preference': 0 if format_id == 'noise' else 1, + 'format_note': track.get('title'), + }) + formats.extend(m3u8_formats) + self._sort_formats(formats) + + thumbnail = xpath_text(video, './/image', 'thumbnail') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } + + +class WebcasterFeedIE(InfoExtractor): + _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)' + _TEST = { + 'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104', + 'only_matching': True, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + feed = self._download_xml(url, video_id) + + video_url = xpath_text( + feed, ('video_hd', 'video'), 'video url', fatal=True) + + return self.url_result(video_url, WebcasterIE.ie_key()) From 83f1481baae72ca17364a12bec6ebcbe30234a3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: 
Sun, 27 Nov 2016 19:56:32 +0700 Subject: [PATCH 56/80] [extractor/generic] Add support for webcaster.pro embeds --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/webcaster.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f9707c155..5aac65162 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -60,6 +60,7 @@ from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE from .videomore import VideomoreIE +from .webcaster import WebcasterFeedIE from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE @@ -2140,6 +2141,11 @@ class GenericIE(InfoExtractor): if videomore_url: return self.url_result(videomore_url) + # Look for Webcaster embeds + webcaster_url = WebcasterFeedIE._extract_url(self, webpage) + if webcaster_url: + return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key()) + # Look for Playwire embeds mobj = re.search( r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage) diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dl/extractor/webcaster.py index d366511a2..7486cb347 100644 --- a/youtube_dl/extractor/webcaster.py +++ b/youtube_dl/extractor/webcaster.py @@ -74,6 +74,23 @@ class WebcasterFeedIE(InfoExtractor): 'only_matching': True, } + @staticmethod + def _extract_url(ie, webpage): + mobj = re.search( + r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)', + webpage) + if mobj: + return mobj.group('url') + for secure in (True, False): + video_url = ie._og_search_video_url( + webpage, secure=secure, default=None) + if video_url: + mobj = re.search( + r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)', + video_url) + if mobj: + 
return mobj.group('url') + def _real_extract(self, url): video_id = self._match_id(url) From 294d4926d70d9b0bde38288c872a77ae5a95c6b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Nov 2016 20:04:03 +0700 Subject: [PATCH 57/80] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7e784ed76..bb07fef32 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,15 @@ version <unreleased> Extractors ++ [webcaster] Add support for webcaster.pro ++ [azubu] Add support for azubu.uol.com.br (#11305) +* [viki] Prefer hls formats +* [viki] Fix rtmp formats extraction (#11255) +* [puls4] Relax URL regular expression (#11267) +* [vevo] Improve artist extraction (#10911) +* [mitele] Relax URL regular expression and extract more metadata (#11244) + [cbslocal] Recognize New York site (#11285) ++ [youtube:playlist] Pass disable_polymer in URL query (#11193) version 2016.11.22 From 2b380fc299adbea416b4bf81ea9a4c7d11c294f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Nov 2016 20:05:32 +0700 Subject: [PATCH 58/80] release 2016.11.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b7fa566c8..0d96f651f 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.22 +[debug] youtube-dl version 2016.11.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index bb07fef32..5515a08ff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.11.27 Extractors + [webcaster] Add support for webcaster.pro diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7c485349d..d9ad7bd1f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -882,6 +882,8 @@ - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile** + - **Webcaster** + - **WebcasterFeed** - **WebOfStories** - **WebOfStoriesPlaylist** - **WeiqiTV**: WQTV diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3c746baac..db7da3985 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = 
'2016.11.22' +__version__ = '2016.11.27' From 51b1378eeddb60bd99199741f2fcee29d8389142 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 27 Nov 2016 22:01:07 +0800 Subject: [PATCH 59/80] Ignore and clean .swf files Some videos on NicoNico are swf --- .gitignore | 1 + Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 87754f90f..9ce4b5e2d 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ updates_key.pem *.wav *.ape *.mkv +*.swf *.part *.swp test/testdata diff --git a/Makefile b/Makefile index 68bbf5e96..9d1ddc9d1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . 
-name "*.class" -delete From 89533199160d484f94a9923016cb9a7921ae4956 Mon Sep 17 00:00:00 2001 From: felix <felix.von.s@posteo.de> Date: Mon, 28 Nov 2016 17:17:56 +0100 Subject: [PATCH 60/80] [screenwavemedia] Remove extractor Rewrite TeamFourStar and Normalboots extractors in terms of JWPlatform --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/generic.py | 16 --- youtube_dl/extractor/normalboots.py | 12 +- youtube_dl/extractor/screenwavemedia.py | 146 ------------------------ youtube_dl/extractor/teamfourstar.py | 48 ++++++++ 5 files changed, 54 insertions(+), 170 deletions(-) delete mode 100644 youtube_dl/extractor/screenwavemedia.py create mode 100644 youtube_dl/extractor/teamfourstar.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d71d01de3..563457fcb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -804,7 +804,6 @@ from .scivee import SciVeeIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .screenjunkies import ScreenJunkiesIE -from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE @@ -897,6 +896,7 @@ from .teachertube import ( ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE +from .teamfourstar import TeamFourStarIE from .techtalks import TechTalksIE from .ted import TEDIE from .tele13 import Tele13IE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5aac65162..3949c8bf7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -56,7 +56,6 @@ from .dailymotion import ( ) from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE -from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE from .videomore import VideomoreIE @@ -1190,16 
+1189,6 @@ class GenericIE(InfoExtractor): 'duration': 248.667, }, }, - # ScreenwaveMedia embed - { - 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1', - 'md5': '24ace5baba0d35d55c6810b51f34e9e0', - 'info_dict': { - 'id': 'cinemasnob-55d26273809dd', - 'ext': 'mp4', - 'title': 'cinemasnob', - }, - }, # BrightcoveInPageEmbed embed { 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', @@ -2212,11 +2201,6 @@ class GenericIE(InfoExtractor): if jwplatform_url: return self.url_result(jwplatform_url, 'JWPlatform') - # Look for ScreenwaveMedia embeds - mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) - if mobj is not None: - return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia') - # Look for Digiteka embeds digiteka_url = DigitekaIE._extract_url(webpage) if digiteka_url: diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 6aa0895b8..61fe571df 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .screenwavemedia import ScreenwaveMediaIE +from .jwplatform import JWPlatformIE from ..utils import ( unified_strdate, @@ -25,7 +25,7 @@ class NormalbootsIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'add_ie': ['ScreenwaveMedia'], + 'add_ie': ['JWPlatform'], } def _real_extract(self, url): @@ -39,15 +39,13 @@ class NormalbootsIE(InfoExtractor): r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', webpage, 'date', fatal=False)) - screenwavemedia_url = self._html_search_regex( - ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL', - group='url') + jwplatform_url = JWPlatformIE._extract_url(webpage) return { '_type': 'url_transparent', 'id': video_id, - 'url': screenwavemedia_url, - 'ie_key': ScreenwaveMediaIE.ie_key(), + 'url': 
jwplatform_url, + 'ie_key': JWPlatformIE.ie_key(), 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py deleted file mode 100644 index 7d77e8825..000000000 --- a/youtube_dl/extractor/screenwavemedia.py +++ /dev/null @@ -1,146 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, - js_to_json, -) - - -class ScreenwaveMediaIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' - EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1' - _TESTS = [{ - 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - playerdata = self._download_webpage( - 'http://player.screenwavemedia.com/player.php?id=%s' % video_id, - video_id, 'Downloading player webpage') - - vidtitle = self._search_regex( - r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/') - - playerconfig = self._download_webpage( - 'http://player.screenwavemedia.com/player.js', - video_id, 'Downloading playerconfig webpage') - - videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver') - - sources = self._parse_json( - js_to_json( - re.sub( - r'(?s)/\*.*?\*/', '', - self._search_regex( - r'sources\s*:\s*(\[[^\]]+?\])', playerconfig, - 'sources', - ).replace( - "' + thisObj.options.videoserver + '", - videoserver - ).replace( - "' + playerVidId + '", - video_id - ) - ) - ), - video_id, fatal=False - ) - - # Fallback to hardcoded sources if JS changes 
again - if not sources: - self.report_warning('Falling back to a hardcoded list of streams') - sources = [{ - 'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id), - 'type': 'mp4', - 'label': format_label, - } for format_id, format_label in ( - ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))] - sources.append({ - 'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id), - 'type': 'hls', - }) - - formats = [] - for source in sources: - file_ = source.get('file') - if not file_: - continue - if source.get('type') == 'hls': - formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4')) - else: - format_id = self._search_regex( - r'_(.+?)\.[^.]+$', file_, 'format id', default=None) - if not self._is_valid_url(file_, video_id, format_id or 'video'): - continue - format_label = source.get('label') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]', format_label, 'height', default=None)) - formats.append({ - 'url': file_, - 'format_id': format_id, - 'format': format_label, - 'ext': source.get('type'), - 'height': height, - }) - self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) - - return { - 'id': video_id, - 'title': vidtitle, - 'formats': formats, - } - - -class TeamFourIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?' 
- _TEST = { - 'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/', - 'info_dict': { - 'id': 'TeamFourStar-5292a02f20bfa', - 'ext': 'mp4', - 'upload_date': '20130401', - 'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar', - 'title': 'A Moment With TFS Episode 4', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - playerdata_url = self._search_regex( - r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', - webpage, 'player data URL') - - video_title = self._html_search_regex( - r'<div class="heroheadingtitle">(?P<title>.+?)</div>', - webpage, 'title') - video_date = unified_strdate(self._html_search_regex( - r'<div class="heroheadingdate">(?P<date>.+?)</div>', - webpage, 'date', fatal=False)) - video_description = self._html_search_regex( - r'(?s)<div class="postcontent">(?P<description>.+?)</div>', - webpage, 'description', fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': playerdata_url, - } diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py new file mode 100644 index 000000000..a4db2ca98 --- /dev/null +++ b/youtube_dl/extractor/teamfourstar.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .jwplatform import JWPlatformIE +from ..utils import unified_strdate + + +class TeamFourStarIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)/?' 
+ _TEST = { + 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/', + 'info_dict': { + 'id': '0WdZO31W', + 'title': 'TFS Abridged Parody Episode 1', + 'description': 'Episode 1: The Return of Raditz! … Wait…\nCast\nMasakoX – Goku, Roshi\nLanipator – Piccolo, Radditz, Krillin, Vegeta\nVegeta3986 – Radditz, Yamcha, Oolong, Gohan\nHbi2k – Farmer with Shotgun\nMegami33 – Bulma, Puar\nTakahata101 – Nappa\nKaiserNeko – SpacePod\nSongs\nMorgenstemning by Edvard Hagerup Grieg\nCha-La-Head-Cha-La by Kageyama Hiranobu\nWE DO NOT OWN DRAGONBALL. DragonBall is Owned by TOEI ANIMATION, Ltd. and Licensed by FUNimation Productions, Ltd.. All Rights Reserved. DragonBall, DragonBall Z, DragonBall GT and all logos, character names and distinctive likenesses thereof are trademarks of TOEI ANIMATION, Ltd.\nThis is nothing more than a Parody made for entertainment purposes only.', + 'ext': 'mp4', + 'timestamp': 1394168400, + 'upload_date': '20080508', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + jwplatform_url = JWPlatformIE._extract_url(webpage) + + video_title = self._html_search_regex( + r'<h1 class="entry-title">(?P<title>.+?)</h1>', + webpage, 'title') + video_date = unified_strdate(self._html_search_regex( + r'<span class="meta-date date updated">(?P<date>.+?)</span>', + webpage, 'date', fatal=False)) + video_description = self._html_search_regex( + r'(?s)<div class="content-inner">.*?(?P<description><p>.+?)</div>', + webpage, 'description', fatal=False) + video_thumbnail = self._og_search_thumbnail(webpage) + + return { + '_type': 'url_transparent', + 'display_id': display_id, + 'title': video_title, + 'description': video_description, + 'upload_date': video_date, + 'thumbnail': video_thumbnail, + 'url': jwplatform_url, + } From c2530d3319fd32adfc43cc349b9491040ee631d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Nov 2016 
23:22:29 +0700 Subject: [PATCH 61/80] [teamfourstar] Simplify _VALID_URL and relax regexes --- youtube_dl/extractor/teamfourstar.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py index a4db2ca98..a8c6ed7be 100644 --- a/youtube_dl/extractor/teamfourstar.py +++ b/youtube_dl/extractor/teamfourstar.py @@ -7,13 +7,13 @@ from ..utils import unified_strdate class TeamFourStarIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)/?' + _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)' _TEST = { 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/', 'info_dict': { 'id': '0WdZO31W', 'title': 'TFS Abridged Parody Episode 1', - 'description': 'Episode 1: The Return of Raditz! … Wait…\nCast\nMasakoX – Goku, Roshi\nLanipator – Piccolo, Radditz, Krillin, Vegeta\nVegeta3986 – Radditz, Yamcha, Oolong, Gohan\nHbi2k – Farmer with Shotgun\nMegami33 – Bulma, Puar\nTakahata101 – Nappa\nKaiserNeko – SpacePod\nSongs\nMorgenstemning by Edvard Hagerup Grieg\nCha-La-Head-Cha-La by Kageyama Hiranobu\nWE DO NOT OWN DRAGONBALL. DragonBall is Owned by TOEI ANIMATION, Ltd. and Licensed by FUNimation Productions, Ltd.. All Rights Reserved. 
DragonBall, DragonBall Z, DragonBall GT and all logos, character names and distinctive likenesses thereof are trademarks of TOEI ANIMATION, Ltd.\nThis is nothing more than a Parody made for entertainment purposes only.', + 'description': 'md5:d60bc389588ebab2ee7ad432bda953ae', 'ext': 'mp4', 'timestamp': 1394168400, 'upload_date': '20080508', @@ -27,13 +27,13 @@ class TeamFourStarIE(InfoExtractor): jwplatform_url = JWPlatformIE._extract_url(webpage) video_title = self._html_search_regex( - r'<h1 class="entry-title">(?P<title>.+?)</h1>', + r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>', webpage, 'title') video_date = unified_strdate(self._html_search_regex( - r'<span class="meta-date date updated">(?P<date>.+?)</span>', + r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>', webpage, 'date', fatal=False)) video_description = self._html_search_regex( - r'(?s)<div class="content-inner">.*?(?P<description><p>.+?)</div>', + r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>', webpage, 'description', fatal=False) video_thumbnail = self._og_search_thumbnail(webpage) From cc61fc3934bb3d130e814e2d2345fe6cda2ad9c3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 29 Nov 2016 10:11:08 +0100 Subject: [PATCH 62/80] [comedycentral] Add new extractor for full-episodes CC seems to have added yet another indirection for full episodes - the mgid is now only in a linked feed. This may be a little brittle, but it's better than failing outright. 
Plus, the current The Daily Show episode now works :) --- youtube_dl/extractor/comedycentral.py | 33 ++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 88346dde7..528ff7fa3 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class ComedyCentralIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ - (video-clips|episodes|cc-studios|video-collections|full-episodes|shows) + (video-clips|episodes|cc-studios|video-collections|shows) /(?P<title>.*)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' @@ -27,6 +27,37 @@ class ComedyCentralIE(MTVServicesInfoExtractor): }] +class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ + (?:full-episodes) + /(?P<id>[^?]+)''' + _FEED_URL = 'http://comedycentral.com/feeds/mrss/' + + _TESTS = [{ + 'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028', + 'info_dict': { + 'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."', + 'title': 'November 28, 2016 - Ryan Speedo Green', + }, + 'playlist_count': 4, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed') + feed = self._parse_json(feed_json, playlist_id) + zones = feed['manifest']['zones'] + + video_zone = zones['t2_lc_promo1'] + feed = self._download_json(video_zone['feed'], playlist_id) + mgid = feed['result']['data']['id'] + + videos_info = 
self._get_videos_info(mgid) + return videos_info + + class ToshIE(MTVServicesInfoExtractor): IE_DESC = 'Tosh.0' _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)' diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 563457fcb..46d007b7d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -180,6 +180,7 @@ from .cnn import ( from .coub import CoubIE from .collegerama import CollegeRamaIE from .comedycentral import ( + ComedyCentralFullEpisodesIE, ComedyCentralIE, ComedyCentralShortnameIE, ComedyCentralTVIE, From 6303fc820417423585b681a4415b0020e0e8dd31 Mon Sep 17 00:00:00 2001 From: Mark Lee <malept@users.noreply.github.com> Date: Tue, 29 Nov 2016 08:06:01 -0800 Subject: [PATCH 63/80] [spike] Fix full episodes extraction --- youtube_dl/extractor/mtv.py | 5 +++-- youtube_dl/extractor/spike.py | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 74a3a035e..03351917e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -13,6 +13,7 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, + NO_DEFAULT, RegexNotFoundError, sanitized_Request, strip_or_none, @@ -201,7 +202,7 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage): + def _extract_mgid(self, webpage, default=NO_DEFAULT): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -221,7 +222,7 @@ class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 
'mgid', default=default) return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 218785ee4..abfee3ece 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .mtv import MTVServicesInfoExtractor @@ -16,6 +18,15 @@ class SpikeIE(MTVServicesInfoExtractor): 'timestamp': 1388120400, 'upload_date': '20131227', }, + }, { + 'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209', + 'md5': 'b25c6f16418aefb9ad5a6cae2559321f', + 'info_dict': { + 'id': '37ace3a8-1df6-48be-85b8-38df8229e241', + 'ext': 'mp4', + 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1', + 'description': 'md5:a739ca8f978a7802f67f8016d27ce114', + }, }, { 'url': 'http://www.spike.com/video-clips/lhtu8m/', 'only_matching': True, @@ -32,3 +43,12 @@ class SpikeIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.spike.com/feeds/mrss/' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' + _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') + + def _extract_mgid(self, webpage): + mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) + if mgid is None: + url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') + video_type, episode_id = url_parts.split('/', 1) + mgid = 'mgid:arc:{0}:spike.com:{1}'.format(video_type, episode_id) + return mgid From 3779d524dfd3cf72120847b235d4a3906e47a4f8 Mon Sep 17 00:00:00 2001 From: Varun <mailvarunest@gmail.com> Date: Tue, 29 Nov 2016 22:07:30 +0530 Subject: [PATCH 64/80] [liveleak] Add support for youtube embeds --- youtube_dl/extractor/liveleak.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index ea0565ac0..40fef9bb6 100644 --- a/youtube_dl/extractor/liveleak.py 
+++ b/youtube_dl/extractor/liveleak.py @@ -54,6 +54,19 @@ class LiveLeakIE(InfoExtractor): 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia', 'thumbnail': 're:^https?://.*\.jpg$' } + }, { + # Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521 + 'url' : 'http://m.liveleak.com/view?i=763_1473349649', + 'add_ie': ['Youtube'], + 'info_dict': { + 'id': '763_1473349649', + 'ext': 'mp4', + 'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty', + 'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.', + 'uploader': 'Ziz', + 'upload_date': '20160908', + 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw' + } }] @staticmethod @@ -87,7 +100,7 @@ class LiveLeakIE(InfoExtractor): else: # Maybe an embed? embed_url = self._search_regex( - r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"', + r'<iframe[^>]+src="((?:(?:http://www.prochan.com/embed\?)|(?:http://www.youtube.com/embed))[^"]+)"', webpage, 'embed URL') return { '_type': 'url_transparent', @@ -107,6 +120,7 @@ class LiveLeakIE(InfoExtractor): 'format_note': s.get('label'), 'url': s['file'], } for i, s in enumerate(sources)] + for i, s in enumerate(sources): # Removing '.h264_*.mp4' gives the raw video, which is essentially # the same video without the LiveLeak logo at the top (see From 8b0d3ee64ee20de35d0828b01ece98f59bb19e1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Nov 2016 23:42:19 +0700 Subject: [PATCH 65/80] [liveleak] Simplify and PEP 8 --- youtube_dl/extractor/liveleak.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 40fef9bb6..b84e4dd6c 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -56,7 +56,7 @@ class LiveLeakIE(InfoExtractor): } }, { # Covers 
https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521 - 'url' : 'http://m.liveleak.com/view?i=763_1473349649', + 'url': 'http://m.liveleak.com/view?i=763_1473349649', 'add_ie': ['Youtube'], 'info_dict': { 'id': '763_1473349649', @@ -66,7 +66,10 @@ class LiveLeakIE(InfoExtractor): 'uploader': 'Ziz', 'upload_date': '20160908', 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw' - } + }, + 'params': { + 'skip_download': True, + }, }] @staticmethod @@ -100,7 +103,7 @@ class LiveLeakIE(InfoExtractor): else: # Maybe an embed? embed_url = self._search_regex( - r'<iframe[^>]+src="((?:(?:http://www.prochan.com/embed\?)|(?:http://www.youtube.com/embed))[^"]+)"', + r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"', webpage, 'embed URL') return { '_type': 'url_transparent', From db75f14d8a6eb998f08d2774f5d609a02ef13646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Nov 2016 04:19:38 +0700 Subject: [PATCH 66/80] [ruutu] Detect DRM videos --- youtube_dl/extractor/ruutu.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 2fce4e81b..6db3e3e93 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( determine_ext, + ExtractorError, int_or_none, xpath_attr, xpath_text, @@ -101,6 +102,11 @@ class RuutuIE(InfoExtractor): }) extract_formats(video_xml.find('./Clip')) + + drm = xpath_text(video_xml, './Clip/DRM', default=None) + if not formats and drm: + raise ExtractorError('This video is DRM protected.', expected=True) + self._sort_formats(formats) return { From f882554815c42381e84af98860434b040b2d127c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 30 Nov 2016 11:52:19 +0100 Subject: [PATCH 67/80] [comedcycentral] Give /shows/.+/full-episodes URLs to the 
COmedyCentralFullEpisodesIE --- youtube_dl/extractor/comedycentral.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 528ff7fa3..0239dfd84 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class ComedyCentralIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ - (video-clips|episodes|cc-studios|video-collections|shows) + (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes))) /(?P<title>.*)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' @@ -29,7 +29,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor): class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ - (?:full-episodes) + (?:full-episodes|shows(?=/[^/]+/full-episodes)) /(?P<id>[^?]+)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' @@ -40,6 +40,9 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): 'title': 'November 28, 2016 - Ryan Speedo Green', }, 'playlist_count': 4, + }, { + 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'only_matching': True, }] def _real_extract(self, url): From 4c4765dba23c40136d575ab58b26e410ec42212a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Nov 2016 23:17:30 +0700 Subject: [PATCH 68/80] [soundcloud] Update client id (closes #11327) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 3b7ecb3c3..5a201eaa8 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' + _CLIENT_ID = 
'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' @staticmethod From f150530f4d536ebf5375efe96b2362062e02797e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Dec 2016 00:13:06 +0700 Subject: [PATCH 69/80] [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5515a08ff..0d5ab2eb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +version <unreleased> + +Extractors +* [soundcloud] Update client id (#11327) +* [ruutu] Detect DRM protected videos ++ [liveleak] Add support for youtube embeds (#10688) +* [spike] Fix full episodes support (#11312) +* [comedycentral] Fix full episodes support +* [normalboots] Rewrite in terms of JWPlatform (#11184) +* [teamfourstar] Rewrite in terms of JWPlatform (#11184) +- [screenwavemedia] Remove extractor (#11184) + + version 2016.11.27 Extractors From 73ec479c7d787c58d249583f4bb00657c370a938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Dec 2016 00:15:12 +0700 Subject: [PATCH 70/80] release 2016.12.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0d96f651f..36559dd7b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.27 +[debug] youtube-dl version 2016.12.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0d5ab2eb3..a91de7b63 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.12.01 Extractors * [soundcloud] Update client id (#11327) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d9ad7bd1f..edb76d9cc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -158,6 +158,7 @@ - **CollegeRama** - **ComCarCoff** - **ComedyCentral** + - **ComedyCentralFullEpisodes** - **ComedyCentralShortname** - **ComedyCentralTV** - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, 
WIRED @@ -643,7 +644,6 @@ - **Screencast** - **ScreencastOMatic** - **ScreenJunkies** - - **ScreenwaveMedia** - **Seeker** - **SenateISVP** - **SendtoNews** @@ -715,7 +715,7 @@ - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - - **TeamFour** + - **TeamFourStar** - **TechTalks** - **techtv.mit.edu** - **ted** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index db7da3985..1acb630af 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.27' +__version__ = '2016.12.01' From 98b08f94b15930e359fa0d85834d7e9651ce6445 Mon Sep 17 00:00:00 2001 From: Laneone <dude.1996@live.com> Date: Thu, 1 Dec 2016 00:01:21 +0530 Subject: [PATCH 71/80] [README.md] Fix typo Just a minor spelling mistake in the readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ea9131c3a..840932298 100644 --- a/README.md +++ b/README.md @@ -664,7 +664,7 @@ $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' # Download best format available but not better that 480p $ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]' -# Download best video only format but no bigger that 50 MB +# Download best video only format but no bigger than 50 MB $ youtube-dl -f 'best[filesize<50M]' # Download best format available via direct link over HTTP/HTTPS protocol From d17bfe4095a10bc52402e17d088c66e86f5f0bde Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 1 Dec 2016 14:56:52 +0800 Subject: [PATCH 72/80] [thisoldhouse] Recognize /tv-episode/ URLs and update _TESTS Closes #11271 --- ChangeLog | 5 +++++ youtube_dl/extractor/thisoldhouse.py | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index a91de7b63..bf5f26943 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version <unreleased> 
+ +Extractors ++ [thisoldhouse] Recognize /tv-episode/ URLs (#11271) + version 2016.12.01 Extractors diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py index 7629f0d10..197258df1 100644 --- a/youtube_dl/extractor/thisoldhouse.py +++ b/youtube_dl/extractor/thisoldhouse.py @@ -5,10 +5,10 @@ from .common import InfoExtractor class ThisOldHouseIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', - 'md5': '568acf9ca25a639f0c4ff905826b662f', + 'md5': '946f05bbaa12a33f9ae35580d2dfcfe3', 'info_dict': { 'id': '2REGtUDQ', 'ext': 'mp4', @@ -20,6 +20,9 @@ class ThisOldHouseIE(InfoExtractor): }, { 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', 'only_matching': True, + }, { + 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric', + 'only_matching': True, }] def _real_extract(self, url): From a94e7f4a0ca333aabf08adb1c329b4b5b8a5d897 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 1 Dec 2016 12:15:35 +0100 Subject: [PATCH 73/80] [aenetworks] extract more formats(closes #11321) --- youtube_dl/extractor/aenetworks.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 6adb6d824..c5e079a40 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -26,7 +26,7 @@ class AENetworksIE(AENetworksBaseIE): _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', - 'md5': 
'8ff93eb073449f151d6b90c0ae1ef0c7', + 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6', 'info_dict': { 'id': '22253814', 'ext': 'mp4', @@ -99,7 +99,7 @@ class AENetworksIE(AENetworksBaseIE): query = { 'mbr': 'true', - 'assetTypes': 'medium_video_s3' + 'assetTypes': 'high_video_s3' } video_id = self._html_search_meta('aetn:VideoID', webpage) media_url = self._search_regex( @@ -155,7 +155,7 @@ class HistoryTopicIE(AENetworksBaseIE): 'id': 'world-war-i-history', 'title': 'World War I History', }, - 'playlist_mincount': 24, + 'playlist_mincount': 23, }, { 'url': 'http://www.history.com/topics/world-war-i-history/videos', 'only_matching': True, @@ -193,7 +193,8 @@ class HistoryTopicIE(AENetworksBaseIE): return self.theplatform_url_result( release_url, video_id, { 'mbr': 'true', - 'switch': 'hls' + 'switch': 'hls', + 'assetTypes': 'high_video_ak', }) else: webpage = self._download_webpage(url, topic_id) @@ -203,6 +204,7 @@ class HistoryTopicIE(AENetworksBaseIE): entries.append(self.theplatform_url_result( video_attributes['data-release-url'], video_attributes['data-id'], { 'mbr': 'true', - 'switch': 'hls' + 'switch': 'hls', + 'assetTypes': 'high_video_ak', })) return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage)) From 83442966194640d9bc00e7f3086aa5e8b25c4ae3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 3 Dec 2016 21:53:41 +0800 Subject: [PATCH 74/80] [socks] Fix error reporting (#11355) --- youtube_dl/socks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 63d19b3a5..fece28062 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -60,7 +60,7 @@ class ProxyError(IOError): def __init__(self, code=None, msg=None): if code is not None and msg is None: - msg = self.CODES.get(code) and 'unknown error' + msg = self.CODES.get(code) or 'unknown error' super(ProxyError, self).__init__(code, msg) From 
9b5288c92ae43436a5d48775bbe1ee537588625f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 4 Dec 2016 23:20:14 +0700 Subject: [PATCH 75/80] [1tv] Improve extraction and add support for playlists (closes #11335) --- youtube_dl/extractor/firsttv.py | 105 +++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 6b662cc3c..4463d3d20 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( int_or_none, qualities, @@ -22,8 +25,7 @@ class FirstTVIE(InfoExtractor): 'info_dict': { 'id': '40049', 'ext': 'mp4', - 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', - 'description': 'md5:36a39c1d19618fec57d12efe212a8370', + 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20150212', 'duration': 2694, @@ -34,8 +36,7 @@ class FirstTVIE(InfoExtractor): 'info_dict': { 'id': '364746', 'ext': 'mp4', - 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', - 'description': 'md5:a242eea0031fd180a4497d52640a9572', + 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20160407', 'duration': 179, @@ -44,6 +45,17 @@ class FirstTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00', + 'info_dict': { + 'id': '14:00', + 'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. 
Первый канал', + 'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd', + }, + 'playlist_count': 13, + }, { + 'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016', + 'only_matching': True, }] def _real_extract(self, url): @@ -51,43 +63,66 @@ class FirstTVIE(InfoExtractor): webpage = self._download_webpage(url, display_id) playlist_url = compat_urlparse.urljoin(url, self._search_regex( - r'data-playlist-url="([^"]+)', webpage, 'playlist url')) + r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'playlist url', group='url')) - item = self._download_json(playlist_url, display_id)[0] - video_id = item['id'] - quality = qualities(('ld', 'sd', 'hd', )) - formats = [] - for f in item.get('mbr', []): - src = f.get('src') - if not src: - continue - fname = f.get('name') - formats.append({ - 'url': src, - 'format_id': fname, - 'quality': quality(fname), + parsed_url = compat_urlparse.urlparse(playlist_url) + qs = compat_urlparse.parse_qs(parsed_url.query) + item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]') + + items = self._download_json(playlist_url, display_id) + + if item_ids: + items = [ + item for item in items + if item.get('uid') and compat_str(item['uid']) in item_ids] + else: + items = [items[0]] + + entries = [] + QUALITIES = ('ld', 'sd', 'hd', ) + + for item in items: + title = item['title'] + quality = qualities(QUALITIES) + formats = [] + for f in item.get('mbr', []): + src = f.get('src') + if not src or not isinstance(src, compat_str): + continue + tbr = int_or_none(self._search_regex( + r'_(\d{3,})\.mp4', src, 'tbr', default=None)) + formats.append({ + 'url': src, + 'format_id': f.get('name'), + 'tbr': tbr, + 'quality': quality(f.get('name')), + }) + self._sort_formats(formats) + + thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) + duration = int_or_none(item.get('duration') or 
self._html_search_meta( + 'video:duration', webpage, 'video duration', fatal=False)) + upload_date = unified_strdate(self._html_search_meta( + 'ya:ovs:upload_date', webpage, 'upload date', default=None)) + + entries.append({ + 'id': item.get('id') or uid, + 'thumbnail': thumbnail, + 'title': title, + 'upload_date': upload_date, + 'duration': int_or_none(duration), + 'formats': formats }) - self._sort_formats(formats) title = self._html_search_regex( (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', r"'title'\s*:\s*'([^']+)'"), - webpage, 'title', default=None) or item['title'] + webpage, 'title', default=None) or self._og_search_title( + webpage, default=None) description = self._html_search_regex( r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', default=None) or self._html_search_meta( - 'description', webpage, 'description') - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'video duration', fatal=False)) - upload_date = unified_strdate(self._html_search_meta( - 'ya:ovs:upload_date', webpage, 'upload date', fatal=False)) + 'description', webpage, 'description', default=None) - return { - 'id': video_id, - 'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage), - 'title': title, - 'description': description, - 'upload_date': upload_date, - 'duration': int_or_none(duration), - 'formats': formats - } + return self.playlist_result(entries, display_id, title, description) From 4bd7d9d4ae05319ebf6eb2aeffce7bde4fa7b6cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 5 Dec 2016 00:31:02 +0800 Subject: [PATCH 76/80] [socks] Refine exception model for better error handling 1. ProxyError now inherits from socket.error instead of IOError The only functions socks.py overrides are connect and connect_ex. In Python 2.x and Python <= 3.2, socket functions raises socket.error. In newer Python versions, those functions raises OSError instead. 
The name socket.error is preserved as an alias of OSError for backward compatibility. To keep socks.py compatible with Python's standard library, it should raise the same exception as raw sockets. See PEP 3151 (https://www.python.org/dev/peps/pep-3151/) for more information about the change in Python 3.3. 2. Raise EOFError instead of IOError when the socket receives less data than it expects There's no common convention, but both ftplib and telnetlib raise EOFError for similar situations. socks.py follows them. Closes #11355 In #11355, only Python 2 is affected. In Python 3, both socket.error and IOError are aliases of OSError, so AbstractHTTPHandler.do_open correctly catches the error and thus InfoExtractor._is_valid_url works fine. --- youtube_dl/socks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index fece28062..0f5d7bdb2 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -55,7 +55,7 @@ class Socks5AddressType(object): ATYP_IPV6 = 0x04 -class ProxyError(IOError): +class ProxyError(socket.error): ERR_SUCCESS = 0x00 def __init__(self, code=None, msg=None): @@ -123,7 +123,7 @@ class sockssocket(socket.socket): while len(data) < cnt: cur = self.recv(cnt - len(data)) if not cur: - raise IOError('{0} bytes missing'.format(cnt - len(data))) + raise EOFError('{0} bytes missing'.format(cnt - len(data))) data += cur return data From 3ed81714d8db61ea6d1633184af15d239af0445c Mon Sep 17 00:00:00 2001 From: vordep <up201303880@fe.up.pt> Date: Sun, 4 Dec 2016 23:53:49 +0000 Subject: [PATCH 77/80] [fusion] Update ooyala id regex --- youtube_dl/extractor/fusion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py index b4ab4cbb7..ede729b52 100644 --- a/youtube_dl/extractor/fusion.py +++ b/youtube_dl/extractor/fusion.py @@ -29,7 +29,7 @@ class FusionIE(InfoExtractor): webpage = self._download_webpage(url, display_id) 
ooyala_code = self._search_regex( - r'data-video-id=(["\'])(?P<code>.+?)\1', + r'data-ooyala-id=(["\'])(?P<code>(?:(?!\1).)+)\1', webpage, 'ooyala code', group='code') return OoyalaIE._build_url_result(ooyala_code) From 4afa4ff223365601603b6a1cc77eb9d96d8e629d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Dec 2016 23:28:57 +0700 Subject: [PATCH 78/80] [1tv] Fix video id extraction --- youtube_dl/extractor/firsttv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 4463d3d20..47673e2d4 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -107,7 +107,7 @@ class FirstTVIE(InfoExtractor): 'ya:ovs:upload_date', webpage, 'upload date', default=None)) entries.append({ - 'id': item.get('id') or uid, + 'id': compat_str(item.get('id') or item['uid']), 'thumbnail': thumbnail, 'title': title, 'upload_date': upload_date, From 875ddd740902dd0de15d21939ef75fbfc2535f30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Dec 2016 00:41:03 +0700 Subject: [PATCH 79/80] [bloomberg] Add another video id regex (closes #11371) --- youtube_dl/extractor/bloomberg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 2a8cd64b9..c5e11e8eb 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -45,7 +45,8 @@ class BloombergIE(InfoExtractor): name = self._match_id(url) webpage = self._download_webpage(url, name) video_id = self._search_regex( - r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1', + (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', + r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'), webpage, 'id', group='url', default=None) if not video_id: bplayer_data = self._parse_json(self._search_regex( From 283d1c6a8bec0150a498c6909893179335f06f0f 
Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 6 Dec 2016 19:01:09 +0100 Subject: [PATCH 80/80] [telebruxelles] extract all formats and add support for emission urls --- youtube_dl/extractor/telebruxelles.py | 45 +++++++++++++-------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py index eefecc490..5886e9c1b 100644 --- a/youtube_dl/extractor/telebruxelles.py +++ b/youtube_dl/extractor/telebruxelles.py @@ -7,33 +7,30 @@ from .common import InfoExtractor class TeleBruxellesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt|emission)/?(?P<id>[^/#?]+)' _TESTS = [{ - 'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/', - 'md5': '59439e568c9ee42fb77588b2096b214f', + 'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/', + 'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9', 'info_dict': { - 'id': '11942', - 'display_id': 'auditions-devant-parlement-francken-galant-tres-attendus', - 'ext': 'flv', - 'title': 'Parlement : Francken et Galant répondent aux interpellations de l’opposition', - 'description': 're:Les auditions des ministres se poursuivent*' - }, - 'params': { - 'skip_download': 'requires rtmpdump' + 'id': '158856', + 'display_id': 'que-risque-lauteur-dune-fausse-alerte-a-la-bombe', + 'ext': 'mp4', + 'title': 'Que risque l’auteur d’une fausse alerte à la bombe ?', + 'description': 'md5:3cf8df235d44ebc5426373050840e466', }, }, { - 'url': 'http://www.telebruxelles.be/sport/basket-brussels-bat-mons-80-74/', - 'md5': '181d3fbdcf20b909309e5aef5c6c6047', + 'url': 'http://bx1.be/sport/futsal-schaerbeek-sincline-5-3-a-thulin/', + 'md5': 'dfe07ecc9c153ceba8582ac912687675', 'info_dict': { - 'id': '10091', - 'display_id': 
'basket-brussels-bat-mons-80-74', - 'ext': 'flv', - 'title': 'Basket : le Brussels bat Mons 80-74', - 'description': 're:^Ils l\u2019on fait ! En basket, le B*', - }, - 'params': { - 'skip_download': 'requires rtmpdump' + 'id': '158433', + 'display_id': 'futsal-schaerbeek-sincline-5-3-a-thulin', + 'ext': 'mp4', + 'title': 'Futsal : Schaerbeek s’incline 5-3 à Thulin', + 'description': 'md5:fd013f1488d5e2dceb9cebe39e2d569b', }, + }, { + 'url': 'http://bx1.be/emission/bxenf1-gastronomie/', + 'only_matching': True, }] def _real_extract(self, url): @@ -50,13 +47,13 @@ class TeleBruxellesIE(InfoExtractor): r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"', webpage, 'RTMP url') rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url) + formats = self._extract_wowza_formats(rtmp_url, article_id or display_id) + self._sort_formats(formats) return { 'id': article_id or display_id, 'display_id': display_id, 'title': title, 'description': description, - 'url': rtmp_url, - 'ext': 'flv', - 'rtmp_live': True # if rtmpdump is not called with "--live" argument, the download is blocked and can be completed + 'formats': formats, }