From 0c265486016b06342fb257966474ce591667aaff Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 1 May 2017 23:09:18 +0800 Subject: [PATCH 01/47] [cda] Implement birthday verification (closes #12789) --- ChangeLog | 1 + test/test_utils.py | 11 ++++++ youtube_dl/extractor/cda.py | 52 +++++++++++++++++++++++-- youtube_dl/extractor/videopress.py | 9 ++--- youtube_dl/utils.py | 61 ++++++++++++++++++++++++++++++ 5 files changed, 125 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index e31f54304..7324a9dd9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [cda] Support birthday verification (#12789) * [leeco] Fix extraction (#12974) diff --git a/test/test_utils.py b/test/test_utils.py index 05fdc0e95..f31559e71 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -44,6 +44,7 @@ from youtube_dl.utils import ( limit_length, mimetype2ext, month_by_name, + multipart_encode, ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, @@ -620,6 +621,16 @@ class TestUtil(unittest.TestCase): 'http://example.com/path', {'test': '第二行тест'})), query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) + def test_multipart_encode(self): + self.assertEqual( + multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0], + b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n') + self.assertEqual( + multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0], + b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n') + self.assertRaises( + ValueError, multipart_encode, {b'field': b'value'}, boundary='value') + def test_dict_get(self): FALSE_VALUES = { 'none': None, diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 1ee35b501..78b7a923c 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -9,7 +9,10 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + multipart_encode, parse_duration, + random_birthday, + urljoin, ) @@ -27,7 +30,8 @@ class CDAIE(InfoExtractor): 'description': 'md5:269ccd135d550da90d1662651fcb9772', 'thumbnail': r're:^https?://.*\.jpg$', 'average_rating': float, - 'duration': 39 + 'duration': 39, + 'age_limit': 0, } }, { 'url': 'http://www.cda.pl/video/57413289', @@ -41,13 +45,41 @@ class CDAIE(InfoExtractor): 'uploader': 'crash404', 'view_count': int, 'average_rating': float, - 'duration': 137 + 'duration': 137, + 'age_limit': 0, } + }, { + # Age-restricted + 'url': 'http://www.cda.pl/video/1273454c4', + 'info_dict': { + 'id': '1273454c4', + 'ext': 'mp4', + 'title': 'Bronson (2008) napisy HD 1080p', + 'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c', + 'height': 1080, + 'uploader': 'boniek61', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 5554, + 'age_limit': 18, + 'view_count': int, + 'average_rating': float, + }, }, { 'url': 'http://ebd.cda.pl/0x0/5749950c', 'only_matching': True, }] + def _download_age_confirm_page(self, url, video_id, *args, **kwargs): + form_data = random_birthday('rok', 'miesiac', 'dzien') + form_data.update({'return': url, 'module': 'video', 'module_id': video_id}) + data, content_type = multipart_encode(form_data) + return self._download_webpage( + urljoin(url, '/a/validatebirth'), video_id, *args, + data=data, headers={ + 'Referer': url, + 'Content-Type': content_type, + }, **kwargs) + def _real_extract(self, url): video_id = self._match_id(url) self._set_cookie('cda.pl', 'cda.player', 'html5') @@ -57,6 +89,13 @@ class CDAIE(InfoExtractor): if 'Ten film jest dostępny dla użytkowników premium' in webpage: raise ExtractorError('This video is only available for premium users.', expected=True) + need_confirm_age = False + if self._html_search_regex(r'(]+action="/a/validatebirth")', + webpage, 'birthday validate form', default=None): + webpage = self._download_age_confirm_page( + url, video_id, note='Confirming age') + need_confirm_age = True + formats = [] uploader = self._search_regex(r'''(?x) @@ -81,6 +120,7 @@ class CDAIE(InfoExtractor): 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'duration': None, + 'age_limit': 18 if need_confirm_age else 0, } def extract_format(page, version): @@ -121,7 +161,12 @@ class CDAIE(InfoExtractor): for href, resolution in re.findall( r']+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', webpage): - webpage = self._download_webpage( + if need_confirm_age: + handler = self._download_age_confirm_page + else: + handler = self._download_webpage + + webpage = handler( self._BASE_URL + href, video_id, 'Downloading %s version information' % resolution, fatal=False) if not webpage: @@ -129,6 +174,7 @@ class CDAIE(InfoExtractor): # invalid version is requested. self.report_warning('Unable to download %s version information' % resolution) continue + extract_format(webpage, resolution) self._sort_formats(formats) diff --git a/youtube_dl/extractor/videopress.py b/youtube_dl/extractor/videopress.py index 049db25a5..e5f964d39 100644 --- a/youtube_dl/extractor/videopress.py +++ b/youtube_dl/extractor/videopress.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import random import re from .common import InfoExtractor @@ -11,6 +10,7 @@ from ..utils import ( float_or_none, parse_age_limit, qualities, + random_birthday, try_get, unified_timestamp, urljoin, @@ -47,13 +47,10 @@ class VideoPressIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + query = random_birthday('birth_year', 'birth_month', 'birth_day') video = self._download_json( 'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id, - video_id, query={ - 'birth_month': random.randint(1, 12), - 'birth_day': random.randint(1, 31), - 'birth_year': random.randint(1950, 1995), - }) + video_id, query=query) title = video['title'] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 86fc5ccac..25bd228ab 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -11,6 +11,7 @@ import contextlib import ctypes import datetime import email.utils +import email.header import errno import functools import gzip @@ -2097,6 +2098,58 @@ def update_Request(req, url=None, data=None, headers={}, query={}): return new_req +def try_multipart_encode(data, boundary): + content_type = 'multipart/form-data; boundary=%s' % boundary + + out = b'' + for k, v in data.items(): + out += b'--' + boundary.encode('ascii') + b'\r\n' + if isinstance(k, compat_str): + k = k.encode('utf-8') + if isinstance(v, compat_str): + v = v.encode('utf-8') + # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578 + # suggests sending UTF-8 directly. Firefox sends UTF-8, too + content = b'Content-Disposition: form-data; name="%s"\r\n\r\n' % k + v + b'\r\n' + if boundary.encode('ascii') in content: + raise ValueError('Boundary overlaps with data') + out += content + + out += b'--' + boundary.encode('ascii') + b'--\r\n' + + return out, content_type + + +def multipart_encode(data, boundary=None): + ''' + Encode a dict to RFC 7578-compliant form-data + + data: + A dict where keys and values can be either Unicode or bytes-like + objects. + boundary: + If specified a Unicode object, it's used as the boundary. Otherwise + a random boundary is generated. + + Reference: https://tools.ietf.org/html/rfc7578 + ''' + has_specified_boundary = boundary is not None + + while True: + if boundary is None: + boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff)) + + try: + out, content_type = try_multipart_encode(data, boundary) + break + except ValueError: + if has_specified_boundary: + raise + boundary = None + + return out, content_type + + def dict_get(d, key_or_keys, default=None, skip_false_values=True): if isinstance(key_or_keys, (list, tuple)): for key in key_or_keys: @@ -3760,3 +3813,11 @@ def write_xattr(path, key, value): "Couldn't find a tool to set the xattrs. " "Install either the python 'xattr' module, " "or the 'xattr' binary.") + + +def random_birthday(year_field, month_field, day_field): + return { + year_field: str(random.randint(1950, 1995)), + month_field: str(random.randint(1, 12)), + day_field: str(random.randint(1, 31)), + } From ff6f9a67040c47ea1645e3f91f3153c212ebc7ca Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 May 2017 16:04:25 +0100 Subject: [PATCH 02/47] [extractor/common] fix typo in _extract_akamai_formats --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9541e5b42..b9ad8461a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2174,7 +2174,7 @@ class InfoExtractor(object): def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): formats = [] hdcore_sign = 'hdcore=3.7.0' - f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') + f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') hds_host = hosts.get('hds') if hds_host: f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url) From 4fe14732a24214d030cdbdfc25e5906244be2dc2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 May 2017 16:06:21 +0100 Subject: [PATCH 03/47] [laola1tv] fix extraction(closes #12880) --- youtube_dl/extractor/laola1tv.py | 92 +++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 3190b187c..e8853fb77 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import json + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -10,6 +12,7 @@ from ..utils import ( xpath_text, urljoin, update_url_query, + js_to_json, ) @@ -28,6 +31,29 @@ class Laola1TvEmbedIE(InfoExtractor): }, } + def _extract_token_url(self, stream_access_url, video_id, data): + return self._download_json( + stream_access_url, video_id, headers={ + 'Content-Type': 'application/json', + }, data=json.dumps(data).encode())['data']['stream-access'][0] + + def _extract_formats(self, token_url, video_id): + token_doc = self._download_xml( + token_url, video_id, 'Downloading token', + headers=self.geo_verification_headers()) + + token_attrib = xpath_element(token_doc, './/token').attrib + + if token_attrib['status'] != '0': + raise ExtractorError( + 'Token error: %s' % token_attrib['comment'], expected=True) + + formats = self._extract_akamai_formats( + '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']), + video_id) + self._sort_formats(formats) + return formats + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -68,29 +94,16 @@ class Laola1TvEmbedIE(InfoExtractor): else: data_abo = urlencode_postdata( dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))) - token_url = self._download_json( - 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', - video_id, query={ + stream_access_url = update_url_query( + 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', { 'videoId': _v('id'), 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'), 'label': _v('label'), 'area': _v('area'), - }, data=data_abo)['data']['stream-access'][0] + }) + token_url = self._extract_token_url(stream_access_url, video_id, data_abo) - token_doc = self._download_xml( - token_url, video_id, 'Downloading token', - headers=self.geo_verification_headers()) - - token_attrib = xpath_element(token_doc, './/token').attrib - - if token_attrib['status'] != '0': - raise ExtractorError( - 'Token error: %s' % token_attrib['comment'], expected=True) - - formats = self._extract_akamai_formats( - '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']), - video_id) - self._sort_formats(formats) + formats = self._extract_formats(token_url, video_id) categories_str = _v('meta_sports') categories = categories_str.split(',') if categories_str else [] @@ -107,7 +120,7 @@ class Laola1TvEmbedIE(InfoExtractor): } -class Laola1TvIE(InfoExtractor): +class Laola1TvIE(Laola1TvEmbedIE): IE_NAME = 'laola1tv' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P[^/?#&]+)' _TESTS = [{ @@ -164,13 +177,42 @@ class Laola1TvIE(InfoExtractor): if 'Dieser Livestream ist bereits beendet.' in webpage: raise ExtractorError('This live stream has already finished.', expected=True) - iframe_url = urljoin(url, self._search_regex( - r']*?id="videoplayer"[^>]*?src="([^"]+)"', - webpage, 'iframe url')) + conf = self._parse_json(self._search_regex( + r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), + display_id, js_to_json) + + video_id = conf['videoid'] + + config = self._download_json(conf['configUrl'], video_id, query={ + 'videoid': video_id, + 'partnerid': conf['partnerid'], + 'language': conf.get('language', ''), + 'portal': conf.get('portalid', ''), + }) + error = config.get('error') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + video_data = config['video'] + title = video_data['title'] + is_live = video_data.get('isLivestream') and video_data.get('isLive') + meta = video_data.get('metaInformation') + sports = meta.get('sports') + categories = sports.split(',') if sports else [] + + token_url = self._extract_token_url( + video_data['streamAccess'], video_id, + video_data['abo']['required']) + + formats = self._extract_formats(token_url, video_id) return { - '_type': 'url', + 'id': video_id, 'display_id': display_id, - 'url': iframe_url, - 'ie_key': 'Laola1TvEmbed', + 'title': self._live_title(title) if is_live else title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('image'), + 'categories': categories, + 'formats': formats, + 'is_live': is_live, } From 7f09e523e87ab6fcf5713fbcd6767d00d98039e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 4 May 2017 22:41:47 +0700 Subject: [PATCH 04/47] [laola1tv:embed] Fix tests --- youtube_dl/extractor/laola1tv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index e8853fb77..d9f53c356 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -19,7 +19,7 @@ from ..utils import ( class Laola1TvEmbedIE(InfoExtractor): IE_NAME = 'laola1tv:embed' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P\d+)' - _TEST = { + _TESTS = [{ # flashvars.premium = "false"; 'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024', 'info_dict': { @@ -29,7 +29,7 @@ class Laola1TvEmbedIE(InfoExtractor): 'uploader': 'ITTF - International Table Tennis Federation', 'upload_date': '20161211', }, - } + }] def _extract_token_url(self, stream_access_url, video_id, data): return self._download_json( From 4947f13cd0e541bd2223187df19389c8b721c665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 4 May 2017 22:42:49 +0700 Subject: [PATCH 05/47] [pbs] Improve multipart video support (closes #12981) --- youtube_dl/extractor/pbs.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 0727e381b..16cc667d0 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -10,6 +10,7 @@ from ..utils import ( int_or_none, float_or_none, js_to_json, + orderedSet, strip_jsonp, strip_or_none, unified_strdate, @@ -264,6 +265,13 @@ class PBSIE(InfoExtractor): }, 'playlist_count': 2, }, + { + 'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/', + 'info_dict': { + 'id': 'great-war', + }, + 'playlist_count': 3, + }, { 'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/', 'info_dict': { @@ -382,10 +390,10 @@ class PBSIE(InfoExtractor): # tabbed frontline videos MULTI_PART_REGEXES = ( r']+class="videotab[^"]*"[^>]+vid="(\d+)"', - r']+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)', + r']+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)', ) for p in MULTI_PART_REGEXES: - tabbed_videos = re.findall(p, webpage) + tabbed_videos = orderedSet(re.findall(p, webpage)) if tabbed_videos: return tabbed_videos, presumptive_id, upload_date, description From 50ad078b7bca28ec4a85caf7689e3a260def2189 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 5 May 2017 15:13:40 +0800 Subject: [PATCH 06/47] [gdcvault] Fix extraction for videos with gdc-player.html Closes #12733 --- ChangeLog | 1 + youtube_dl/extractor/gdcvault.py | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 7324a9dd9..930fdded4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [gdcvault] Fix extraction for some videos (#12733) + [cda] Support birthday verification (#12789) * [leeco] Fix extraction (#12974) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 3136427db..f71d9092e 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -75,6 +75,19 @@ class GDCVaultIE(InfoExtractor): 'format': 'jp', # The japanese audio } }, + { + # gdc-player.html + 'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo', + 'info_dict': { + 'id': '1435', + 'display_id': 'An-American-engine-in-Tokyo', + 'ext': 'flv', + 'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT', + }, + 'params': { + 'skip_download': True, # Requires rtmpdump + }, + }, ] def _login(self, webpage_url, display_id): @@ -128,7 +141,7 @@ class GDCVaultIE(InfoExtractor): 'title': title, } - PLAYER_REGEX = r'' xml_root = self._html_search_regex( PLAYER_REGEX, start_page, 'xml root', default=None) From cc7bda4fffa3913178e5ad345a8c9c6d1d8a2626 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 5 May 2017 20:01:02 +0800 Subject: [PATCH 07/47] [vice] Fix extraction for non en_us videos (closes #12967) --- ChangeLog | 1 + youtube_dl/extractor/vice.py | 34 ++++++++++++++++++++++++++------ youtube_dl/extractor/viceland.py | 11 ++++++++--- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 930fdded4..23f83cc56 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [vice] Fix extraction for non en_us videos (#12967) * [gdcvault] Fix extraction for some videos (#12733) + [cda] Support birthday verification (#12789) * [leeco] Fix extraction (#12974) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index f0a7fd739..275fc1395 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -20,7 +20,7 @@ from ..utils import ( class ViceBaseIE(AdobePassIE): - def _extract_preplay_video(self, url, webpage): + def _extract_preplay_video(self, url, locale, webpage): watch_hub_data = extract_attributes(self._search_regex( r'(?s)()', webpage, 'watch hub')) video_id = watch_hub_data['vms-id'] @@ -45,7 +45,7 @@ class ViceBaseIE(AdobePassIE): try: host = 'www.viceland' if is_locked else self._PREPLAY_HOST - preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query) + preplay = self._download_json('https://%s.com/%s/preplay/%s' % (host, locale, video_id), video_id, query=query) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: error = json.loads(e.cause.read().decode()) @@ -88,7 +88,7 @@ class ViceBaseIE(AdobePassIE): class ViceIE(ViceBaseIE): - _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?P[^/]+/)(?:[^/]+/)?videos?/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', @@ -115,20 +115,39 @@ class ViceIE(ViceBaseIE): 'add_ie': ['Youtube'], }, { 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', - 'md5': '', 'info_dict': { 'id': '5816510690b70e6c5fd39a56', 'ext': 'mp4', 'uploader': 'Waypoint', 'title': 'The Signal From Tölva', + 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5', 'uploader_id': '57f7d621e05ca860fa9ccaf9', - 'timestamp': 1477941983938, + 'timestamp': 1477941983, + 'upload_date': '20161031', }, 'params': { # m3u8 download 'skip_download': True, }, 'add_ie': ['UplynkPreplay'], + }, { + 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f', + 'info_dict': { + 'id': '581b12b60a0e1f4c0fb6ea2f', + 'ext': 'mp4', + 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1', + 'description': '

Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.

', + 'uploader': 'VICE', + 'uploader_id': '57a204088cb727dec794c67b', + 'timestamp': 1485368119, + 'upload_date': '20170125', + 'age_limit': 14, + }, + 'params': { + # AES-encrypted m3u8 + 'skip_download': True, + }, + 'add_ie': ['UplynkPreplay'], }, { 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'only_matching': True, @@ -142,6 +161,9 @@ class ViceIE(ViceBaseIE): _PREPLAY_HOST = 'video.vice' def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + locale = mobj.group('locale') video_id = self._match_id(url) webpage, urlh = self._download_webpage_handle(url, video_id) embed_code = self._search_regex( @@ -153,7 +175,7 @@ class ViceIE(ViceBaseIE): r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None) if youtube_id: return self.url_result(youtube_id, 'Youtube') - return self._extract_preplay_video(urlh.geturl(), webpage) + return self._extract_preplay_video(urlh.geturl(), locale, webpage) class ViceShowIE(InfoExtractor): diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 87f9216b5..bd60235c8 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -1,11 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .vice import ViceBaseIE class VicelandIE(ViceBaseIE): - _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P[a-f0-9]+)' + _VALID_URL = r'https?://(?:www\.)?viceland\.com/(?P[^/]+)/video/[^/]+/(?P[a-f0-9]+)' _TEST = { 'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316', 'info_dict': { @@ -24,10 +26,13 @@ class VicelandIE(ViceBaseIE): 'skip_download': True, }, 'add_ie': ['UplynkPreplay'], + 'skip': '404', } _PREPLAY_HOST = 'www.viceland' def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + locale = mobj.group('locale') webpage = self._download_webpage(url, video_id) - return self._extract_preplay_video(url, webpage) + return self._extract_preplay_video(url, locale, webpage) From 4ac6dc3732492839b8c5e9a144ac8ef8fd1539aa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 5 May 2017 20:26:51 +0800 Subject: [PATCH 08/47] [vice] Support Vice articles (closes #12968) --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vice.py | 79 ++++++++++++++++++++++++------ 3 files changed, 67 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 23f83cc56..0908e2e93 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [vice] Support vice articles (#12968) * [vice] Fix extraction for non en_us videos (#12967) * [gdcvault] Fix extraction for some videos (#12733) + [cda] Support birthday verification (#12789) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c0020dd7d..1d7495910 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1125,6 +1125,7 @@ from .vgtv import ( from .vh1 import VH1IE from .vice import ( ViceIE, + ViceArticleIE, ViceShowIE, ) from .viceland import VicelandIE diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 275fc1395..b2e95734b 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -88,7 +88,7 @@ class ViceBaseIE(AdobePassIE): class ViceIE(ViceBaseIE): - _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?P[^/]+/)(?:[^/]+/)?videos?/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?P[^/]+)/(?:[^/]+/)?videos?/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', @@ -100,19 +100,6 @@ class ViceIE(ViceBaseIE): 'duration': 725.983, }, 'add_ie': ['Ooyala'], - }, { - 'url': 'http://www.vice.com/video/how-to-hack-a-car', - 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', - 'info_dict': { - 'id': '3jstaBeXgAs', - 'ext': 'mp4', - 'title': 'How to Hack a Car: Phreaked Out (Episode 2)', - 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30', - 'uploader_id': 'MotherboardTV', - 'uploader': 'Motherboard', - 'upload_date': '20140529', - }, - 'add_ie': ['Youtube'], }, { 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', 'info_dict': { @@ -208,3 +195,67 @@ class ViceShowIE(InfoExtractor): description = self._html_search_meta('description', webpage, 'description') return self.playlist_result(entries, show_id, title, description) + + +class ViceArticleIE(InfoExtractor): + _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P[^?#]+)' + + _TESTS = [{ + 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', + 'info_dict': { + 'id': '58dc0a3dee202d2a0ccfcbd8', + 'ext': 'mp4', + 'title': 'Mormon War on Porn ', + 'description': 'md5:ad396a2481e7f8afb5ed486878421090', + 'uploader': 'VICE', + 'uploader_id': '57a204088cb727dec794c693', + 'timestamp': 1489160690, + 'upload_date': '20170310', + }, + 'params': { + # AES-encrypted m3u8 + 'skip_download': True, + }, + }, { + 'url': 'http://www.vice.com/video/how-to-hack-a-car', + 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', + 'info_dict': { + 'id': '3jstaBeXgAs', + 'ext': 'mp4', + 'title': 'How to Hack a Car: Phreaked Out (Episode 2)', + 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30', + 'uploader_id': 'MotherboardTV', + 'uploader': 'Motherboard', + 'upload_date': '20140529', + }, + 'add_ie': ['Youtube'], + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + prefetch_data = self._parse_json(self._search_regex( + r'window\.__PREFETCH_DATA\s*=\s*({.*});', + webpage, 'prefetch data'), display_id) + body = prefetch_data['body'] + youtube_url = self._html_search_regex( + r']+src="(.*youtube\.com/.*)"', body, 'YouTube URL', default=None) + if youtube_url: + return { + '_type': 'url_transparent', + 'url': youtube_url, + 'display_id': display_id, + 'ie_key': 'Youtube', + } + + video_url = self._html_search_regex( + r'data-video-url="([^"]+)"', prefetch_data['embed_code'], 'video URL') + + return { + '_type': 'url_transparent', + 'url': video_url, + 'display_id': display_id, + 'ie_key': ViceIE.ie_key(), + } From b2ad479d17c435b02fe4f8a02f21bb713c2833f1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 5 May 2017 20:51:59 +0800 Subject: [PATCH 09/47] [utils] Fix multipart_encode for Python < 3.5 --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 25bd228ab..911cacd29 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2110,7 +2110,7 @@ def try_multipart_encode(data, boundary): v = v.encode('utf-8') # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578 # suggests sending UTF-8 directly. Firefox sends UTF-8, too - content = b'Content-Disposition: form-data; name="%s"\r\n\r\n' % k + v + b'\r\n' + content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n' if boundary.encode('ascii') in content: raise ValueError('Boundary overlaps with data') out += content From 7ad53cb7ff9afdcffb0b2d027f06f4120dbde9a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 May 2017 21:59:23 +0700 Subject: [PATCH 10/47] [laola1tv] PEP 8 --- youtube_dl/extractor/laola1tv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index d9f53c356..1f91ba017 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -10,7 +10,6 @@ from ..utils import ( urlencode_postdata, xpath_element, xpath_text, - urljoin, update_url_query, js_to_json, ) From 1d9e0a4f40deaeb2f722cf964e6bf13b3835e617 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 May 2017 16:12:40 +0100 Subject: [PATCH 11/47] [vice] update tests and add support for ooyala embeds in article pages --- youtube_dl/extractor/vice.py | 83 +++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index b2e95734b..54e207b39 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -32,7 +32,8 @@ class ViceBaseIE(AdobePassIE): resource = self._get_mvpd_resource( 'VICELAND', title, video_id, watch_hub_data.get('video-rating')) - query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) + query['tvetoken'] = self._extract_mvpd_auth( + url, video_id, 'VICELAND', resource) # signature generation algorithm is reverse engineered from signatureGenerator in # webpack:///../shared/~/vice-player/dist/js/vice-player.js in @@ -45,11 +46,14 @@ class ViceBaseIE(AdobePassIE): try: host = 'www.viceland' if is_locked else self._PREPLAY_HOST - preplay = self._download_json('https://%s.com/%s/preplay/%s' % (host, locale, video_id), video_id, query=query) + preplay = self._download_json( + 'https://%s.com/%s/preplay/%s' % (host, locale, video_id), + video_id, query=query) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: error = json.loads(e.cause.read().decode()) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error['details']), expected=True) raise video_data = preplay['video'] @@ -88,16 +92,17 @@ class ViceBaseIE(AdobePassIE): class ViceIE(ViceBaseIE): - _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?P[^/]+)/(?:[^/]+/)?videos?/(?P[^/?#&]+)' + IE_NAME = 'vice' + _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P[^/]+)/)?videos?/(?P[^/?#&]+)' _TESTS = [{ - 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', - 'md5': 'e9d77741f9e42ba583e683cd170660f7', + 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', + 'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2', 'info_dict': { - 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', + 'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj', 'ext': 'flv', - 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', - 'duration': 725.983, + 'title': 'Monkey Labs of Holland', + 'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149', }, 'add_ie': ['Ooyala'], }, { @@ -136,22 +141,13 @@ class ViceIE(ViceBaseIE): }, 'add_ie': ['UplynkPreplay'], }, { - 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', - 'only_matching': True, - }, { - 'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229', - 'only_matching': True, - }, { - 'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show', + 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', 'only_matching': True, }] _PREPLAY_HOST = 'video.vice' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - locale = mobj.group('locale') - video_id = self._match_id(url) + locale, video_id = re.match(self._VALID_URL, url).groups() webpage, urlh = self._download_webpage_handle(url, video_id) embed_code = self._search_regex( r'embedCode=([^&\'"]+)', webpage, @@ -166,6 +162,7 @@ class ViceIE(ViceBaseIE): class ViceShowIE(InfoExtractor): + IE_NAME = 'vice:show' _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P[^/?#&]+)' _TEST = { @@ -192,12 +189,14 @@ class ViceShowIE(InfoExtractor): r'(.+?)', webpage, 'title', default=None) if title: title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip() - description = self._html_search_meta('description', webpage, 'description') + description = self._html_search_meta( + 'description', webpage, 'description') return self.playlist_result(entries, show_id, title, description) class ViceArticleIE(InfoExtractor): + IE_NAME = 'vice:article' _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P[^?#]+)' _TESTS = [{ @@ -216,8 +215,9 @@ class ViceArticleIE(InfoExtractor): # AES-encrypted m3u8 'skip_download': True, }, + 'add_ie': ['UplynkPreplay'], }, { - 'url': 'http://www.vice.com/video/how-to-hack-a-car', + 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car', 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', 'info_dict': { 'id': '3jstaBeXgAs', @@ -229,6 +229,12 @@ class ViceArticleIE(InfoExtractor): 'upload_date': '20140529', }, 'add_ie': ['Youtube'], + }, { + 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1', + 'only_matching': True, + }, { + 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229', + 'only_matching': True, }] def _real_extract(self, url): @@ -240,22 +246,29 @@ class ViceArticleIE(InfoExtractor): r'window\.__PREFETCH_DATA\s*=\s*({.*});', webpage, 'prefetch data'), display_id) body = prefetch_data['body'] - youtube_url = self._html_search_regex( - r']+src="(.*youtube\.com/.*)"', body, 'YouTube URL', default=None) - if youtube_url: + + def _url_res(video_url, ie_key): return { '_type': 'url_transparent', - 'url': youtube_url, + 'url': video_url, 'display_id': display_id, - 'ie_key': 'Youtube', + 'ie_key': ie_key, } - video_url = self._html_search_regex( - r'data-video-url="([^"]+)"', prefetch_data['embed_code'], 'video URL') + embed_code = self._search_regex( + r'embedCode=([^&\'"]+)', body, + 'ooyala embed code', default=None) + if embed_code: + return _url_res('ooyala:%s' % embed_code, 'Ooyala') - return { - '_type': 'url_transparent', - 'url': video_url, - 'display_id': display_id, - 'ie_key': ViceIE.ie_key(), - } + youtube_url = self._html_search_regex( + r']+src="(.*youtube\.com/.*)"', + body, 'YouTube URL', default=None) + if youtube_url: + return _url_res(youtube_url, 'Youtube') + + video_url = self._html_search_regex( + r'data-video-url="([^"]+)"', + prefetch_data['embed_code'], 'video URL') + + return _url_res(video_url, ViceIE.ie_key()) From 329e3dd5adf52520c87ba31395d090455114783b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 May 2017 22:59:15 +0700 Subject: [PATCH 12/47] [nrk] Extract chapters --- youtube_dl/extractor/nrk.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 7fe79cb53..3b4f51f61 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -148,13 +148,34 @@ class NRKBaseIE(InfoExtractor): vcodec = 'none' if data.get('mediaType') == 'Audio' else None - # TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged - for entry in entries: entry.update(common_info) for f in entry['formats']: f['vcodec'] = vcodec + points = data.get('shortIndexPoints') + if isinstance(points, list): + chapters = [] + for next_num, point in enumerate(points, start=1): + if not isinstance(point, dict): + continue + start_time = parse_duration(point.get('startPoint')) + if start_time is None: + continue + end_time = parse_duration( + data.get('duration') + if next_num == len(points) + else points[next_num].get('startPoint')) + if end_time is None: + continue + chapters.append({ + 'start_time': start_time, + 'end_time': end_time, + 'title': point.get('title'), + }) + if chapters and len(entries) == 1: + entries[0]['chapters'] = chapters + return self.playlist_result(entries, video_id, title, description) From 9cafc3fd8b54b9b91a145cddf9e4db0bd59e1b5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 May 2017 02:27:06 +0700 Subject: [PATCH 13/47] [youtube] Extract chapters --- test/test_youtube_chapters.py | 267 ++++++++++++++++++++++++++++++++ youtube_dl/extractor/youtube.py | 36 ++++- 2 files changed, 301 insertions(+), 2 deletions(-) create mode 100644 test/test_youtube_chapters.py diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py new file mode 100644 index 000000000..a5d05355f --- /dev/null +++ b/test/test_youtube_chapters.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import expect_value +from youtube_dl.extractor import YoutubeIE + + +class TestYoutubeChapters(unittest.TestCase): + + _TEST_CASES = [ + ( + # https://www.youtube.com/watch?v=A22oy8dFjqc + # pattern: 00:00 - + '''This is the absolute ULTIMATE experience of Queen's set at LIVE AID, this is the best video mixed to the absolutely superior stereo radio broadcast. This vastly superior audio mix takes a huge dump on all of the official mixes. Best viewed in 1080p. ENJOY! ***MAKE SURE TO READ THE DESCRIPTION***<br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+36);return false;">00:36</a> - Bohemian Rhapsody<br /><a href="#" onclick="yt.www.watch.player.seekTo(02*60+42);return false;">02:42</a> - Radio Ga Ga<br /><a href="#" onclick="yt.www.watch.player.seekTo(06*60+53);return false;">06:53</a> - Ay Oh!<br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+34);return false;">07:34</a> - Hammer To Fall<br /><a href="#" onclick="yt.www.watch.player.seekTo(12*60+08);return false;">12:08</a> - Crazy Little Thing Called Love<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+03);return false;">16:03</a> - We Will Rock You<br /><a href="#" onclick="yt.www.watch.player.seekTo(17*60+18);return false;">17:18</a> - We Are The Champions<br /><a href="#" onclick="yt.www.watch.player.seekTo(21*60+12);return false;">21:12</a> - Is This The World We Created...?<br /><br />Short song analysis:<br /><br />- "Bohemian Rhapsody": Although it's a short medley version, it's one of the best performances of the ballad section, with Freddie nailing the Bb4s with the correct studio phrasing (for the first time ever!).<br /><br />- "Radio Ga Ga": Although it's missing one chorus, this is one of - if not the best - the best versions ever, Freddie nails all the Bb4s and sounds very clean! Spike Edney's Roland Jupiter 8 also really shines through on this mix, compared to the DVD releases!<br /><br />- "Audience Improv": A great improv, Freddie sounds strong and confident. You gotta love when he sustains that A4 for 4 seconds!<br /><br />- "Hammer To Fall": Despite missing a verse and a chorus, it's a strong version (possibly the best ever). Freddie sings the song amazingly, and even ad-libs a C#5 and a C5! Also notice how heavy Brian's guitar sounds compared to the thin DVD mixes - it roars!<br /><br />- "Crazy Little Thing Called Love": A great version, the crowd loves the song, the jam is great as well! Only downside to this is the slight feedback issues.<br /><br />- "We Will Rock You": Although cut down to the 1st verse and chorus, Freddie sounds strong. He nails the A4, and the solo from Dr. May is brilliant!<br /><br />- "We Are the Champions": Perhaps the high-light of the performance - Freddie is very daring on this version, he sustains the pre-chorus Bb4s, nails the 1st C5, belts great A4s, but most importantly: He nails the chorus Bb4s, in all 3 choruses! This is the only time he has ever done so! It has to be said though, the last one sounds a bit rough, but that's a side effect of belting high notes for the past 18 minutes, with nodules AND laryngitis!<br /><br />- "Is This The World We Created... ?": Freddie and Brian perform a beautiful version of this, and it is one of the best versions ever. It's both sad and hilarious that a couple of BBC engineers are talking over the song, one of them being completely oblivious of the fact that he is interrupting the performance, on live television... Which was being televised to almost 2 billion homes.<br /><br /><br />All rights go to their respective owners!<br />-----Copyright Disclaimer Under Section 107 of the Copyright Act 1976, allowance is made for fair use for purposes such as criticism, comment, news reporting, teaching, scholarship, and research. Fair use is a use permitted by copyright statute that might otherwise be infringing. Non-profit, educational or personal use tips the balance in favor of fair use''', + 1477, + [{ + 'start_time': 36, + 'end_time': 162, + 'title': 'Bohemian Rhapsody', + }, { + 'start_time': 162, + 'end_time': 413, + 'title': 'Radio Ga Ga', + }, { + 'start_time': 413, + 'end_time': 454, + 'title': 'Ay Oh!', + }, { + 'start_time': 454, + 'end_time': 728, + 'title': 'Hammer To Fall', + }, { + 'start_time': 728, + 'end_time': 963, + 'title': 'Crazy Little Thing Called Love', + }, { + 'start_time': 963, + 'end_time': 1038, + 'title': 'We Will Rock You', + }, { + 'start_time': 1038, + 'end_time': 1272, + 'title': 'We Are The Champions', + }, { + 'start_time': 1272, + 'end_time': 1477, + 'title': 'Is This The World We Created...?', + }] + ), + ( + # https://www.youtube.com/watch?v=ekYlRhALiRQ + # pattern: <num>. <title> 0:00 + '1. Those Beaten Paths of Confusion <a href="#" onclick="yt.www.watch.player.seekTo(0*60+00);return false;">0:00</a><br />2. Beyond the Shadows of Emptiness & Nothingness <a href="#" onclick="yt.www.watch.player.seekTo(11*60+47);return false;">11:47</a><br />3. Poison Yourself...With Thought <a href="#" onclick="yt.www.watch.player.seekTo(26*60+30);return false;">26:30</a><br />4. The Agents of Transformation <a href="#" onclick="yt.www.watch.player.seekTo(35*60+57);return false;">35:57</a><br />5. Drowning in the Pain of Consciousness <a href="#" onclick="yt.www.watch.player.seekTo(44*60+32);return false;">44:32</a><br />6. Deny the Disease of Life <a href="#" onclick="yt.www.watch.player.seekTo(53*60+07);return false;">53:07</a><br /><br />More info/Buy: http://crepusculonegro.storenvy.com/products/257645-cn-03-arizmenda-within-the-vacuum-of-infinity<br /><br />No copyright is intended. The rights to this video are assumed by the owner and its affiliates.', + 4009, + [{ + 'start_time': 0, + 'end_time': 707, + 'title': '1. Those Beaten Paths of Confusion', + }, { + 'start_time': 707, + 'end_time': 1590, + 'title': '2. Beyond the Shadows of Emptiness & Nothingness', + }, { + 'start_time': 1590, + 'end_time': 2157, + 'title': '3. Poison Yourself...With Thought', + }, { + 'start_time': 2157, + 'end_time': 2672, + 'title': '4. The Agents of Transformation', + }, { + 'start_time': 2672, + 'end_time': 3187, + 'title': '5. Drowning in the Pain of Consciousness', + }, { + 'start_time': 3187, + 'end_time': 4009, + 'title': '6. Deny the Disease of Life', + }] + ), + ( + # https://www.youtube.com/watch?v=WjL4pSzog9w + # pattern: 00:00 <title> + '<a href="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" class="yt-uix-servicelink " data-target-new-window="True" data-servicelink="CDAQ6TgYACITCNf1raqT2dMCFdRjGAod_o0CBSj4HQ" data-url="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" rel="nofollow noopener" target="_blank">https://arizmenda.bandcamp.com/merch/...</a><br /><br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+00);return false;">00:00</a> Christening Unborn Deformities <br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+08);return false;">07:08</a> Taste of Purity<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+16);return false;">16:16</a> Sculpting Sins of a Universal Tongue<br /><a href="#" onclick="yt.www.watch.player.seekTo(24*60+45);return false;">24:45</a> Birth<br /><a href="#" onclick="yt.www.watch.player.seekTo(31*60+24);return false;">31:24</a> Neves<br /><a href="#" onclick="yt.www.watch.player.seekTo(37*60+55);return false;">37:55</a> Libations in Limbo', + 2705, + [{ + 'start_time': 0, + 'end_time': 428, + 'title': 'Christening Unborn Deformities', + }, { + 'start_time': 428, + 'end_time': 976, + 'title': 'Taste of Purity', + }, { + 'start_time': 976, + 'end_time': 1485, + 'title': 'Sculpting Sins of a Universal Tongue', + }, { + 'start_time': 1485, + 'end_time': 1884, + 'title': 'Birth', + }, { + 'start_time': 1884, + 'end_time': 2275, + 'title': 'Neves', + }, { + 'start_time': 2275, + 'end_time': 2705, + 'title': 'Libations in Limbo', + }] + ), + ( + # https://www.youtube.com/watch?v=o3r1sn-t3is + # pattern: <title> 00:00 <note> + 'Download this show in MP3: <a href="http://sh.st/njZKK" class="yt-uix-servicelink " data-url="http://sh.st/njZKK" data-target-new-window="True" data-servicelink="CDAQ6TgYACITCK3j8_6o2dMCFVDCGAoduVAKKij4HQ" rel="nofollow noopener" target="_blank">http://sh.st/njZKK</a><br /><br />Setlist:<br />I-E-A-I-A-I-O <a href="#" onclick="yt.www.watch.player.seekTo(00*60+45);return false;">00:45</a><br />Suite-Pee <a href="#" onclick="yt.www.watch.player.seekTo(4*60+26);return false;">4:26</a> (Incomplete)<br />Attack <a href="#" onclick="yt.www.watch.player.seekTo(5*60+31);return false;">5:31</a> (First live performance since 2011)<br />Prison Song <a href="#" onclick="yt.www.watch.player.seekTo(8*60+42);return false;">8:42</a><br />Know <a href="#" onclick="yt.www.watch.player.seekTo(12*60+32);return false;">12:32</a> (First live performance since 2011)<br />Aerials <a href="#" onclick="yt.www.watch.player.seekTo(15*60+32);return false;">15:32</a><br />Soldier Side - Intro <a href="#" onclick="yt.www.watch.player.seekTo(19*60+13);return false;">19:13</a><br />B.Y.O.B. <a href="#" onclick="yt.www.watch.player.seekTo(20*60+09);return false;">20:09</a><br />Soil <a href="#" onclick="yt.www.watch.player.seekTo(24*60+32);return false;">24:32</a><br />Darts <a href="#" onclick="yt.www.watch.player.seekTo(27*60+48);return false;">27:48</a><br />Radio/Video <a href="#" onclick="yt.www.watch.player.seekTo(30*60+38);return false;">30:38</a><br />Hypnotize <a href="#" onclick="yt.www.watch.player.seekTo(35*60+05);return false;">35:05</a><br />Temper <a href="#" onclick="yt.www.watch.player.seekTo(38*60+08);return false;">38:08</a> (First live performance since 1999)<br />CUBErt <a href="#" onclick="yt.www.watch.player.seekTo(41*60+00);return false;">41:00</a><br />Needles <a href="#" onclick="yt.www.watch.player.seekTo(42*60+57);return false;">42:57</a><br />Deer Dance <a href="#" onclick="yt.www.watch.player.seekTo(46*60+27);return false;">46:27</a><br />Bounce <a href="#" onclick="yt.www.watch.player.seekTo(49*60+38);return false;">49:38</a><br />Suggestions <a href="#" onclick="yt.www.watch.player.seekTo(51*60+25);return false;">51:25</a><br />Psycho <a href="#" onclick="yt.www.watch.player.seekTo(53*60+52);return false;">53:52</a><br />Chop Suey! <a href="#" onclick="yt.www.watch.player.seekTo(58*60+13);return false;">58:13</a><br />Lonely Day <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+01*60+15);return false;">1:01:15</a><br />Question! <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+04*60+14);return false;">1:04:14</a><br />Lost in Hollywood <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+08*60+10);return false;">1:08:10</a><br />Vicinity of Obscenity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+13*60+40);return false;">1:13:40</a>(First live performance since 2012)<br />Forest <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+16*60+17);return false;">1:16:17</a><br />Cigaro <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+20*60+02);return false;">1:20:02</a><br />Toxicity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+23*60+57);return false;">1:23:57</a>(with Chino Moreno)<br />Sugar <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+27*60+53);return false;">1:27:53</a>', + 5640, + [{ + 'start_time': 45, + 'end_time': 266, + 'title': 'I-E-A-I-A-I-O', + }, { + 'start_time': 266, + 'end_time': 331, + 'title': 'Suite-Pee (Incomplete)', + }, { + 'start_time': 331, + 'end_time': 522, + 'title': 'Attack (First live performance since 2011)', + }, { + 'start_time': 522, + 'end_time': 752, + 'title': 'Prison Song', + }, { + 'start_time': 752, + 'end_time': 932, + 'title': 'Know (First live performance since 2011)', + }, { + 'start_time': 932, + 'end_time': 1153, + 'title': 'Aerials', + }, { + 'start_time': 1153, + 'end_time': 1209, + 'title': 'Soldier Side - Intro', + }, { + 'start_time': 1209, + 'end_time': 1472, + 'title': 'B.Y.O.B.', + }, { + 'start_time': 1472, + 'end_time': 1668, + 'title': 'Soil', + }, { + 'start_time': 1668, + 'end_time': 1838, + 'title': 'Darts', + }, { + 'start_time': 1838, + 'end_time': 2105, + 'title': 'Radio/Video', + }, { + 'start_time': 2105, + 'end_time': 2288, + 'title': 'Hypnotize', + }, { + 'start_time': 2288, + 'end_time': 2460, + 'title': 'Temper (First live performance since 1999)', + }, { + 'start_time': 2460, + 'end_time': 2577, + 'title': 'CUBErt', + }, { + 'start_time': 2577, + 'end_time': 2787, + 'title': 'Needles', + }, { + 'start_time': 2787, + 'end_time': 2978, + 'title': 'Deer Dance', + }, { + 'start_time': 2978, + 'end_time': 3085, + 'title': 'Bounce', + }, { + 'start_time': 3085, + 'end_time': 3232, + 'title': 'Suggestions', + }, { + 'start_time': 3232, + 'end_time': 3493, + 'title': 'Psycho', + }, { + 'start_time': 3493, + 'end_time': 3675, + 'title': 'Chop Suey!', + }, { + 'start_time': 3675, + 'end_time': 3854, + 'title': 'Lonely Day', + }, { + 'start_time': 3854, + 'end_time': 4090, + 'title': 'Question!', + }, { + 'start_time': 4090, + 'end_time': 4420, + 'title': 'Lost in Hollywood', + }, { + 'start_time': 4420, + 'end_time': 4577, + 'title': 'Vicinity of Obscenity (First live performance since 2012)', + }, { + 'start_time': 4577, + 'end_time': 4802, + 'title': 'Forest', + }, { + 'start_time': 4802, + 'end_time': 5037, + 'title': 'Cigaro', + }, { + 'start_time': 5037, + 'end_time': 5273, + 'title': 'Toxicity (with Chino Moreno)', + }, { + 'start_time': 5273, + 'end_time': 5640, + 'title': 'Sugar', + }] + ), + ( + # https://www.youtube.com/watch?v=PkYLQbsqCE8 + # pattern: <num> - <title> [<latinized title>] 0:00:00 + '''Затемно (Zatemno) is an Obscure Black Metal Band from Russia.<br /><br />"Во прах (Vo prakh)'' Into The Ashes", Debut mini-album released may 6, 2016, by Death Knell Productions<br />Released on 6 panel digipak CD, limited to 100 copies only<br />And digital format on Bandcamp<br /><br />Tracklist<br /><br />1 - Во прах [Vo prakh] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;">0:00:00</a><br />2 - Искупление [Iskupleniye] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+08*60+10);return false;">0:08:10</a><br />3 - Из серпов луны...[Iz serpov luny] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+14*60+30);return false;">0:14:30</a><br /><br />Links:<br /><a href="https://deathknellprod.bandcamp.com/album/--2" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://deathknellprod.bandcamp.com/album/--2" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://deathknellprod.bandcamp.com/a...</a><br /><a href="https://www.facebook.com/DeathKnellProd/" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://www.facebook.com/DeathKnellProd/" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://www.facebook.com/DeathKnellProd/</a><br /><br /><br />I don't have any right about this artifact, my only intention is to spread the music of the band, all rights are reserved to the Затемно (Zatemno) and his producers, Death Knell Productions.<br /><br />------------------------------------------------------------------<br /><br />Subscribe for more videos like this.<br />My link: <a href="https://web.facebook.com/AttackOfTheDragons" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://web.facebook.com/AttackOfTheDragons" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://web.facebook.com/AttackOfTheD...</a>''', + 1138, + [{ + 'start_time': 0, + 'end_time': 490, + 'title': '1 - Во прах [Vo prakh]', + }, { + 'start_time': 490, + 'end_time': 870, + 'title': '2 - Искупление [Iskupleniye]', + }, { + 'start_time': 870, + 'end_time': 1138, + 'title': '3 - Из серпов луны...[Iz serpov luny]', + }] + ), + ] + + def test_youtube_chapters(self): + for description, duration, expected_chapters in self._TEST_CASES: + ie = YoutubeIE() + expect_value( + self, ie._extract_chapters(description, duration), + expected_chapters, None) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 480f403da..ec12e313c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1257,6 +1257,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') + @staticmethod + def _extract_chapters(description, duration): + if not description: + return None + chapter_lines = re.findall( + r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)', + description) + if not chapter_lines: + return None + chapters = [] + for next_num, (chapter_line, time_point) in enumerate( + chapter_lines, start=1): + start_time = parse_duration(time_point) + if start_time is None: + continue + end_time = (duration if next_num == len(chapter_lines) + else parse_duration(chapter_lines[next_num][1])) + if end_time is None: + continue + chapter_title = re.sub( + r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-') + chapter_title = re.sub(r'\s+', ' ', chapter_title) + chapters.append({ + 'start_time': start_time, + 'end_time': end_time, + 'title': chapter_title, + }) + return chapters + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -1399,9 +1428,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_title = '_' # description - video_description = get_element_by_id("eow-description", video_webpage) + description_original = video_description = get_element_by_id("eow-description", video_webpage) if video_description: - video_description = re.sub(r'''(?x) + description_original = video_description = re.sub(r'''(?x) <a\s+ (?:[a-zA-Z-]+="[^"]*"\s+)*? (?:title|href)="([^"]+)"\s+ @@ -1558,6 +1587,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if self._downloader.params.get('writeannotations', False): video_annotations = self._extract_annotations(video_id) + chapters = self._extract_chapters(description_original, video_duration) + if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() formats = [{ @@ -1790,6 +1821,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': video_duration, 'age_limit': 18 if age_gate else 0, 'annotations': video_annotations, + 'chapters': chapters, 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id, 'view_count': view_count, 'like_count': like_count, From a99cc4ca167e3244f068737110168716352dbbc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 May 2017 02:46:37 +0700 Subject: [PATCH 14/47] [pornhub] Extend _VALID_URL (closes #12996) --- youtube_dl/extractor/pornhub.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index b25f1f193..1dcc8df00 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| + (?:[a-z]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P<id>[\da-z]+) @@ -97,6 +97,9 @@ class PornHubIE(InfoExtractor): }, { 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', 'only_matching': True, + }, { + 'url': 'http://www.pornhub.com/video/show?viewkey=648719015', + 'only_matching': True, }] @staticmethod From 8b4774dcac4cb82b47213126f3430ac9dc25ec28 Mon Sep 17 00:00:00 2001 From: Luca Steeb <contact@luca-steeb.com> Date: Fri, 5 May 2017 22:35:42 +0200 Subject: [PATCH 15/47] [bandcamp] Fix thumbnail extraction --- youtube_dl/extractor/bandcamp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index df2972f26..489d0ba53 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -47,6 +47,7 @@ class BandcampIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') webpage = self._download_webpage(url, title) + thumbnail = self._html_search_meta('og:image', webpage, default=None) m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) if not m_download: m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) @@ -75,6 +76,7 @@ class BandcampIE(InfoExtractor): return { 'id': track_id, 'title': data['title'], + 'thumbnail': thumbnail, 'formats': formats, 'duration': float_or_none(data.get('duration')), } @@ -143,7 +145,7 @@ class BandcampIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': info.get('thumb_url'), + 'thumbnail': info.get('thumb_url') or thumbnail, 'uploader': info.get('artist'), 'artist': artist, 'track': track, From 1f9fefe7f578169d41d75324decbd46ec8594311 Mon Sep 17 00:00:00 2001 From: Tithen-Firion <Tithen-Firion@users.noreply.github.com> Date: Fri, 5 May 2017 22:39:14 +0200 Subject: [PATCH 16/47] [crackle] Update test --- youtube_dl/extractor/crackle.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index f919ed208..13f425b2b 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -21,9 +21,10 @@ class CrackleIE(InfoExtractor): 'season_number': 8, 'episode_number': 4, 'subtitles': { - 'en-US': [{ - 'ext': 'ttml', - }] + 'en-US': [ + {'ext': 'vtt'}, + {'ext': 'tt'}, + ] }, }, 'params': { From a57a8e991860fb96e6111769ab0a22455c8854fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 May 2017 05:30:56 +0700 Subject: [PATCH 17/47] [test_youtube_chapters] Add coding cookie --- test/test_youtube_chapters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py index a5d05355f..5435ee4a8 100644 --- a/test/test_youtube_chapters.py +++ b/test/test_youtube_chapters.py @@ -1,3 +1,4 @@ +# coding: utf-8 #!/usr/bin/env python from __future__ import unicode_literals From fd178b8748979ab19f1cb3d7d689fe19a447ca2f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 6 May 2017 07:19:07 +0100 Subject: [PATCH 18/47] [theplatform] extract chapters --- youtube_dl/extractor/theplatform.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 9a424b1c6..de236bbba 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -80,14 +80,33 @@ class ThePlatformBaseIE(OnceIE): 'url': src, }) + duration = info.get('duration') + tp_chapters = info.get('chapters', []) + chapters = [] + if tp_chapters: + def _add_chapter(start_time, end_time): + start_time = float_or_none(start_time, 1000) + end_time = float_or_none(end_time, 1000) + if start_time is None or end_time is None: + return + chapters.append({ + 'start_time': start_time, + 'end_time': end_time, + }) + + for chapter in tp_chapters[:-1]: + _add_chapter(chapter.get('startTime'), chapter.get('endTime')) + _add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration) + return { 'title': info['title'], 'subtitles': subtitles, 'description': info['description'], 'thumbnail': info['defaultThumbnailUrl'], - 'duration': int_or_none(info.get('duration'), 1000), + 'duration': float_or_none(duration, 1000), 'timestamp': int_or_none(info.get('pubDate'), 1000) or None, 'uploader': info.get('billingCode'), + 'chapters': chapters, } def _extract_theplatform_metadata(self, path, video_id): From 74c09c852a183813174803306176a86d449da889 Mon Sep 17 00:00:00 2001 From: midas02 <midas02@users.noreply.github.com> Date: Sun, 30 Apr 2017 19:36:44 +0200 Subject: [PATCH 19/47] [rmcdecouverte] Fix extraction --- youtube_dl/extractor/rmcdecouverte.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py index 2340dae53..f6ef6d747 100644 --- a/youtube_dl/extractor/rmcdecouverte.py +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -13,15 +13,15 @@ class RMCDecouverteIE(InfoExtractor): _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)' _TEST = { - 'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=1430&title=LES%20HEROS%20DU%2088e%20ETAGE', + 'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=16548', 'info_dict': { - 'id': '5111223049001', + 'id': '5411254766001', 'ext': 'mp4', - 'title': ': LES HEROS DU 88e ETAGE', - 'description': 'Découvrez comment la bravoure de deux hommes dans la Tour Nord du World Trade Center a sauvé la vie d\'innombrables personnes le 11 septembre 2001.', + 'title': '39/45:LE RESEAU DES FAUX BILLETS', + 'description': 'ic Brunet propose un nouvel \u00e9pisode des Grains de sable de l\'Histoire sur la plus grosse affaire de contrefa\u00e7on de la Seconde Guerre mondiale.', 'uploader_id': '1969646226001', - 'upload_date': '20160904', - 'timestamp': 1472951103, + 'upload_date': '20170426', + 'timestamp': 1493166610, }, 'params': { # rtmp download @@ -35,5 +35,8 @@ class RMCDecouverteIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] + if brightcove_legacy_url: + brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] + else: + brightcove_id = self._search_regex(r'data-video-id="(.*?)"', webpage, 'brightcove_id') return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From 566fbbaefd459df46ebce512cd916248e069e4f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 May 2017 17:55:32 +0700 Subject: [PATCH 20/47] [rmcdecouverte] Improve (closes #12937) --- youtube_dl/extractor/rmcdecouverte.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py index f6ef6d747..e921ca3e6 100644 --- a/youtube_dl/extractor/rmcdecouverte.py +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -13,21 +13,20 @@ class RMCDecouverteIE(InfoExtractor): _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)' _TEST = { - 'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=16548', + 'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=13502&title=AQUAMEN:LES%20ROIS%20DES%20AQUARIUMS%20:UN%20DELICIEUX%20PROJET', 'info_dict': { - 'id': '5411254766001', + 'id': '5419055995001', 'ext': 'mp4', - 'title': '39/45:LE RESEAU DES FAUX BILLETS', - 'description': 'ic Brunet propose un nouvel \u00e9pisode des Grains de sable de l\'Histoire sur la plus grosse affaire de contrefa\u00e7on de la Seconde Guerre mondiale.', + 'title': 'UN DELICIEUX PROJET', + 'description': 'md5:63610df7c8b1fc1698acd4d0d90ba8b5', 'uploader_id': '1969646226001', - 'upload_date': '20170426', - 'timestamp': 1493166610, + 'upload_date': '20170502', + 'timestamp': 1493745308, }, 'params': { - # rtmp download 'skip_download': True, }, - 'skip': 'Only works from France', + 'skip': 'only available for a week', } BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' @@ -36,7 +35,11 @@ class RMCDecouverteIE(InfoExtractor): webpage = self._download_webpage(url, video_id) brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) if brightcove_legacy_url: - brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] + brightcove_id = compat_parse_qs(compat_urlparse.urlparse( + brightcove_legacy_url).query)['@videoPlayer'][0] else: - brightcove_id = self._search_regex(r'data-video-id="(.*?)"', webpage, 'brightcove_id') - return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) + brightcove_id = self._search_regex( + r'data-video-id=["\'](\d+)', webpage, 'brightcove id') + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', + brightcove_id) From 228cd9bb906eb1dae476a05dbff0796a2d9f9be5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 6 May 2017 18:57:24 +0800 Subject: [PATCH 21/47] [bilibili] Fix video downloading (closes #13001) --- ChangeLog | 1 + youtube_dl/extractor/bilibili.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 0908e2e93..7cceec10f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [bilibili] Fix video downloading (#13001) + [vice] Support vice articles (#12968) * [vice] Fix extraction for non en_us videos (#12967) * [gdcvault] Fix extraction for some videos (#12733) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 80dd8382e..1e3f25515 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -122,6 +122,11 @@ class BiliBiliIE(InfoExtractor): 'preference': -2 if 'hd.mp4' in backup_url else -3, }) + for a_format in formats: + a_format.setdefault('http_headers', {}).update({ + 'Referer': url, + }) + self._sort_formats(formats) entries.append({ From 10c87c151b6f6a2f45b1a3ad1ac2d38fcf0fb0a6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 6 May 2017 19:06:18 +0800 Subject: [PATCH 22/47] [utils] Rename try_multipart_encode to _multipart_encode_impl To state that this is an internal function and people should be careful when using it outside youtube-dl. --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 911cacd29..aa1900436 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2098,7 +2098,7 @@ def update_Request(req, url=None, data=None, headers={}, query={}): return new_req -def try_multipart_encode(data, boundary): +def _multipart_encode_impl(data, boundary): content_type = 'multipart/form-data; boundary=%s' % boundary out = b'' @@ -2140,7 +2140,7 @@ def multipart_encode(data, boundary=None): boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff)) try: - out, content_type = try_multipart_encode(data, boundary) + out, content_type = _multipart_encode_impl(data, boundary) break except ValueError: if has_specified_boundary: From e00eb564e921b609c94f87d7db7ad7f709b704f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 May 2017 23:58:47 +0700 Subject: [PATCH 23/47] [youtube] Fix authentication (closes #12927) --- youtube_dl/extractor/youtube.py | 142 +++++++++++++++++--------------- 1 file changed, 77 insertions(+), 65 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ec12e313c..c2e06c3a6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -37,8 +37,7 @@ from ..utils import ( parse_codecs, parse_duration, remove_quotes, - remove_start, - sanitized_Request, + # remove_start, smuggle_url, str_to_int, try_get, @@ -54,7 +53,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' - _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password' + + _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup' + _LOOKUP_REQ_TEMPLATE = '["{0}",null,[],null,"US",null,null,2,false,true,[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn",null,[],4],1,[null,null,[]],null,null,null,true],"{0}"]' + + _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' + _PASSWORD_CHALLENGE_REQ_TEMPLATE = '["{0}",null,1,null,[1,null,null,null,["{1}",null,true]],[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn",null,[],4],1,[null,null,[]],null,null,null,true]]' + + _TFA_URL = 'https://accounts.google.com/_/signin/challenge' + _TFA_REQ_TEMPLATE = '["{0}",null,2,null,[9,null,null,null,null,null,null,null,[null,"{1}",false,2]]]' + _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False @@ -96,72 +104,76 @@ class YoutubeBaseInfoExtractor(InfoExtractor): login_form = self._hidden_inputs(login_page) - login_form.update({ - 'checkConnection': 'youtube', - 'Email': username, - 'Passwd': password, - }) - - login_results = self._download_webpage( - self._PASSWORD_CHALLENGE_URL, None, - note='Logging in', errnote='unable to log in', fatal=False, - data=urlencode_postdata(login_form)) - if login_results is False: - return False - - error_msg = self._html_search_regex( - r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<', - login_results, 'error message', default=None) - if error_msg: - raise ExtractorError('Unable to login: %s' % error_msg, expected=True) - - if re.search(r'id="errormsg_0_Passwd"', login_results) is not None: - raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True) - - # Two-Factor - # TODO add SMS and phone call support - these require making a request and then prompting the user - - if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None: - tfa_code = self._get_tfa_info('2-step verification code') - - if not tfa_code: - self._downloader.report_warning( - 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>' - '(Note that only TOTP (Google Authenticator App) codes work at this time.)') - return False - - tfa_code = remove_start(tfa_code, 'G-') - - tfa_form_strs = self._form_hidden_inputs('challenge', login_results) - - tfa_form_strs.update({ - 'Pin': tfa_code, - 'TrustDevice': 'on', + def req(url, f_req, note, errnote): + data = login_form.copy() + data.update({ + 'pstMsg': 1, + 'checkConnection': 'youtube', + 'checkedDomains': 'youtube', + 'hl': 'en', + 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]', + 'f.req': f_req, + 'flowName': 'GlifWebSignIn', + 'flowEntry': 'ServiceLogin', }) + return self._download_json( + url, None, note=note, errnote=errnote, + transform_source=lambda s: re.sub(r'^[^[]*', '', s), + fatal=False, + data=urlencode_postdata(data), headers={ + 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8', + 'Google-Accounts-XSRF': 1, + }) - tfa_data = urlencode_postdata(tfa_form_strs) + lookup_results = req( + self._LOOKUP_URL, self._LOOKUP_REQ_TEMPLATE.format(username), + 'Looking up account info', 'Unable to look up account info') - tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data) - tfa_results = self._download_webpage( - tfa_req, None, - note='Submitting TFA code', errnote='unable to submit tfa', fatal=False) - - if tfa_results is False: - return False - - if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None: - self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.') - return False - if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None: - self._downloader.report_warning('unable to log in - did the page structure change?') - return False - if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None: - self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') - return False - - if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None: - self._downloader.report_warning('unable to log in: bad username or password') + if lookup_results is False: return False + + user_hash = lookup_results[0][2] + + password_challenge_results = req( + self._PASSWORD_CHALLENGE_URL, + self._PASSWORD_CHALLENGE_REQ_TEMPLATE.format(user_hash, password), + 'Logging in', 'Unable to log in')[0] + + if password_challenge_results is False: + return + + msg = password_challenge_results[5] + if msg is not None and isinstance(msg, list): + raise ExtractorError('Unable to login: %s' % msg[5], expected=True) + + password_challenge_results = password_challenge_results[-1] + + # tfa = password_challenge_results[0] + # if isinstance(tfa, list) and tfa[0][2] == 'TWO_STEP_VERIFICATION': + # tfa_code = self._get_tfa_info('2-step verification code') + # + # if not tfa_code: + # self._downloader.report_warning( + # 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>' + # '(Note that only TOTP (Google Authenticator App) codes work at this time.)') + # return False + # + # tfa_code = remove_start(tfa_code, 'G-') + # print('tfa', tfa_code) + # tfa_results = req( + # self._TFA_URL, + # self._TFA_REQ_TEMPLATE.format(user_hash, tfa_code), + # 'Submitting TFA code', 'Unable to submit TFA code') + # + # TODO + + check_cookie_results = self._download_webpage( + password_challenge_results[2], None, 'Checking cookie') + + if '>Sign out<' not in check_cookie_results: + self._downloader.report_warning('Unable to log in') + return False + return True def _real_initialize(self): From e4a75d7932013f49133178d5d84e5f33634a4879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 7 May 2017 00:00:11 +0700 Subject: [PATCH 24/47] [test_youtube_chapters] PEP 8 --- test/test_youtube_chapters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py index 5435ee4a8..cb12f8384 100644 --- a/test/test_youtube_chapters.py +++ b/test/test_youtube_chapters.py @@ -1,5 +1,5 @@ -# coding: utf-8 #!/usr/bin/env python +# coding: utf-8 from __future__ import unicode_literals # Allow direct execution From 3995d37da58ed071b54b7f81757cff4d534f5b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 7 May 2017 04:19:11 +0700 Subject: [PATCH 25/47] [youtube] Fix TFA (#12927) --- youtube_dl/extractor/youtube.py | 151 +++++++++++++++++++++++--------- 1 file changed, 110 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c2e06c3a6..44a39282f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -37,7 +37,7 @@ from ..utils import ( parse_codecs, parse_duration, remove_quotes, - # remove_start, + remove_start, smuggle_url, str_to_int, try_get, @@ -55,13 +55,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup' - _LOOKUP_REQ_TEMPLATE = '["{0}",null,[],null,"US",null,null,2,false,true,[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn",null,[],4],1,[null,null,[]],null,null,null,true],"{0}"]' - - _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' - _PASSWORD_CHALLENGE_REQ_TEMPLATE = '["{0}",null,1,null,[1,null,null,null,["{1}",null,true]],[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn",null,[],4],1,[null,null,[]],null,null,null,true]]' - - _TFA_URL = 'https://accounts.google.com/_/signin/challenge' - _TFA_REQ_TEMPLATE = '["{0}",null,2,null,[9,null,null,null,null,null,null,null,[null,"{1}",false,2]]]' + _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' + _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided @@ -112,7 +107,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'checkedDomains': 'youtube', 'hl': 'en', 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]', - 'f.req': f_req, + 'f.req': json.dumps(f_req), 'flowName': 'GlifWebSignIn', 'flowEntry': 'ServiceLogin', }) @@ -125,53 +120,127 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'Google-Accounts-XSRF': 1, }) + def warn(message): + self._downloader.report_warning(message) + + lookup_req = [ + username, + None, [], None, 'US', None, None, 2, False, True, + [ + None, None, + [2, 1, None, 1, + 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', + None, [], 4], + 1, [None, None, []], None, None, None, True + ], + username, + ] + lookup_results = req( - self._LOOKUP_URL, self._LOOKUP_REQ_TEMPLATE.format(username), + self._LOOKUP_URL, lookup_req, 'Looking up account info', 'Unable to look up account info') if lookup_results is False: return False - user_hash = lookup_results[0][2] + user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str) + if not user_hash: + warn('Unable to extract user hash') + return False - password_challenge_results = req( - self._PASSWORD_CHALLENGE_URL, - self._PASSWORD_CHALLENGE_REQ_TEMPLATE.format(user_hash, password), - 'Logging in', 'Unable to log in')[0] + challenge_req = [ + user_hash, + None, 1, None, [1, None, None, None, [password, None, True]], + [ + None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4], + 1, [None, None, []], None, None, None, True + ]] - if password_challenge_results is False: + challenge_results = req( + self._CHALLENGE_URL, challenge_req, + 'Logging in', 'Unable to log in') + + if challenge_results is False: return - msg = password_challenge_results[5] - if msg is not None and isinstance(msg, list): - raise ExtractorError('Unable to login: %s' % msg[5], expected=True) + login_res = try_get(challenge_results, lambda x: x[0][5], list) + if login_res: + login_msg = try_get(login_res, lambda x: x[5], compat_str) + warn( + 'Unable to login: %s' % 'Invalid password' + if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg) + return False - password_challenge_results = password_challenge_results[-1] + res = try_get(challenge_results, lambda x: x[0][-1], list) + if not res: + warn('Unable to extract result entry') + return False - # tfa = password_challenge_results[0] - # if isinstance(tfa, list) and tfa[0][2] == 'TWO_STEP_VERIFICATION': - # tfa_code = self._get_tfa_info('2-step verification code') - # - # if not tfa_code: - # self._downloader.report_warning( - # 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>' - # '(Note that only TOTP (Google Authenticator App) codes work at this time.)') - # return False - # - # tfa_code = remove_start(tfa_code, 'G-') - # print('tfa', tfa_code) - # tfa_results = req( - # self._TFA_URL, - # self._TFA_REQ_TEMPLATE.format(user_hash, tfa_code), - # 'Submitting TFA code', 'Unable to submit TFA code') - # - # TODO + tfa = try_get(res, lambda x: x[0][0], list) + if tfa: + tfa_str = try_get(tfa, lambda x: x[2], compat_str) + if tfa_str == 'TWO_STEP_VERIFICATION': + # SEND_SUCCESS - TFA code has been successfully sent to phone + # QUOTA_EXCEEDED - reached the limit of TFA codes + status = try_get(tfa, lambda x: x[5], compat_str) + if status == 'QUOTA_EXCEEDED': + warn('Exceeded the limit of TFA codes, try later') + return False + + tl = try_get(challenge_results, lambda x: x[1][2], compat_str) + if not tl: + warn('Unable to extract TL') + return False + + tfa_code = self._get_tfa_info('2-step verification code') + + if not tfa_code: + warn( + 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>' + '(Note that only TOTP (Google Authenticator App) codes work at this time.)') + return False + + tfa_code = remove_start(tfa_code, 'G-') + + tfa_req = [ + user_hash, None, 2, None, + [ + 9, None, None, None, None, None, None, None, + [None, tfa_code, True, 2] + ]] + + tfa_results = req( + self._TFA_URL.format(tl), tfa_req, + 'Submitting TFA code', 'Unable to submit TFA code') + + if tfa_results is False: + return False + + tfa_res = try_get(tfa_results, lambda x: x[0][5], list) + if tfa_res: + tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str) + warn( + 'Unable to finish TFA: %s' % 'Invalid TFA code' + if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg) + return False + + check_cookie_url = try_get( + tfa_results, lambda x: x[0][-1][2], compat_str) + else: + check_cookie_url = try_get(res, lambda x: x[2], compat_str) + + if not check_cookie_url: + warn('Unable to extract CheckCookie URL') + return False check_cookie_results = self._download_webpage( - password_challenge_results[2], None, 'Checking cookie') + check_cookie_url, None, 'Checking cookie', fatal=False) - if '>Sign out<' not in check_cookie_results: - self._downloader.report_warning('Unable to log in') + if check_cookie_results is False: + return False + + if 'https://myaccount.google.com/' not in check_cookie_results: + warn('Unable to log in') return False return True From 3892a9f4ab54a0da5386b6ff612dd5ef9ff70267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 7 May 2017 04:44:54 +0700 Subject: [PATCH 26/47] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7cceec10f..7a344c40f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,32 @@ version <unreleased> +Common +* [extractor/common] Fix typo in _extract_akamai_formats ++ [postprocessor/ffmpeg] Embed chapters into media file with --add-metadata ++ [extractor/common] Introduce chapters meta field + Extractors +* [youtube] Fix authentication (#12820, #12927, #12973, #12992, #12993, #12995, + #13003) * [bilibili] Fix video downloading (#13001) +* [rmcdecouverte] Fix extraction (#12937) +* [theplatform] Extract chapters +* [bandcamp] Fix thumbnail extraction (#12980) +* [pornhub] Extend URL regular expression (#12996) ++ [youtube] Extract chapters ++ [nrk] Extract chapters ++ [vice] Add support for ooyala embeds in article pages + [vice] Support vice articles (#12968) * [vice] Fix extraction for non en_us videos (#12967) * [gdcvault] Fix extraction for some videos (#12733) +* [pbs] Improve multipart video support (#12981) +* [laola1tv] Fix extraction (#12880) + [cda] Support birthday verification (#12789) * [leeco] Fix extraction (#12974) ++ [pbs] Extract chapters +* [amp] Imporove thumbnail and subtitles extraction +* [foxsports] Fix extraction (#12945) +- [coub] Remove comment count extraction (#12941) version 2017.05.01 From 4ac0f573ef10d4e2bd883b63936e3fe1a41c8abb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 7 May 2017 04:51:34 +0700 Subject: [PATCH 27/47] release 2017.05.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 +++-- youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fc221594f..86d11e142 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.05.01 +[debug] youtube-dl version 2017.05.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7a344c40f..5418bb95f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.05.07 Common * [extractor/common] Fix typo in _extract_akamai_formats diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e3c038c48..d867dfe65 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -879,9 +879,10 @@ - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **Viafree** - - **Vice** + - **vice** + - **vice:article** + - **vice:show** - **Viceland** - - **ViceShow** - **Vidbit** - **Viddler** - **Videa** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c19ac49b0..dc707071a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.05.01' +__version__ = '2017.05.07' From 2eeb588efe9a7df4b2dcd90de9e461e8ff4a40fa Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 7 May 2017 08:58:34 +0100 Subject: [PATCH 28/47] [nbc] improve extraction(closes #12364) --- youtube_dl/extractor/nbc.py | 98 ++++++++++++------------------------- 1 file changed, 31 insertions(+), 67 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index d2a44d05d..3b31ca3ef 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -17,7 +17,7 @@ from ..utils import ( class NBCIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' + _VALID_URL = r'https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)' _TESTS = [ { @@ -36,16 +36,6 @@ class NBCIE(AdobePassIE): 'skip_download': True, }, }, - { - 'url': 'http://www.nbc.com/the-tonight-show/episodes/176', - 'info_dict': { - 'id': '176', - 'ext': 'flv', - 'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen', - 'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.', - }, - 'skip': '404 Not Found', - }, { 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821', 'info_dict': { @@ -63,11 +53,6 @@ class NBCIE(AdobePassIE): }, 'skip': 'Only works from US', }, - { - # This video has expired but with an escaped embedURL - 'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515', - 'only_matching': True, - }, { # HLS streams requires the 'hdnea3' cookie 'url': 'http://www.nbc.com/Kings/video/goliath/n1806', @@ -89,58 +74,37 @@ class NBCIE(AdobePassIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - info = { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'id': video_id, + video_data = self._download_json( + 'https://api.nbc.com/v3/videos', video_id, query={ + 'filter[permalink]': url, + })['data'][0]['attributes'] + query = { + 'mbr': 'true', + 'manifest': 'm3u', + } + video_id = video_data['guid'] + title = video_data['title'] + if video_data.get('entitlement') == 'auth': + resource = self._get_mvpd_resource( + 'nbcentertainment', title, video_id, + video_data.get('vChipRating')) + query['auth'] = self._extract_mvpd_auth( + url, video_id, 'nbcentertainment', resource) + theplatform_url = smuggle_url(update_url_query( + 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id, + query), {'force_smil_url': True}) + return { + '_type': 'url_transparent', + 'id': video_id, + 'title': title, + 'url': theplatform_url, + 'description': video_data.get('description'), + 'keywords': video_data.get('keywords'), + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode_number': int_or_none(video_data.get('episodeNumber')), + 'series': video_data.get('showName'), + 'ie_key': 'ThePlatform', } - video_data = None - preload = self._search_regex( - r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None) - if preload: - preload_data = self._parse_json(preload, video_id) - path = compat_urllib_parse_urlparse(url).path.rstrip('/') - entity_id = preload_data.get('xref', {}).get(path) - video_data = preload_data.get('entities', {}).get(entity_id) - if video_data: - query = { - 'mbr': 'true', - 'manifest': 'm3u', - } - video_id = video_data['guid'] - title = video_data['title'] - if video_data.get('entitlement') == 'auth': - resource = self._get_mvpd_resource( - 'nbcentertainment', title, video_id, - video_data.get('vChipRating')) - query['auth'] = self._extract_mvpd_auth( - url, video_id, 'nbcentertainment', resource) - theplatform_url = smuggle_url(update_url_query( - 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id, - query), {'force_smil_url': True}) - info.update({ - 'id': video_id, - 'title': title, - 'url': theplatform_url, - 'description': video_data.get('description'), - 'keywords': video_data.get('keywords'), - 'season_number': int_or_none(video_data.get('seasonNumber')), - 'episode_number': int_or_none(video_data.get('episodeNumber')), - 'series': video_data.get('showName'), - }) - else: - theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex( - [ - r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', - r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"', - r'"embedURL"\s*:\s*"([^"]+)"' - ], - webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/'))) - if theplatform_url.startswith('//'): - theplatform_url = 'http:' + theplatform_url - info['url'] = smuggle_url(theplatform_url, {'source_url': url}) - return info class NBCSportsVPlayerIE(InfoExtractor): From 52294cdda761ad08785e7118ae8e121ceef257ec Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 7 May 2017 09:31:14 +0100 Subject: [PATCH 29/47] [nbc] remove unused imports and extract permalink from modified urls --- youtube_dl/extractor/nbc.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3b31ca3ef..62db70b43 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -5,10 +5,8 @@ import re from .common import InfoExtractor from .theplatform import ThePlatformIE from .adobepass import AdobePassIE -from ..compat import compat_urllib_parse_urlparse from ..utils import ( find_xpath_attr, - lowercase_escape, smuggle_url, unescapeHTML, update_url_query, @@ -17,7 +15,7 @@ from ..utils import ( class NBCIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)' + _VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))' _TESTS = [ { @@ -73,10 +71,10 @@ class NBCIE(AdobePassIE): ] def _real_extract(self, url): - video_id = self._match_id(url) + permalink, video_id = re.match(self._VALID_URL, url).groups() video_data = self._download_json( 'https://api.nbc.com/v3/videos', video_id, query={ - 'filter[permalink]': url, + 'filter[permalink]': permalink, })['data'][0]['attributes'] query = { 'mbr': 'true', From 3d40084b835afe49adff7eca67dec26bc9b95c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 8 May 2017 20:03:38 +0700 Subject: [PATCH 30/47] [nuevo] Pass headers to _extract_nuevo --- youtube_dl/extractor/nuevo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py index 87fb94d1f..be1e09d37 100644 --- a/youtube_dl/extractor/nuevo.py +++ b/youtube_dl/extractor/nuevo.py @@ -10,9 +10,10 @@ from ..utils import ( class NuevoBaseIE(InfoExtractor): - def _extract_nuevo(self, config_url, video_id): + def _extract_nuevo(self, config_url, video_id, headers={}): config = self._download_xml( - config_url, video_id, transform_source=lambda s: s.strip()) + config_url, video_id, transform_source=lambda s: s.strip(), + headers=headers) title = xpath_text(config, './title', 'title', fatal=True).strip() video_id = xpath_text(config, './mediaid', default=video_id) From b6eb74e340fd523fb0c710b3141d6f9c696069f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 8 May 2017 20:08:58 +0700 Subject: [PATCH 31/47] [nonktube] Add extractor (closes #8647, closes #13024) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nonktube.py | 37 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/nonktube.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1d7495910..968cca9d2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -663,6 +663,7 @@ from .nintendo import NintendoIE from .njpwworld import NJPWWorldIE from .nobelprize import NobelPrizeIE from .noco import NocoIE +from .nonktube import NonkTubeIE from .noovo import NoovoIE from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE diff --git a/youtube_dl/extractor/nonktube.py b/youtube_dl/extractor/nonktube.py new file mode 100644 index 000000000..ba6007cd1 --- /dev/null +++ b/youtube_dl/extractor/nonktube.py @@ -0,0 +1,37 @@ +from __future__ import unicode_literals + +from .nuevo import NuevoBaseIE + + +class NonkTubeIE(NuevoBaseIE): + _VALID_URL = r'https?://(?:www\.)?nonktube\.com/(?:(?:video|embed)/|media/nuevo/embed\.php\?.*?\bid=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.nonktube.com/video/118636/sensual-wife-uncensored-fucked-in-hairy-pussy-and-facialized', + 'info_dict': { + 'id': '118636', + 'ext': 'mp4', + 'title': 'Sensual Wife Uncensored Fucked In Hairy Pussy And Facialized', + 'age_limit': 18, + 'duration': 1150.98, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://www.nonktube.com/embed/118636', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + nuevo_url = 'https://www.nonktube.com/media/nuevo/config.php?key=%s' % video_id + + info = self._extract_nuevo( + nuevo_url, video_id, headers={ + 'Referer': 'https://www.nonktube.com/media/nuevo/player.swf?config=%s' % nuevo_url + }) + info.update({ + 'age_limit': 18 + }) + return info From bf82b8732353f45582cfefc6cced01721fbf4154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 8 May 2017 20:13:22 +0700 Subject: [PATCH 32/47] [nonktube] Use econfig nuevo URL --- youtube_dl/extractor/nonktube.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/nonktube.py b/youtube_dl/extractor/nonktube.py index ba6007cd1..63e58aae2 100644 --- a/youtube_dl/extractor/nonktube.py +++ b/youtube_dl/extractor/nonktube.py @@ -25,13 +25,9 @@ class NonkTubeIE(NuevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - nuevo_url = 'https://www.nonktube.com/media/nuevo/config.php?key=%s' % video_id - info = self._extract_nuevo( - nuevo_url, video_id, headers={ - 'Referer': 'https://www.nonktube.com/media/nuevo/player.swf?config=%s' % nuevo_url - }) - info.update({ - 'age_limit': 18 - }) + 'https://www.nonktube.com/media/nuevo/econfig.php?key=%s' + % video_id, video_id) + + info['age_limit'] = 18 return info From 04c09f19618345203a787c300e4ecf99f1ff9b02 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 8 May 2017 15:00:57 +0100 Subject: [PATCH 33/47] [turner] extract thumbnail and is_live and strip description --- youtube_dl/extractor/turner.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 1c0be9fc6..efeb677ee 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -13,6 +13,7 @@ from ..utils import ( xpath_attr, update_url_query, ExtractorError, + strip_or_none, ) @@ -163,17 +164,21 @@ class TurnerBaseIE(AdobePassIE): 'height': int_or_none(image.get('height')), } for image in video_data.findall('images/image')] + is_live = xpath_text(video_data, 'isLive') == 'true' + return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'formats': formats, 'subtitles': subtitles, 'thumbnails': thumbnails, - 'description': xpath_text(video_data, 'description'), + 'thumbnail': xpath_text(video_data, 'poster'), + 'description': strip_or_none(xpath_text(video_data, 'description')), 'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')), 'timestamp': self._extract_timestamp(video_data), 'upload_date': xpath_attr(video_data, 'metas', 'version'), 'series': xpath_text(video_data, 'showTitle'), 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), + 'is_live': is_live, } From 3b859145c2ad4efd5250fc0199174f1ec2068b34 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 8 May 2017 15:01:10 +0100 Subject: [PATCH 34/47] [adultswim] Fix Extraction(closes #8640)(closes #10950)(closes closes #11042)(closes #12121) - add support for adobe pass authentication - add support for live streams - add support for show pages --- youtube_dl/extractor/adultswim.py | 285 +++++++++++------------------- 1 file changed, 106 insertions(+), 179 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 989505c82..9c37ea33c 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -5,91 +5,52 @@ import re from .turner import TurnerBaseIE from ..utils import ( - ExtractorError, int_or_none, + strip_or_none, ) class AdultSwimIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?' + _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?' _TESTS = [{ 'url': 'http://adultswim.com/videos/rick-and-morty/pilot', - 'playlist': [ - { - 'md5': '247572debc75c7652f253c8daa51a14d', - 'info_dict': { - 'id': 'rQxZvXQ4ROaSOqq-or2Mow-0', - 'ext': 'flv', - 'title': 'Rick and Morty - Pilot Part 1', - 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " - }, - }, - { - 'md5': '77b0e037a4b20ec6b98671c4c379f48d', - 'info_dict': { - 'id': 'rQxZvXQ4ROaSOqq-or2Mow-3', - 'ext': 'flv', - 'title': 'Rick and Morty - Pilot Part 4', - 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " - }, - }, - ], 'info_dict': { 'id': 'rQxZvXQ4ROaSOqq-or2Mow', + 'ext': 'mp4', 'title': 'Rick and Morty - Pilot', - 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " - }, - 'skip': 'This video is only available for registered users', - }, { - 'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/', - 'playlist': [ - { - 'md5': '2eb5c06d0f9a1539da3718d897f13ec5', - 'info_dict': { - 'id': '-t8CamQlQ2aYZ49ItZCFog-0', - 'ext': 'flv', - 'title': 'American Dad - Putting Francine Out of Business', - 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' - }, - } - ], - 'info_dict': { - 'id': '-t8CamQlQ2aYZ49ItZCFog', - 'title': 'American Dad - Putting Francine Out of Business', - 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' - }, - }, { - 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/', - 'playlist': [ - { - 'md5': '3e346a2ab0087d687a05e1e7f3b3e529', - 'info_dict': { - 'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0', - 'ext': 'mp4', - 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', - 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n', - }, - } - ], - 'info_dict': { - 'id': 'sY3cMUR_TbuE4YmdjzbIcQ', - 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', - 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n', + 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.', + 'timestamp': 1493267400, + 'upload_date': '20170427', }, 'params': { # m3u8 download 'skip_download': True, - } + }, + 'expected_warnings': ['Unable to download f4m manifest'], + }, { + 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/', + 'info_dict': { + 'id': 'sY3cMUR_TbuE4YmdjzbIcQ', + 'ext': 'mp4', + 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', + 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.', + 'upload_date': '20080124', + 'timestamp': 1201150800, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { - # heroMetadata.trailer 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', 'info_dict': { 'id': 'I0LQFQkaSUaFp8PnAWHhoQ', 'ext': 'mp4', 'title': 'Decker - Inside Decker: A New Hero', - 'description': 'md5:c916df071d425d62d70c86d4399d3ee0', - 'duration': 249.008, + 'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.', + 'timestamp': 1469480460, + 'upload_date': '20160725', }, 'params': { # m3u8 download @@ -97,136 +58,102 @@ class AdultSwimIE(TurnerBaseIE): }, 'expected_warnings': ['Unable to download f4m manifest'], }, { - 'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/', + 'url': 'http://www.adultswim.com/videos/attack-on-titan', 'info_dict': { - 'id': 'eYiLsKVgQ6qTC6agD67Sig', - 'title': 'Toonami - Friday, October 14th, 2016', - 'description': 'md5:99892c96ffc85e159a428de85c30acde', + 'id': 'b7A69dzfRzuaXIECdxW8XQ', + 'title': 'Attack on Titan', + 'description': 'md5:6c8e003ea0777b47013e894767f5e114', + }, + 'playlist_mincount': 12, + }, { + 'url': 'http://www.adultswim.com/videos/streams/williams-stream', + 'info_dict': { + 'id': 'd8DEBj7QRfetLsRgFnGEyg', + 'ext': 'mp4', + 'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'description': 'original programming', }, - 'playlist': [{ - 'md5': '', - 'info_dict': { - 'id': 'eYiLsKVgQ6qTC6agD67Sig', - 'ext': 'mp4', - 'title': 'Toonami - Friday, October 14th, 2016', - 'description': 'md5:99892c96ffc85e159a428de85c30acde', - }, - }], 'params': { # m3u8 download 'skip_download': True, }, - 'expected_warnings': ['Unable to download f4m manifest'], }] - @staticmethod - def find_video_info(collection, slug): - for video in collection.get('videos'): - if video.get('slug') == slug: - return video - - @staticmethod - def find_collection_by_linkURL(collections, linkURL): - for collection in collections: - if collection.get('linkURL') == linkURL: - return collection - - @staticmethod - def find_collection_containing_video(collections, slug): - for collection in collections: - for video in collection.get('videos'): - if video.get('slug') == slug: - return collection, video - return None, None - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - show_path = mobj.group('show_path') - episode_path = mobj.group('episode_path') - is_playlist = True if mobj.group('is_playlist') else False + show_path, episode_path = re.match(self._VALID_URL, url).groups() + display_id = episode_path or show_path + webpage = self._download_webpage(url, display_id) + initial_data = self._parse_json(self._search_regex( + r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});', + webpage, 'initial data'), display_id) - webpage = self._download_webpage(url, episode_path) + is_stream = show_path == 'streams' + if is_stream: + if not episode_path: + episode_path = 'live-stream' - # Extract the value of `bootstrappedData` from the Javascript in the page. - bootstrapped_data = self._parse_json(self._search_regex( - r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path) + video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path) + video_id = video_data.get('stream') - # Downloading videos from a /videos/playlist/ URL needs to be handled differently. - # NOTE: We are only downloading one video (the current one) not the playlist - if is_playlist: - collections = bootstrapped_data['playlists']['collections'] - collection = self.find_collection_by_linkURL(collections, show_path) - video_info = self.find_video_info(collection, episode_path) - - show_title = video_info['showTitle'] - segment_ids = [video_info['videoPlaybackID']] + if not video_id: + entries = [] + for episode in video_data.get('archiveEpisodes', []): + episode_url = episode.get('url') + if not episode_url: + continue + entries.append(self.url_result( + episode_url, 'AdultSwim', episode.get('id'))) + return self.playlist_result( + entries, video_data.get('id'), video_data.get('title'), + strip_or_none(video_data.get('description'))) else: - collections = bootstrapped_data['show']['collections'] - collection, video_info = self.find_collection_containing_video(collections, episode_path) - # Video wasn't found in the collections, let's try `slugged_video`. - if video_info is None: - if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: - video_info = bootstrapped_data['slugged_video'] - if not video_info: - video_info = bootstrapped_data.get( - 'heroMetadata', {}).get('trailer', {}).get('video') - if not video_info: - video_info = bootstrapped_data.get('onlineOriginals', [None])[0] - if not video_info: - raise ExtractorError('Unable to find video info') + show_data = initial_data['show'] - show = bootstrapped_data['show'] - show_title = show['title'] - stream = video_info.get('stream') - if stream and stream.get('videoPlaybackID'): - segment_ids = [stream['videoPlaybackID']] - elif video_info.get('clips'): - segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] - elif video_info.get('videoPlaybackID'): - segment_ids = [video_info['videoPlaybackID']] - elif video_info.get('id'): - segment_ids = [video_info['id']] - else: - if video_info.get('auth') is True: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. You may want to use --cookies.', expected=True) - else: - raise ExtractorError('Unable to find stream or clips') + if not episode_path: + entries = [] + for video in show_data.get('videos', []): + slug = video.get('slug') + if not slug: + continue + entries.append(self.url_result( + 'http://adultswim.com/videos/%s/%s' % (show_path, slug), + 'AdultSwim', video.get('id'))) + return self.playlist_result( + entries, show_data.get('id'), show_data.get('title'), + strip_or_none(show_data.get('metadata', {}).get('description'))) - episode_id = video_info['id'] - episode_title = video_info['title'] - episode_description = video_info.get('description') - episode_duration = int_or_none(video_info.get('duration')) - view_count = int_or_none(video_info.get('views')) + video_data = show_data['sluggedVideo'] + video_id = video_data['id'] - entries = [] - for part_num, segment_id in enumerate(segment_ids): - segement_info = self._extract_cvp_info( - 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id, - segment_id, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', - 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', - }, - }) - segment_title = '%s - %s' % (show_title, episode_title) - if len(segment_ids) > 1: - segment_title += ' Part %d' % (part_num + 1) - segement_info.update({ - 'id': segment_id, - 'title': segment_title, - 'description': episode_description, + info = self._extract_cvp_info( + 'http://www.adultswim.com/videos/api/v0/assets?id=' + video_id, + video_id, { + 'secure': { + 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', + 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', + }, + }, { + 'url': url, + 'site_name': 'AdultSwim', + 'auth_required': video_data.get('auth'), }) - entries.append(segement_info) - return { - '_type': 'playlist', - 'id': episode_id, - 'display_id': episode_path, - 'entries': entries, - 'title': '%s - %s' % (show_title, episode_title), - 'description': episode_description, - 'duration': episode_duration, - 'view_count': view_count, - } + info.update({ + 'id': video_id, + 'display_id': display_id, + 'description': info.get('description') or strip_or_none(video_data.get('description')), + }) + if not is_stream: + info.update({ + 'duration': info.get('duration') or int_or_none(video_data.get('duration')), + 'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')), + 'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')), + 'episode': info['title'], + 'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')), + }) + + info['series'] = video_data.get('collection_title') or info.get('series') + if info['series'] and info['series'] != info['title']: + info['title'] = '%s - %s' % (info['series'], info['title']) + + return info From 8fa17117df723ab0bb4b0105142ea861df663ba5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 8 May 2017 18:13:58 +0100 Subject: [PATCH 35/47] [funimation] fix authentication(closes #13021) --- youtube_dl/extractor/funimation.py | 40 ++++++++++++++---------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index e44a2a87f..c1559be0a 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -20,6 +20,7 @@ class FunimationIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)' _NETRC_MACHINE = 'funimation' + _TOKEN = None _TESTS = [{ 'url': 'https://www.funimation.com/shows/hacksign/role-play/', @@ -67,27 +68,19 @@ class FunimationIE(InfoExtractor): (username, password) = self._get_login_info() if username is None: return - data = urlencode_postdata({ - 'email_field': username, - 'password_field': password, - }) - user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL) - if not user_agent: - user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0' - login_request = sanitized_Request(self._LOGIN_URL, data, headers={ - 'User-Agent': user_agent, - 'Content-Type': 'application/x-www-form-urlencoded' - }) - login_page = self._download_webpage( - login_request, None, 'Logging in as %s' % username) - if any(p in login_page for p in ('funimation.com/logout', '>Log Out<')): - return - error = self._html_search_regex( - r'(?s)<div[^>]+id=["\']errorMessages["\'][^>]*>(.+?)</div>', - login_page, 'error messages', default=None) - if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) - raise ExtractorError('Unable to log in') + try: + data = self._download_json( + 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', + None, 'Logging in as %s' % username, data=urlencode_postdata({ + 'username': username, + 'password': password, + })) + self._TOKEN = data['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read().decode(), None)['error'] + raise ExtractorError(error, expected=True) + raise def _real_initialize(self): self._login() @@ -125,9 +118,12 @@ class FunimationIE(InfoExtractor): description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True) try: + headers = {} + if self._TOKEN: + headers['Authorization'] = 'Token %s' % self._TOKEN sources = self._download_json( 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id, - video_id)['items'] + video_id, headers=headers)['items'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: error = self._parse_json(e.cause.read(), video_id)['errors'][0] From 804181dda9cbb7f2cbb8305bb4097908312a83f6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 8 May 2017 18:50:25 +0100 Subject: [PATCH 36/47] [funimation] remove codes related to old login method and update test --- youtube_dl/extractor/funimation.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index c1559be0a..8c37509ec 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -2,15 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_urllib_parse_unquote_plus, -) +from ..compat import compat_HTTPError from ..utils import ( determine_ext, int_or_none, js_to_json, - sanitized_Request, ExtractorError, urlencode_postdata ) @@ -39,31 +35,21 @@ class FunimationIE(InfoExtractor): }, { 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/', 'info_dict': { - 'id': '9635', + 'id': '210051', 'display_id': 'broadcast-dub-preview', 'ext': 'mp4', 'title': 'Attack on Titan: Junior High - Broadcast Dub Preview', - 'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803', 'thumbnail': r're:https?://.*\.(?:jpg|png)', }, - 'skip': 'Access without user interaction is forbidden by CloudFlare', + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', 'only_matching': True, }] - _LOGIN_URL = 'http://www.funimation.com/login' - - def _extract_cloudflare_session_ua(self, url): - ci_session_cookie = self._get_cookies(url).get('ci_session') - if ci_session_cookie: - ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value) - # ci_session is a string serialized by PHP function serialize() - # This case is simple enough to use regular expressions only - return self._search_regex( - r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent', - default=None) - def _login(self): (username, password) = self._get_login_info() if username is None: From e9137224b3ef638cce3dac024814397fd020f3b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 May 2017 01:14:02 +0700 Subject: [PATCH 37/47] [YoutubeDL] Force restrict filenames when no locale is set for python 2 as well (#13027) --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index eb465c425..4c33d494a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -370,10 +370,10 @@ class YoutubeDL(object): else: raise - if (sys.version_info >= (3,) and sys.platform != 'win32' and + if (sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and not params.get('restrictfilenames', False)): - # On Python 3, the Unicode filesystem API will throw errors (#1474) + # Unicode filesystem API will throw errors (#1474, #13027) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' 'cannot encode all characters. ' From afa0200bf05671be5a380a79206309bdd4f886d6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 8 May 2017 20:03:26 +0100 Subject: [PATCH 38/47] [vrv] extract dash formats and subtitles --- youtube_dl/extractor/vrv.py | 47 +++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 487047fd7..9959627c0 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -112,21 +112,41 @@ class VRVIE(VRVBaseIE): audio_locale = streams_json.get('audio_locale') formats = [] - for stream_id, stream in streams_json.get('streams', {}).get('adaptive_hls', {}).items(): - stream_url = stream.get('url') - if not stream_url: - continue - stream_id = stream_id or audio_locale - m3u8_formats = self._extract_m3u8_formats( - stream_url, video_id, 'mp4', m3u8_id=stream_id, - note='Downloading %s m3u8 information' % stream_id, - fatal=False) - if audio_locale: - for f in m3u8_formats: - f['language'] = audio_locale - formats.extend(m3u8_formats) + for stream_type, streams in streams_json.get('streams', {}).items(): + if stream_type in ('adaptive_hls', 'adaptive_dash'): + for stream in streams.values(): + stream_url = stream.get('url') + if not stream_url: + continue + stream_id = stream.get('hardsub_locale') or audio_locale + format_id = '%s-%s' % (stream_type.split('_')[1], stream_id) + if stream_type == 'adaptive_hls': + adaptive_formats = self._extract_m3u8_formats( + stream_url, video_id, 'mp4', m3u8_id=format_id, + note='Downloading %s m3u8 information' % stream_id, + fatal=False) + else: + adaptive_formats = self._extract_mpd_formats( + stream_url, video_id, mpd_id=format_id, + note='Downloading %s MPD information' % stream_id, + fatal=False) + if audio_locale: + for f in adaptive_formats: + if f.get('acodec') != 'none': + f['language'] = audio_locale + formats.extend(adaptive_formats) self._sort_formats(formats) + subtitles = {} + for subtitle in streams_json.get('subtitles', {}).values(): + subtitle_url = subtitle.get('url') + if not subtitle_url: + continue + subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({ + 'url': subtitle_url, + 'ext': subtitle.get('format', 'ass'), + }) + thumbnails = [] for thumbnail in video_data.get('images', {}).get('thumbnails', []): thumbnail_url = thumbnail.get('source') @@ -142,6 +162,7 @@ class VRVIE(VRVBaseIE): 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, 'thumbnails': thumbnails, 'description': video_data.get('description'), 'duration': float_or_none(video_data.get('duration_ms'), 1000), From 5996d21aeac73509eee060bd26dbec95ede5e2eb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 9 May 2017 00:47:37 +0100 Subject: [PATCH 39/47] [cspan] add support for brightcove live embeds(closes #13028) --- youtube_dl/extractor/brightcove.py | 20 +++++++++++++++++--- youtube_dl/extractor/cspan.py | 15 +++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 3f017a2b1..0ed59bcbc 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -5,6 +5,7 @@ import re import json from .common import InfoExtractor +from .adobepass import AdobePassIE from ..compat import ( compat_etree_fromstring, compat_parse_qs, @@ -448,7 +449,7 @@ class BrightcoveLegacyIE(InfoExtractor): return info -class BrightcoveNewIE(InfoExtractor): +class BrightcoveNewIE(AdobePassIE): IE_NAME = 'brightcove:new' _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)' _TESTS = [{ @@ -602,6 +603,20 @@ class BrightcoveNewIE(InfoExtractor): raise ExtractorError(message, expected=True) raise + errors = json_data.get('errors') + if errors and errors[0].get('error_subcode') == 'TVE_AUTH': + custom_fields = json_data['custom_fields'] + tve_token = self._extract_mvpd_auth( + smuggled_data['source_url'], video_id, + custom_fields['bcadobepassrequestorid'], + custom_fields['bcadobepassresourceid']) + json_data = self._download_json( + api_url, video_id, headers={ + 'Accept': 'application/json;pk=%s' % policy_key + }, query={ + 'tveToken': tve_token, + }) + title = json_data['name'].strip() formats = [] @@ -667,7 +682,6 @@ class BrightcoveNewIE(InfoExtractor): }) formats.append(f) - errors = json_data.get('errors') if not formats and errors: error = errors[0] raise ExtractorError( @@ -684,7 +698,7 @@ class BrightcoveNewIE(InfoExtractor): is_live = False duration = float_or_none(json_data.get('duration'), 1000) - if duration and duration < 0: + if duration is not None and duration <= 0: is_live = True return { diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index d4576160b..171820e27 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -10,6 +10,7 @@ from ..utils import ( smuggle_url, determine_ext, ExtractorError, + extract_attributes, ) from .senateisvp import SenateISVPIE from .ustream import UstreamIE @@ -68,6 +69,7 @@ class CSpanIE(InfoExtractor): 'uploader_id': '12987475', }, }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' def _real_extract(self, url): video_id = self._match_id(url) @@ -78,6 +80,19 @@ class CSpanIE(InfoExtractor): if ustream_url: return self.url_result(ustream_url, UstreamIE.ie_key()) + if '&vod' not in url: + bc = self._search_regex( + r"(<[^>]+id='brightcove-player-embed'[^>]+>)", + webpage, 'brightcove embed', default=None) + if bc: + bc_attr = extract_attributes(bc) + bc_url = self.BRIGHTCOVE_URL_TEMPLATE % ( + bc_attr.get('data-bcaccountid', '3162030207001'), + bc_attr.get('data-noprebcplayerid', 'SyGGpuJy3g'), + bc_attr.get('data-newbcplayerid', 'default'), + bc_attr['data-bcid']) + return self.url_result(smuggle_url(bc_url, {'source_url': url})) + # We first look for clipid, because clipprog always appears before patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) From b972fb037bc4a07694df6c010a25f12aedb9f731 Mon Sep 17 00:00:00 2001 From: Rasmus Rendal <rasmusrendal@gmail.com> Date: Sun, 7 May 2017 13:04:23 +0200 Subject: [PATCH 40/47] [drtv] Lower preference for SignLanguage formats (closes #13013) --- youtube_dl/extractor/drtv.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index e4917014a..e6f72ac50 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -45,6 +45,18 @@ class DRTVIE(InfoExtractor): 'upload_date': '20160902', 'duration': 131.4, }, + }, { + 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', + 'md5': '7c8ca12e6c3d3e3edd59ba5a9b7ca10a', + 'info_dict': { + 'id': 'historien-om-danmark-stenalder', + 'ext': 'mp4', + 'title': 'Historien om Danmark: Stenalder (1)', + 'description': 'Én fascinerende historie om tusindvis af år, hvor vores land bliver skabt ud af is og vand, og hvor de første danskere ankommer til vores egn. Det bliver en rejse ind i urtiden og det liv, som urtidsjægerne har levet i skovene og ved havet og helt frem til bondestenalderen. Gennem skeletfund afslører eksperter, hvordan vores forfædre har set ud i stenalderen og hvorfor stenaldermennesket byggede de imponerende jættestuer, som ligger overalt i det danske.', + 'timestamp': 1490401996, + 'upload_date': '20170325', + 'duration': 3502.04, + }, }] def _real_extract(self, url): @@ -85,7 +97,11 @@ class DRTVIE(InfoExtractor): kind = asset.get('Kind') if kind == 'Image': thumbnail = asset.get('Uri') - elif kind in ('VideoResource', 'AudioResource'): + preference = 0 + + sign_language = asset.get('Target') == 'SignLanguage' + + if kind in ('VideoResource', 'AudioResource'): duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) restricted_to_denmark = asset.get('RestrictedToDenmark') spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' @@ -95,10 +111,12 @@ class DRTVIE(InfoExtractor): continue target = link.get('Target') format_id = target or '' - preference = None if spoken_subtitles: preference = -1 format_id += '-spoken-subtitles' + if sign_language: + preference = -1 + format_id += "-sign-language" if target == 'HDS': f4m_formats = self._extract_f4m_formats( uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', From 8d65880e247d32e47dfb42616b0734639d4c070f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 May 2017 15:37:09 +0700 Subject: [PATCH 41/47] [drtv] Improve extraction and update tests (closes #13013, closes #13016) --- youtube_dl/extractor/drtv.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index e6f72ac50..c84624f1e 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -20,7 +20,7 @@ class DRTVIE(InfoExtractor): IE_NAME = 'drtv' _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', - 'md5': '25e659cccc9a2ed956110a299fdf5983', + 'md5': '7ae17b4e18eb5d29212f424a7511c184', 'info_dict': { 'id': 'klassen-darlig-taber-10', 'ext': 'mp4', @@ -30,32 +30,36 @@ class DRTVIE(InfoExtractor): 'upload_date': '20160823', 'duration': 606.84, }, - 'params': { - 'skip_download': True, - }, }, { + # embed 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', - 'md5': '2c37175c718155930f939ef59952474a', 'info_dict': { 'id': 'christiania-pusher-street-ryddes-drdkrjpo', 'ext': 'mp4', 'title': 'LIVE Christianias rydning af Pusher Street er i gang', - 'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.', + 'description': 'md5:2a71898b15057e9b97334f61d04e6eb5', 'timestamp': 1472800279, 'upload_date': '20160902', 'duration': 131.4, }, + 'params': { + 'skip_download': True, + }, }, { + # with SignLanguage formats 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', - 'md5': '7c8ca12e6c3d3e3edd59ba5a9b7ca10a', 'info_dict': { 'id': 'historien-om-danmark-stenalder', 'ext': 'mp4', 'title': 'Historien om Danmark: Stenalder (1)', - 'description': 'Én fascinerende historie om tusindvis af år, hvor vores land bliver skabt ud af is og vand, og hvor de første danskere ankommer til vores egn. Det bliver en rejse ind i urtiden og det liv, som urtidsjægerne har levet i skovene og ved havet og helt frem til bondestenalderen. Gennem skeletfund afslører eksperter, hvordan vores forfædre har set ud i stenalderen og hvorfor stenaldermennesket byggede de imponerende jættestuer, som ligger overalt i det danske.', + 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a', 'timestamp': 1490401996, 'upload_date': '20170325', 'duration': 3502.04, + 'formats': 'mincount:20', + }, + 'params': { + 'skip_download': True, }, }] @@ -97,26 +101,20 @@ class DRTVIE(InfoExtractor): kind = asset.get('Kind') if kind == 'Image': thumbnail = asset.get('Uri') - preference = 0 - - sign_language = asset.get('Target') == 'SignLanguage' - - if kind in ('VideoResource', 'AudioResource'): + elif kind in ('VideoResource', 'AudioResource'): duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) restricted_to_denmark = asset.get('RestrictedToDenmark') - spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' + asset_target = asset.get('Target') for link in asset.get('Links', []): uri = link.get('Uri') if not uri: continue target = link.get('Target') format_id = target or '' - if spoken_subtitles: + preference = None + if asset_target in ('SpokenSubtitles', 'SignLanguage'): preference = -1 - format_id += '-spoken-subtitles' - if sign_language: - preference = -1 - format_id += "-sign-language" + format_id += '-%s' % asset_target if target == 'HDS': f4m_formats = self._extract_f4m_formats( uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', From 5d0968f0af3ce2a7da9a5f3098c6436f07c661aa Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 9 May 2017 11:14:29 +0100 Subject: [PATCH 42/47] [packtpub] add support for authentication(closes #12622) --- youtube_dl/extractor/packtpub.py | 39 +++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 881f3bcc7..bb668c999 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_HTTPError, +) from ..utils import ( clean_html, ExtractorError, @@ -11,6 +14,7 @@ from ..utils import ( strip_or_none, unified_timestamp, urljoin, + urlencode_postdata, ) @@ -34,6 +38,32 @@ class PacktPubIE(PacktPubBaseIE): 'upload_date': '20170331', }, } + _NETRC_MACHINE = 'packtpub' + _TOKEN = None + + def _real_initialize(self): + (username, password) = self._get_login_info() + if username is None: + return + webpage = self._download_webpage(self._PACKT_BASE, None) + login_form = self._form_hidden_inputs( + 'packt-user-login-form', webpage) + login_form.update({ + 'email': username, + 'password': password, + }) + self._download_webpage( + self._PACKT_BASE, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form)) + try: + self._TOKEN = self._download_json( + '%s/users/tokens/sessions' % self._MAPT_REST, None, + 'Downloading Authorization Token')['data']['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 404): + message = self._parse_json(e.cause.read().decode(), None)['message'] + raise ExtractorError(message, expected=True) + raise def _handle_error(self, response): if response.get('status') != 'success': @@ -51,14 +81,17 @@ class PacktPubIE(PacktPubBaseIE): course_id, chapter_id, video_id = mobj.group( 'course_id', 'chapter_id', 'id') + headers = {} + if self._TOKEN: + headers['Authorization'] = self._TOKEN video = self._download_json( '%s/users/me/products/%s/chapters/%s/sections/%s' % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, - 'Downloading JSON video')['data'] + 'Downloading JSON video', headers=headers)['data'] content = video.get('content') if not content: - raise ExtractorError('This video is locked', expected=True) + self.raise_login_required('This video is locked') video_url = content['file'] From 6d1ded75021ca76f14c4fe8d3aa698704bae1dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 May 2017 23:07:01 +0700 Subject: [PATCH 43/47] [francetv] Adapt to site redesign (closes #13034) --- youtube_dl/extractor/extractors.py | 4 +- youtube_dl/extractor/francetv.py | 217 +++++++++++------------------ 2 files changed, 86 insertions(+), 135 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 968cca9d2..61a6f1013 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -350,9 +350,9 @@ from .foxsports import FoxSportsIE from .franceculture import FranceCultureIE from .franceinter import FranceInterIE from .francetv import ( - PluzzIE, - FranceTvInfoIE, FranceTVIE, + FranceTVEmbedIE, + FranceTVInfoIE, GenerationQuoiIE, CultureboxIE, ) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 48d43ae58..6e1a537a0 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -21,11 +21,13 @@ from .dailymotion import ( class FranceTVBaseInfoExtractor(InfoExtractor): - def _extract_video(self, video_id, catalogue): + def _extract_video(self, video_id, catalogue=None): info = self._download_json( - 'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s' - % (video_id, catalogue), - video_id, 'Downloading video JSON') + 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/', + video_id, 'Downloading video JSON', query={ + 'idDiffusion': video_id, + 'catalogue': catalogue or '', + }) if info.get('status') == 'NOK': raise ExtractorError( @@ -109,27 +111,94 @@ class FranceTVBaseInfoExtractor(InfoExtractor): } -class PluzzIE(FranceTVBaseInfoExtractor): - IE_NAME = 'pluzz.francetv.fr' - _VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html' +class FranceTVIE(FranceTVBaseInfoExtractor): + _VALID_URL = r'https?://(?:www\.)?france\.tv/(?:[^/]+/)+(?P<id>[^/]+)\.html' - # Can't use tests, videos expire in 7 days + _TESTS = [{ + 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', + 'info_dict': { + 'id': '157550144', + 'ext': 'mp4', + 'title': '13h15, le dimanche... - Les mystères de Jésus', + 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', + 'timestamp': 1494156300, + 'upload_date': '20170507', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, + }, { + # france3 + 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', + 'only_matching': True, + }, { + # france4 + 'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html', + 'only_matching': True, + }, { + # france5 + 'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html', + 'only_matching': True, + }, { + # franceo + 'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html', + 'only_matching': True, + }, { + # france2 live + 'url': 'https://www.france.tv/france-2/direct.html', + 'only_matching': True, + }, { + 'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html', + 'only_matching': True, + }, { + 'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._html_search_meta( - 'id_video', webpage, 'video id', default=None) + catalogue = None + video_id = self._search_regex( + r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'video id', default=None, group='id') + if not video_id: - video_id = self._search_regex( - r'data-diffusion=["\'](\d+)', webpage, 'video id') - - return self._extract_video(video_id, 'Pluzz') + video_id, catalogue = self._html_search_regex( + r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"', + webpage, 'video ID').split('@') + return self._extract_video(video_id, catalogue) -class FranceTvInfoIE(FranceTVBaseInfoExtractor): +class FranceTVEmbedIE(FranceTVBaseInfoExtractor): + _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)' + + _TEST = { + 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961', + 'info_dict': { + 'id': 'NI_983319', + 'ext': 'mp4', + 'title': 'Le Pen Reims', + 'upload_date': '20170505', + 'timestamp': 1493981780, + 'duration': 16, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id, + video_id) + + return self._extract_video(video['video_id'], video.get('catalog')) + + +class FranceTVInfoIE(FranceTVBaseInfoExtractor): IE_NAME = 'francetvinfo.fr' _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)' @@ -233,124 +302,6 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): return self._extract_video(video_id, catalogue) -class FranceTVIE(FranceTVBaseInfoExtractor): - IE_NAME = 'francetv' - IE_DESC = 'France 2, 3, 4, 5 and Ô' - _VALID_URL = r'''(?x) - https?:// - (?: - (?:www\.)?france[2345o]\.fr/ - (?: - emissions/[^/]+/(?:videos|diffusions)| - emission/[^/]+| - videos| - jt - ) - /| - embed\.francetv\.fr/\?ue= - ) - (?P<id>[^/?]+) - ''' - - _TESTS = [ - # france2 - { - 'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', - 'md5': 'c03fc87cb85429ffd55df32b9fc05523', - 'info_dict': { - 'id': '109169362', - 'ext': 'flv', - 'title': '13h15, le dimanche...', - 'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7', - 'upload_date': '20140914', - 'timestamp': 1410693600, - }, - }, - # france3 - { - 'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575', - 'md5': '679bb8f8921f8623bd658fa2f8364da0', - 'info_dict': { - 'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au', - 'ext': 'mp4', - 'title': 'Le scandale du prix des médicaments', - 'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce', - 'upload_date': '20131113', - 'timestamp': 1384380000, - }, - }, - # france4 - { - 'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', - 'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c', - 'info_dict': { - 'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', - 'ext': 'mp4', - 'title': 'Hero Corp Making of - Extrait 1', - 'description': 'md5:c87d54871b1790679aec1197e73d650a', - 'upload_date': '20131106', - 'timestamp': 1383766500, - }, - }, - # france5 - { - 'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1', - 'md5': 'f6c577df3806e26471b3d21631241fd0', - 'info_dict': { - 'id': '123327454', - 'ext': 'flv', - 'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?', - 'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4', - 'upload_date': '20150831', - 'timestamp': 1441035120, - }, - }, - # franceo - { - 'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015', - 'md5': '47d5816d3b24351cdce512ad7ab31da8', - 'info_dict': { - 'id': '125377621', - 'ext': 'flv', - 'title': 'Infô soir', - 'description': 'md5:01b8c6915a3d93d8bbbd692651714309', - 'upload_date': '20150718', - 'timestamp': 1437241200, - 'duration': 414, - }, - }, - { - # francetv embed - 'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87', - 'info_dict': { - 'id': 'EV_30231', - 'ext': 'flv', - 'title': 'Alcaline, le concert avec Calogero', - 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff', - 'upload_date': '20150226', - 'timestamp': 1424989860, - 'duration': 5400, - }, - }, - { - 'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05', - 'only_matching': True, - }, - { - 'url': 'http://www.franceo.fr/videos/125377617', - 'only_matching': True, - } - ] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_id, catalogue = self._html_search_regex( - r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"', - webpage, 'video ID').split('@') - return self._extract_video(video_id, catalogue) - - class GenerationQuoiIE(InfoExtractor): IE_NAME = 'france2.fr:generation-quoi' _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)' From cbd84b58175144fa898c5dfd32c58d37ffaee2c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 May 2017 23:17:22 +0700 Subject: [PATCH 44/47] [ChangeLog] Actualize --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5418bb95f..5137266f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version <unreleased> + +Core +* [YoutubeDL] Force --restrict-filenames when no locale is set on all python + versions (#13027) + +Extractors +* [francetv] Adapt to site redesign (#13034) ++ [packtpub] Add support for authentication (#12622) +* [drtv] Lower preference for SignLanguage formats (#13013, #13016) ++ [cspan] Add support for brightcove live embeds (#13028) +* [vrv] Extract DASH formats and subtitles +* [funimation] Fix authentication (#13021) +* [adultswim] Fix extraction (#8640, #10950, #11042, #12121) + + Add support for Adobe Pass authentication + + Add support for live streams + + Add support for show pages +* [turner] Extract thumbnail, is_live and strip description ++ [nonktube] Add support for nonktube.com (#8647, #13024) ++ [nuevo] Pass headers to _extract_nuevo +* [nbc] Improve extraction (#12364) + + version 2017.05.07 Common From a7ed6b341cce27b6b5731969d39711b15dca8d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 May 2017 04:20:13 +0700 Subject: [PATCH 45/47] release 2017.05.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 +++-- youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 86d11e142..f7742ddcf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.05.07 +[debug] youtube-dl version 2017.05.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5137266f6..4775f8695 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.05.09 Core * [YoutubeDL] Force --restrict-filenames when no locale is set on all python diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d867dfe65..2f27686b3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -281,7 +281,8 @@ - **france2.fr:generation-quoi** - **FranceCulture** - **FranceInter** - - **francetv**: France 2, 3, 4, 5 and Ô + - **FranceTV** + - **FranceTVEmbed** - **francetvinfo.fr** - **Freesound** - **freespeech.org** @@ -530,6 +531,7 @@ - **NJPWWorld**: 新日本プロレスワールド - **NobelPrize** - **Noco** + - **NonkTube** - **Noovo** - **Normalboots** - **NosVideo** @@ -602,7 +604,6 @@ - **pluralsight** - **pluralsight:course** - **plus.google**: Google Plus - - **pluzz.francetv.fr** - **podomatic** - **Pokemon** - **PolskieRadio** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dc707071a..33a0c2130 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.05.07' +__version__ = '2017.05.09' From 39ee2638194cb40bfc6652ecb2fca2455e87e327 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 May 2017 08:50:30 +0100 Subject: [PATCH 46/47] use platform=desktop in assets url(closes #13041) --- youtube_dl/extractor/adultswim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 9c37ea33c..acc4ce38d 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -126,7 +126,7 @@ class AdultSwimIE(TurnerBaseIE): video_id = video_data['id'] info = self._extract_cvp_info( - 'http://www.adultswim.com/videos/api/v0/assets?id=' + video_id, + 'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id, video_id, { 'secure': { 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', From 3166b1f0ac16769190f6ab1e7e5b0e45bb63d482 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 10 May 2017 22:35:10 +0700 Subject: [PATCH 47/47] [myspace] Improve _VALID_URL (closes #13040) --- youtube_dl/extractor/myspace.py | 100 ++++++++++++++++---------------- 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index f281238c9..e164d5940 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -12,64 +12,62 @@ from ..utils import ( class MySpaceIE(InfoExtractor): - _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)' + _VALID_URL = r'''(?x) + https?:// + myspace\.com/[^/]+/ + (?P<mediatype> + video/[^/]+/(?P<video_id>\d+)| + music/song/[^/?#&]+-(?P<song_id>\d+)-\d+(?:[/?#&]|$) + ) + ''' - _TESTS = [ - { - 'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919', - 'md5': '9c1483c106f4a695c47d2911feed50a7', - 'info_dict': { - 'id': '109594919', - 'ext': 'mp4', - 'title': 'Little Big Town', - 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.', - 'uploader': 'Five Minutes to the Stage', - 'uploader_id': 'fiveminutestothestage', - 'timestamp': 1414108751, - 'upload_date': '20141023', - }, + _TESTS = [{ + 'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919', + 'md5': '9c1483c106f4a695c47d2911feed50a7', + 'info_dict': { + 'id': '109594919', + 'ext': 'mp4', + 'title': 'Little Big Town', + 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.', + 'uploader': 'Five Minutes to the Stage', + 'uploader_id': 'fiveminutestothestage', + 'timestamp': 1414108751, + 'upload_date': '20141023', }, + }, { # songs - { - 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681', - 'md5': '1d7ee4604a3da226dd69a123f748b262', - 'info_dict': { - 'id': '93388656', - 'ext': 'm4a', - 'title': 'Of weakened soul...', - 'uploader': 'Killsorrow', - 'uploader_id': 'killsorrow', - }, - }, { - 'add_ie': ['Youtube'], - 'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041', - 'info_dict': { - 'id': 'xqds0B_meys', - 'ext': 'webm', - 'title': 'Three Days Grace - Animal I Have Become', - 'description': 'md5:8bd86b3693e72a077cf863a8530c54bb', - 'uploader': 'ThreeDaysGraceVEVO', - 'uploader_id': 'ThreeDaysGraceVEVO', - 'upload_date': '20091002', - }, - }, { - 'add_ie': ['Youtube'], - 'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426', - 'info_dict': { - 'id': 'ypWvQgnJrSU', - 'ext': 'mp4', - 'title': 'Starset - First Light', - 'description': 'md5:2d5db6c9d11d527683bcda818d332414', - 'uploader': 'Yumi K', - 'uploader_id': 'SorenPromotions', - 'upload_date': '20140725', - } + 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681', + 'md5': '1d7ee4604a3da226dd69a123f748b262', + 'info_dict': { + 'id': '93388656', + 'ext': 'm4a', + 'title': 'Of weakened soul...', + 'uploader': 'Killsorrow', + 'uploader_id': 'killsorrow', }, - ] + }, { + 'add_ie': ['Youtube'], + 'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041', + 'info_dict': { + 'id': 'xqds0B_meys', + 'ext': 'webm', + 'title': 'Three Days Grace - Animal I Have Become', + 'description': 'md5:8bd86b3693e72a077cf863a8530c54bb', + 'uploader': 'ThreeDaysGraceVEVO', + 'uploader_id': 'ThreeDaysGraceVEVO', + 'upload_date': '20091002', + }, + }, { + 'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426', + 'only_matching': True, + }, { + 'url': 'https://myspace.com/thelargemouthbassband/music/song/02-pure-eyes.mp3-94422330-105113388', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = mobj.group('video_id') or mobj.group('song_id') is_song = mobj.group('mediatype').startswith('music/song') webpage = self._download_webpage(url, video_id) player_url = self._search_regex(