From c0837a12c8a64c682a01e4bfdee6f22615568d69 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Sat, 12 Mar 2016 18:00:26 +0100 Subject: [PATCH 001/501] [WDR] complete overhaul after relaunch of the site The WDR relaunched their site on 2016-02-23 which not only changed the URL-schema completely but also the layout of their pages. Apparently the whole "mediathek" now runs on the wdr-domain, so no separate URL for funkhauseuropa anymore. There seems to be no explicit handling of video-sizes on the page or in the URLs anymore. There seems to be only one size for HTML5, but still several sizes for flash. The extractor adds all to the list of formats. There is no metadata for the HTML5-stream, so that the best flash-stream will always be considered as the "best" format. At least in my tests this seemed to be true anyway. --- youtube_dl/extractor/wdr.py | 251 +++++++++++++++--------------------- 1 file changed, 101 insertions(+), 150 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 31c904303..f881b7300 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import itertools import re from .common import InfoExtractor @@ -11,204 +10,156 @@ from ..compat import ( ) from ..utils import ( unified_strdate, - qualities, + ExtractorError, ) class WDRIE(InfoExtractor): - _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?' 
- _VALID_URL = r'(?Phttps?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P.+?)(?P%s)?\.html' % _PLAYER_REGEX + _PAGE_REGEX = r'/mediathek/(?P[^/]+)/(?P[^/]+)/(?P.+)\.html' + _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + + _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' _TESTS = [ { - 'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html', + 'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', + 'md5': 'e58c39c3e30077141d258bf588700a7b', 'info_dict': { - 'id': 'mdb-362427', + 'id': 'mdb-1058683', 'ext': 'flv', - 'title': 'Servicezeit', - 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', - 'upload_date': '20140310', - 'is_live': False - }, - 'params': { - 'skip_download': True, + 'display_id': 'doku-am-freitag/video-geheimnis-aachener-dom-100', + 'title': 'Geheimnis Aachener Dom', + 'alt_title': 'Doku am Freitag', + 'upload_date': '20160304', + 'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318', + 'is_live': False, + 'subtitles': {'de': [{ + 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml' + }]}, }, 'skip': 'Page Not Found', }, { - 'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', + 'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html', + 'md5': 'f4c1f96d01cf285240f53ea4309663d8', 'info_dict': { - 'id': 'mdb-363194', + 'id': 'mdb-1072000', + 'ext': 'mp3', + 'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100', + 'title': 'Schriftstellerin Juli Zeh', + 'alt_title': 'WDR 3 Gespräch am Samstag', + 'upload_date': '20160312', + 'description': 'md5:e127d320bc2b1f149be697ce044a3dd7', + 'is_live': False, + 'subtitles': {} + }, + 'skip': 'Page Not Found', + }, + { + 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', + 'info_dict': { + 'id': 'mdb-103364', 
'ext': 'flv', - 'title': 'Marga Spiegel ist tot', - 'description': 'md5:2309992a6716c347891c045be50992e4', - 'upload_date': '20140311', - 'is_live': False - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Page Not Found', - }, - { - 'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', - 'md5': '83e9e8fefad36f357278759870805898', - 'info_dict': { - 'id': 'mdb-194332', - 'ext': 'mp3', - 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', - 'description': 'md5:2309992a6716c347891c045be50992e4', - 'upload_date': '20091129', - 'is_live': False - }, - }, - { - 'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html', - 'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa', - 'info_dict': { - 'id': 'mdb-478135', - 'ext': 'mp3', - 'title': 'Flavia Coelho: Amar é Amar', - 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', - 'upload_date': '20140717', - 'is_live': False - }, - 'skip': 'Page Not Found', - }, - { - 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', - 'playlist_mincount': 146, - 'info_dict': { - 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', + 'display_id': 'index', + 'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'alt_title': 'WDR Fernsehen Live', + 'upload_date': None, + 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', + 'is_live': True, + 'subtitles': {} } }, { - 'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html', + 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html', + 'playlist_mincount': 10, 'info_dict': { - 'id': 'mdb-103364', - 'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', - 'ext': 'flv', - 'upload_date': '20150101', - 'is_live': True - }, - 'params': { - 'skip_download': True, + 'id': 
'aktuelle-stunde/aktuelle-stunde-120', }, } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - page_url = mobj.group('url') - page_id = mobj.group('id') + url_type = mobj.group('type') + page_url = mobj.group('page_url') + display_id = mobj.group('display_id') + webpage = self._download_webpage(url, display_id) - webpage = self._download_webpage(url, page_id) + js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None) - if mobj.group('player') is None: + if not js_url: entries = [ - self.url_result(page_url + href, 'WDR') + self.url_result(page_url + href[0], 'WDR') for href in re.findall( - r'\s*]*>\s*\s*]+href="([^"]+)"', - webpage, 'm3u8 url', default=None) - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, page_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - - direct_urls = re.findall( - r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) - if direct_urls: - for quality, video_url in direct_urls: - formats.append({ - 'url': video_url, - 'preference': preference(quality), - 'http_headers': { - 'User-Agent': 'mobile', - }, - }) - self._sort_formats(formats) - description = self._html_search_meta('Description', webpage, 'description') - return { - 'id': page_id, - 'formats': formats, + 'id': metadata_tracker_data.get("trackerClipId", display_id), + 'display_id': display_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, + 'alt_title': metadata_tracker_data.get("trackerClipSubcategory"), + 'formats': formats, 'upload_date': upload_date, - 'is_live': is_live + 'description': self._html_search_meta("Description", webpage), + 'is_live': is_live, + 'subtitles': subtitles, } From 14f7a2b8af17d1f490c46a0a9028ba9d97cf7df2 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Sat, 12 Mar 2016 20:14:46 +0100 Subject: [PATCH 002/501] [WDRMaus] switch current show to new WDR extractor (fixes #8562) It seems that the "current show" already uses the new WDR video-player, while 
all the others videos still use the old player. I just added the current show URL to the normal WDR-extractor, which works fine. This commit needs my changes from PR #8842 that fix the support for WDR. --- youtube_dl/extractor/wdr.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index f881b7300..ec81f1a28 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -15,8 +15,9 @@ from ..utils import ( class WDRIE(InfoExtractor): + _CURRENT_MAUS_URL = r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5' _PAGE_REGEX = r'/mediathek/(?P[^/]+)/(?P[^/]+)/(?P.+)\.html' - _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + "|" + _CURRENT_MAUS_URL _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' @@ -75,7 +76,18 @@ class WDRIE(InfoExtractor): 'info_dict': { 'id': 'aktuelle-stunde/aktuelle-stunde-120', }, - } + }, + { + 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5', + 'info_dict': { + 'id': 'mdb-1096487', + 'ext': 'flv', + 'upload_date': 're:^[0-9]{8}$', + 'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$', + 'description': '- Die Sendung mit der Maus -', + }, + 'skip': 'The id changes from week to week because of the new episode' + }, ] def _real_extract(self, url): @@ -195,26 +207,17 @@ class WDRMobileIE(InfoExtractor): class WDRMausIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P[^/?#]+)(?:/index\.php5|(?[^/?#]+)((? 
Date: Sun, 24 Apr 2016 16:23:21 +0800 Subject: [PATCH 003/501] [generic] Unescape the video URL Fixes #9279 --- youtube_dl/extractor/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 95d233259..16c2c60d7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2045,6 +2045,7 @@ class GenericIE(InfoExtractor): entries = [] for video_url in found: + video_url = unescapeHTML(video_url) video_url = video_url.replace('\\/', '/') video_url = compat_urlparse.urljoin(url, video_url) video_id = compat_urllib_parse_unquote(os.path.basename(video_url)) From 2a7c38831cc8f789cdf4ee63f8d4450a46f45017 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 24 Apr 2016 17:01:18 +0800 Subject: [PATCH 004/501] [yahoo] Extend _VALID_URL and fix extraction Closes #9271 --- youtube_dl/extractor/yahoo.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b2d8f4b48..e2613659c 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -24,7 +24,7 @@ from .nbc import NBCSportsVPlayerIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?P(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P.+)?-(?P[0-9]+)(?:-[a-z]+)?\.html)' + _VALID_URL = r'(?P(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P.+)?-(?P[0-9]+)(?:-[a-z]+)?(?:\.html)?)' _TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', @@ -166,6 +166,17 @@ class YahooIE(InfoExtractor): 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.', }, }, + { + # config['models']['applet_model']['data']['sapi'] has no query + 'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016', + 'md5': 
'dac0c72d502bc5facda80c9e6d5c98db', + 'info_dict': { + 'id': 'a6015640-e9e5-3efb-bb60-05589a183919', + 'ext': 'mp4', + 'description': 'Galactic', + 'title': 'Dolla Diva (feat. Maggie Koerner)', + }, + }, ] def _real_extract(self, url): @@ -202,7 +213,7 @@ class YahooIE(InfoExtractor): config = self._parse_json(config_json, display_id, fatal=False) if config: sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi') - if sapi: + if sapi and 'query' in sapi: return self._extract_info(display_id, sapi, webpage) items_json = self._search_regex( From 4f549580977ab94364fd404cdebba22575c74b91 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 24 Apr 2016 17:28:18 +0800 Subject: [PATCH 005/501] [yahoo] Update some tests One has new fields as ThePlatformIE changed, and others have changed files. --- youtube_dl/extractor/yahoo.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index e2613659c..e4f3d8937 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -38,7 +38,7 @@ class YahooIE(InfoExtractor): }, { 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', - 'md5': 'd6e6fc6e1313c608f316ddad7b82b306', + 'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23', 'info_dict': { 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9', 'ext': 'mp4', @@ -49,7 +49,7 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', - 'md5': '60e8ac193d8fb71997caa8fce54c6460', + 'md5': '75ffabdb87c16d4ffe8c036dc4d1c136', 'info_dict': { 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', 'ext': 'mp4', @@ -122,7 +122,7 @@ class YahooIE(InfoExtractor): } }, { 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', - 'md5': '989396ae73d20c6f057746fb226aa215', + 'md5': 'b17ac378b1134fa44370fb27db09a744', 'info_dict': { 'id': 
'071c4013-ce30-3a93-a5b2-e0413cd4a9d1', 'ext': 'mp4', @@ -141,6 +141,9 @@ class YahooIE(InfoExtractor): 'ext': 'flv', 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', + 'upload_date': '20150313', + 'uploader': 'NBCU-SPORTS', + 'timestamp': 1426270238, } }, { 'url': 'https://tw.news.yahoo.com/-100120367.html', @@ -148,7 +151,7 @@ class YahooIE(InfoExtractor): }, { # Query result is embedded in webpage, but explicit request to video API fails with geo restriction 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', - 'md5': '4fbafb9c9b6f07aa8f870629f6671b35', + 'md5': '1ddbf7c850777548438e5c4f147c7b8c', 'info_dict': { 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', 'ext': 'mp4', From d9ed362116969362e1c404aea63d9f6f3e833478 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 24 Apr 2016 17:46:25 +0800 Subject: [PATCH 006/501] [yahoo] Extract all ', webpage, 'embed url')) + + return { + '_type': 'url_transparent', + 'url': embed_url, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c9d1422e5..14b4f245f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -75,6 +75,7 @@ from .bigflix import BigflixIE from .bild import BildIE from .bilibili import BiliBiliIE from .biobiochiletv import BioBioChileTVIE +from .biqle import BIQLEIE from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 67220f1b7..041d93629 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -26,12 +26,16 @@ class VKIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)| + (?: + (?:m\.)?vk\.com/video_| + (?:www\.)?daxab.com/ + ) + ext\.php\?(?P.*?\boid=(?P-?\d+).*?\bid=(?P\d+).*)| (?: (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video| - 
(?:www\.)?biqle\.ru/watch/ + (?:www\.)?daxab.com/embed/ ) - (?P[^s].*?)(?:\?(?:.*\blist=(?P[\da-f]+))?|%2F|$) + (?P-?\d+_\d+)(?:.*\blist=(?P[\da-f]+))? ) ''' _NETRC_MACHINE = 'vk' @@ -75,7 +79,8 @@ class VKIE(InfoExtractor): 'duration': 101, 'upload_date': '20120730', 'view_count': int, - } + }, + 'skip': 'This video has been removed from public access.', }, { # VIDEO NOW REMOVED @@ -142,7 +147,7 @@ class VKIE(InfoExtractor): 'id': 'V3K4mi0SYkc', 'ext': 'webm', 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", - 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', + 'description': 'md5:d9903938abdc74c738af77f527ca0596', 'duration': 178, 'upload_date': '20130116', 'uploader': "Children's Joy Foundation", @@ -173,11 +178,6 @@ class VKIE(InfoExtractor): 'url': 'https://vk.com/video205387401_164765225', 'only_matching': True, }, - { - # vk wrapper - 'url': 'http://www.biqle.ru/watch/847655_160197695', - 'only_matching': True, - }, { # pladform embed 'url': 'https://vk.com/video-76116461_171554880', @@ -217,20 +217,22 @@ class VKIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') - if not video_id: + info_url = url + if video_id: + info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id + # Some videos (removed?) can only be downloaded with list id specified + list_id = mobj.group('list_id') + if list_id: + info_url += '&list=%s' % list_id + else: + info_url = 'http://vk.com/video_ext.php?' + mobj.group('embed_query') video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) - info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id - - # Some videos (removed?) 
can only be downloaded with list id specified - list_id = mobj.group('list_id') - if list_id: - info_url += '&list=%s' % list_id - info_page = self._download_webpage(info_url, video_id) error_message = self._html_search_regex( - r'(?s)]+class="video_layer_message"[^>]*>(.+?)', + [r'(?s)]+class="video_layer_message"[^>]*>(.+?)', + r'(?s)]+id="video_ext_msg"[^>]*>(.+?)'], info_page, 'error message', default=None) if error_message: raise ExtractorError(error_message, expected=True) @@ -305,17 +307,17 @@ class VKIE(InfoExtractor): view_count = None views = self._html_search_regex( r'"mv_views_count_number"[^>]*>(.+?\bviews?)<', - info_page, 'view count', fatal=False) + info_page, 'view count', default=None) if views: view_count = str_to_int(self._search_regex( r'([\d,.]+)', views, 'view count', fatal=False)) formats = [] for k, v in data.items(): - if not k.startswith('url') and k != 'extra_data' or not v: + if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v: continue height = int_or_none(self._search_regex( - r'^url(\d+)', k, 'height', default=None)) + r'^(?:url|cache)(\d+)', k, 'height', default=None)) formats.append({ 'format_id': k, 'url': v, From abc97b5eda4ed4b36cec29e9966eb1bb7bcd97ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 May 2016 22:07:30 +0600 Subject: [PATCH 149/501] [utils] Allow empty attribute values in get_element_by_attribute (Closes #9415) --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a5922b2b5..6e4573784 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -256,9 +256,9 @@ def get_element_by_attribute(attribute, value, html): m = re.search(r'''(?xs) <([a-zA-Z0-9:._-]+) - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*? \s+%s=['"]?%s['"]? 
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*? \s*> (?P.*?) From 25cb7a0eebae0093a81fa1c930480fafa13feb25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 May 2016 22:11:18 +0600 Subject: [PATCH 150/501] [youtube] Allow empty attribute values in description regex --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b7c3cb63f..f3f102c30 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1326,9 +1326,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if video_description: video_description = re.sub(r'''(?x) ]*> [^<]+\.{3}\s* From 3e80e6f40d6ef76142340a2292ef2445dc79594b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 May 2016 23:35:58 +0600 Subject: [PATCH 151/501] [vevo] Allow request to api.vevo.com to fail (Closes #9417) I don't know whether this it's tempopary or api has just gone --- youtube_dl/extractor/vevo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c0ef08c02..30b3a9e7e 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -201,9 +201,10 @@ class VevoIE(VevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id + json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id response = self._download_json( - json_url, video_id, 'Downloading video info', 'Unable to download info') + json_url, video_id, 'Downloading video info', + 'Unable to download info', fatal=False) or {} video_info = response.get('video') or {} artist = None featured_artist = None @@ -212,7 +213,7 @@ class VevoIE(VevoBaseIE): formats = [] if not 
video_info: - if response.get('statusCode') != 909: + if response and response.get('statusCode') != 909: ytid = response.get('errorInfo', {}).get('ytid') if ytid: self.report_warning( From f745403b5b448c170710256a61b8505e09e77674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 May 2016 23:37:17 +0600 Subject: [PATCH 152/501] [vevo] Revert videoplayer.vevo.com to api.vevo.com --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 30b3a9e7e..c0632cd6a 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -201,7 +201,7 @@ class VevoIE(VevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id + json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id response = self._download_json( json_url, video_id, 'Downloading video info', 'Unable to download info', fatal=False) or {} From e2ee97dcd5c55e1c2aceae0d93fbfd64d0cc5ba3 Mon Sep 17 00:00:00 2001 From: inondle Date: Fri, 6 May 2016 12:05:37 -0700 Subject: [PATCH 153/501] [liveleak] Adds support for thumbnails, updates tests --- youtube_dl/extractor/liveleak.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 29fba5f30..ea0565ac0 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -17,7 +17,8 @@ class LiveLeakIE(InfoExtractor): 'ext': 'flv', 'description': 'extremely bad day for this guy..!', 'uploader': 'ljfriel2', - 'title': 'Most unlucky car accident' + 'title': 'Most unlucky car accident', + 'thumbnail': 're:^https?://.*\.jpg$' } }, { 'url': 'http://www.liveleak.com/view?i=f93_1390833151', @@ -28,6 +29,7 @@ class LiveLeakIE(InfoExtractor): 'description': 'German Television Channel NDR does an 
exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.', 'uploader': 'ARD_Stinkt', 'title': 'German Television does first Edward Snowden Interview (ENGLISH)', + 'thumbnail': 're:^https?://.*\.jpg$' } }, { 'url': 'http://www.liveleak.com/view?i=4f7_1392687779', @@ -49,7 +51,8 @@ class LiveLeakIE(InfoExtractor): 'ext': 'mp4', 'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.', 'uploader': 'bony333', - 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia' + 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia', + 'thumbnail': 're:^https?://.*\.jpg$' } }] @@ -72,6 +75,7 @@ class LiveLeakIE(InfoExtractor): age_limit = int_or_none(self._search_regex( r'you confirm that you are ([0-9]+) years and over.', webpage, 'age limit', default=None)) + video_thumbnail = self._og_search_thumbnail(webpage) sources_raw = self._search_regex( r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None) @@ -124,4 +128,5 @@ class LiveLeakIE(InfoExtractor): 'uploader': video_uploader, 'formats': formats, 'age_limit': age_limit, + 'thumbnail': video_thumbnail, } From 3fd6332c056115e5de37b0789d907e9344c2ff5c Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 7 May 2016 15:12:20 +0100 Subject: [PATCH 154/501] [flickr] extract license field(closes #9425) --- youtube_dl/extractor/flickr.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 0a3de1498..73ae3adee 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -27,10 +27,24 @@ class FlickrIE(InfoExtractor): 'comment_count': int, 'view_count': int, 'tags': list, + 'license': 'Attribution-ShareAlike', } } - _API_BASE_URL = 'https://api.flickr.com/services/rest?' 
+ # https://help.yahoo.com/kb/flickr/SLN25525.html + _LICENSES = { + '0': 'All Rights Reserved', + '1': 'Attribution-NonCommercial-ShareAlike', + '2': 'Attribution-NonCommercial', + '3': 'Attribution-NonCommercial-NoDerivs', + '4': 'Attribution', + '5': 'Attribution-ShareAlike', + '6': 'Attribution-NoDerivs', + '7': 'No known copyright restrictions', + '8': 'United States government work', + '9': 'Public Domain Dedication (CC0)', + '10': 'Public Domain Work', + } def _call_api(self, method, video_id, api_key, note, secret=None): query = { @@ -87,7 +101,8 @@ class FlickrIE(InfoExtractor): 'uploader': owner.get('realname'), 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')), 'view_count': int_or_none(video_info.get('views')), - 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])] + 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])], + 'license': self._LICENSES.get(video_info.get('license')), } else: raise ExtractorError('not a video', expected=True) From cb1fa5881315ed998a366f47511b7a4b4ea067b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 7 May 2016 20:15:40 +0600 Subject: [PATCH 155/501] [flickr] Extract uploader URL (Closes #9426) --- youtube_dl/extractor/flickr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 73ae3adee..a8e1bf42a 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -24,6 +24,7 @@ class FlickrIE(InfoExtractor): 'upload_date': '20110423', 'uploader_id': '10922353@N03', 'uploader': 'Forest Wander', + 'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/', 'comment_count': int, 'view_count': int, 'tags': list, @@ -89,6 +90,9 @@ class FlickrIE(InfoExtractor): self._sort_formats(formats) owner = video_info.get('owner', {}) + uploader_id = owner.get('nsid') + uploader_path = 
owner.get('path_alias') or uploader_id + uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None return { 'id': video_id, @@ -97,8 +101,9 @@ class FlickrIE(InfoExtractor): 'formats': formats, 'timestamp': int_or_none(video_info.get('dateuploaded')), 'duration': int_or_none(video_info.get('video', {}).get('duration')), - 'uploader_id': owner.get('nsid'), + 'uploader_id': uploader_id, 'uploader': owner.get('realname'), + 'uploader_url': uploader_url, 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')), 'view_count': int_or_none(video_info.get('views')), 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])], From a0904c5d8024c12b7f95b1126a6b8152a4e1021f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 8 May 2016 00:56:31 +0800 Subject: [PATCH 156/501] [telegraaf] Fix extractor (closes #9318) --- youtube_dl/extractor/telegraaf.py | 58 +++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py index 6f8333cfc..9092e9b85 100644 --- a/youtube_dl/extractor/telegraaf.py +++ b/youtube_dl/extractor/telegraaf.py @@ -2,14 +2,16 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( + determine_ext, + remove_end, +) class TelegraafIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P\d+)/[^/]+\.html' _TEST = { 'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html', - 'md5': '83245a9779bcc4a24454bfd53c65b6dc', 'info_dict': { 'id': '24353229', 'ext': 'mp4', @@ -18,18 +20,60 @@ class TelegraafIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 33, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): - playlist_id = self._match_id(url) + video_id = self._match_id(url) - 
webpage = self._download_webpage(url, playlist_id) + webpage = self._download_webpage(url, video_id) + player_url = self._html_search_regex( + r']+src="([^"]+")', webpage, 'player URL') + player_page = self._download_webpage( + player_url, video_id, note='Download player webpage') playlist_url = self._search_regex( - r"iframe\.loadPlayer\('([^']+)'", webpage, 'player') + r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL') + playlist_data = self._download_json(playlist_url, video_id) + + item = playlist_data['items'][0] + formats = [] + locations = item['locations'] + for location in locations.get('adaptive', []): + manifest_url = location['src'] + ext = determine_ext(manifest_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, ext='mp4', m3u8_id='hls')) + elif ext == 'mpd': + # TODO: Current DASH formats are broken - $Time$ pattern in + # not implemented yet + continue + else: + self.report_warning('Unknown adaptive format %s' % ext) + for location in locations.get('progressive', []): + formats.append({ + 'url': location['sources'][0]['src'], + 'width': location.get('width'), + 'height': location.get('height'), + 'format_id': 'http-%s' % location['label'], + }) + + self._sort_formats(formats) - entries = self._extract_xspf_playlist(playlist_url, playlist_id) title = remove_end(self._og_search_title(webpage), ' - VIDEO') description = self._og_search_description(webpage) + duration = item.get('duration') + thumbnail = item.get('poster') - return self.playlist_result(entries, playlist_id, title, description) + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'duration': duration, + 'thumbnail': thumbnail, + } From e2eca6f65e9969c31b3374bd3688321f3e471cd7 Mon Sep 17 00:00:00 2001 From: Kevin Deldycke Date: Sat, 7 May 2016 20:03:25 +0200 Subject: [PATCH 157/501] Expand user's home in batch file path. 
--- youtube_dl/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 737f6545d..7a0466077 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -86,7 +86,9 @@ def _real_main(argv=None): if opts.batchfile == '-': batchfd = sys.stdin else: - batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore') + batchfd = io.open( + compat_expanduser(opts.batchfile), + 'r', encoding='utf-8', errors='ignore') batch_urls = read_batch_urls(batchfd) if opts.verbose: write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') From 00c21c225decf648199013f2fa3385a1332037bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 00:11:44 +0600 Subject: [PATCH 158/501] Credit @kdeldycke for #9430 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 814fe9ec3..5f668338b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -169,3 +169,4 @@ Viťas Strádal Kagami Hiiragi Philip Huppert blahgeek +Kevin Deldycke From 5c24873a9e6a47e58b10eb0c0825e165604796f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 02:04:34 +0600 Subject: [PATCH 159/501] Credit @inondle for #9400 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 5f668338b..bf860b7f7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -170,3 +170,4 @@ Kagami Hiiragi Philip Huppert blahgeek Kevin Deldycke +inondle From f5436c5d9e4e65790440ada40476712ff430651b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 02:29:26 +0600 Subject: [PATCH 160/501] [downloader/external] Add temp fix ffmpeg m3u8 downloads (Closes #9394) --- youtube_dl/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 8d642fc3e..45f49c350 100644 --- a/youtube_dl/downloader/external.py +++ 
b/youtube_dl/downloader/external.py @@ -224,7 +224,7 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_live', 'live'] args += ['-i', url, '-c', 'copy'] - if protocol == 'm3u8': + if protocol in ('m3u8', 'm3u8_native'): if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': args += ['-f', 'mpegts'] else: From 3e169233daf76cd7585ebac12504f8e624b7693b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 04:36:57 +0600 Subject: [PATCH 161/501] Expanduser for more options with input files --- youtube_dl/YoutubeDL.py | 1 + youtube_dl/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2187dcc8f..a96482e68 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -2018,6 +2018,7 @@ class YoutubeDL(object): if opts_cookiefile is None: self.cookiejar = compat_cookiejar.CookieJar() else: + opts_cookiefile = compat_expanduser(opts_cookiefile) self.cookiejar = compat_cookiejar.MozillaCookieJar( opts_cookiefile) if os.access(opts_cookiefile, os.R_OK): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7a0466077..cbd84c3af 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -406,7 +406,7 @@ def _real_main(argv=None): try: if opts.load_info_filename is not None: - retcode = ydl.download_with_info_file(opts.load_info_filename) + retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename)) else: retcode = ydl.download(all_urls) except MaxDownloadsReached: From 0fdbe3146c2b3825cc26aca7e918df041b0f9adf Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 08:56:22 +0900 Subject: [PATCH 162/501] use dict.get in case upload_date does not exist --- youtube_dl/extractor/afreecatv.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index aa5847677..4ebc61bae 100644 --- 
a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -66,7 +66,7 @@ class AfreecaTVIE(InfoExtractor): @staticmethod def parse_video_key(key): - video_key = {'upload_date': None, 'part': '0'} + video_key = {} m = re.match(r'^(?P\d{8})_\w+_(?P\d+)$', key) if m: video_key['upload_date'] = m.group('upload_date') @@ -92,12 +92,12 @@ class AfreecaTVIE(InfoExtractor): thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for video_file in video_xml.findall('./track/video/file'): + for i, video_file in enumerate(video_xml.findall('./track/video/file')): video_key = self.parse_video_key(video_file.get('key')) entries.append({ - 'id': '%s_%s' % (video_id, video_key['part']), + 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), 'title': title, - 'upload_date': video_key['upload_date'], + 'upload_date': video_key.get('upload_date'), 'duration': int_or_none(video_file.get('duration')), 'url': video_file.text, }) From 81f35fee2fd2b58d909887aaa7667310a4d65759 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 08:56:44 +0900 Subject: [PATCH 163/501] fix extractors.py import order --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f85d75933..1f95530a5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -16,8 +16,8 @@ from .adobetv import ( AdobeTVVideoIE, ) from .adultswim import AdultSwimIE -from .afreecatv import AfreecaTVIE from .aenetworks import AENetworksIE +from .afreecatv import AfreecaTVIE from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE From 9c072d38c6b0361d91e92c50cd0c753dc8ce3101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 06:52:42 +0600 Subject: [PATCH 164/501] [arte] Improve language preference (Closes #9401, closes 
#9162) --- youtube_dl/extractor/arte.py | 58 ++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 881cacfab..e37fdae13 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -161,24 +161,53 @@ class ArteTVPlus7IE(InfoExtractor): 'es': 'E[ESP]', } + langcode = LANGS.get(lang, lang) + formats = [] for format_id, format_dict in player_info['VSR'].items(): f = dict(format_dict) versionCode = f.get('versionCode') - langcode = LANGS.get(lang, lang) - lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)] - lang_pref = None - if versionCode: - matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)] - lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs) - source_pref = 0 - if versionCode is not None: - # The original version with subtitles has lower relevance - if re.match(r'VO-ST(F|A|E)', versionCode): - source_pref -= 10 - # The version with sourds/mal subtitles has also lower relevance - elif re.match(r'VO?(F|A|E)-STM\1', versionCode): - source_pref -= 9 + l = re.escape(langcode) + + # Language preference from most to least priority + # Reference: section 5.6.3 of + # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf + PREFERENCES = ( + # original version in requested language, without subtitles + r'VO{0}$'.format(l), + # original version in requested language, with partial subtitles in requested language + r'VO{0}-ST{0}$'.format(l), + # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language + r'VO{0}-STM{0}$'.format(l), + # non-original (dubbed) version in requested language, without subtitles + r'V{0}$'.format(l), + # non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language + r'V{0}-ST{0}$'.format(l), + # non-original 
(dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language + r'V{0}-STM{0}$'.format(l), + # original version in requested language, with partial subtitles in different language + r'VO{0}-ST(?!{0}).+?$'.format(l), + # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language + r'VO{0}-STM(?!{0}).+?$'.format(l), + # original version in different language, with partial subtitles in requested language + r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l), + # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language + r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l), + # original version in different language, without subtitles + r'VO(?:(?!{0}))?$'.format(l), + # original version in different language, with partial subtitles in different language + r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l), + # original version in different language, with subtitles for the deaf and hard-of-hearing in different language + r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l), + ) + + for pref, p in enumerate(PREFERENCES): + if re.match(p, versionCode): + lang_pref = len(PREFERENCES) - pref + break + else: + lang_pref = -1 + format = { 'format_id': format_id, 'preference': -10 if f.get('videoFormat') == 'M3U8' else None, @@ -188,7 +217,6 @@ class ArteTVPlus7IE(InfoExtractor): 'height': int_or_none(f.get('height')), 'tbr': int_or_none(f.get('bitrate')), 'quality': qfunc(f.get('quality')), - 'source_preference': source_pref, } if f.get('mediaType') == 'rtmp': From 3452c3a27c2bfd278746314cda4247c2226a35f3 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 10:02:19 +0900 Subject: [PATCH 165/501] update tests --- youtube_dl/extractor/afreecatv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 4ebc61bae..b90095881 100644 --- 
a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -30,7 +30,7 @@ class AfreecaTVIE(InfoExtractor): 'id': '36164052', 'ext': 'mp4', 'title': '데일리 에이프릴 요정들의 시상식!', - 'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', 'uploader': 'dailyapril', 'uploader_id': 'dailyapril', 'upload_date': '20160503', @@ -40,7 +40,7 @@ class AfreecaTVIE(InfoExtractor): 'info_dict': { 'id': '36153164', 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', 'uploader': 'dailyapril', 'uploader_id': 'dailyapril', }, @@ -62,6 +62,9 @@ class AfreecaTVIE(InfoExtractor): 'upload_date': '20160502', }, }], + }, { + 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', + 'only_matching': True, }] @staticmethod From 370d4eb8ad3d9d092fc5eb116509eaf4a3e83177 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 10:02:48 +0900 Subject: [PATCH 166/501] use stricter file selector in case of empty in case of empty ./track/video/file entries --- youtube_dl/extractor/afreecatv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index b90095881..527386be3 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -95,7 +95,7 @@ class AfreecaTVIE(InfoExtractor): thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for i, video_file in enumerate(video_xml.findall('./track/video/file')): + for i, video_file in enumerate(video_xml.findall('./track/video/file[@key]')): video_key = self.parse_video_key(video_file.get('key')) entries.append({ 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), @@ -119,7 +119,7 @@ class AfreecaTVIE(InfoExtractor): info['entries'] = 
entries elif len(entries) == 1: info['url'] = entries[0]['url'] - info['upload_date'] = entries[0]['upload_date'] + info['upload_date'] = entries[0].get('upload_date') else: raise ExtractorError( 'No files found for the specified AfreecaTV video, either' From 93fdb1417766015ddadcd13a709cdfae4de5e246 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 10:33:17 +0900 Subject: [PATCH 167/501] don't use selection by attribute --- youtube_dl/extractor/afreecatv.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 527386be3..0fcbea0d1 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -95,8 +95,10 @@ class AfreecaTVIE(InfoExtractor): thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for i, video_file in enumerate(video_xml.findall('./track/video/file[@key]')): - video_key = self.parse_video_key(video_file.get('key')) + for i, video_file in enumerate(video_xml.findall('./track/video/file')): + video_key = self.parse_video_key(video_file.get('key', '')) + if not video_key: + continue entries.append({ 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), 'title': title, From 3b01a9fbb63e33325fa979db8a846d3e655e79e6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 8 May 2016 14:34:38 +0800 Subject: [PATCH 168/501] [litv] Add new extractor LiTV is a streaming platform providing free and paid legal contents in Taiwan. 
--- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/litv.py | 137 +++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 youtube_dl/extractor/litv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 14b4f245f..7bacef184 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -384,6 +384,7 @@ from .limelight import ( LimelightChannelIE, LimelightChannelListIE, ) +from .litv import LiTVIE from .liveleak import LiveLeakIE from .livestream import ( LivestreamIE, diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py new file mode 100644 index 000000000..3356d015d --- /dev/null +++ b/youtube_dl/extractor/litv.py @@ -0,0 +1,137 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + smuggle_url, + unsmuggle_url, +) + + +class LiTVIE(InfoExtractor): + _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P[^&]+)' + + _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' + + _TESTS = [{ + 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', + 'info_dict': { + 'id': 'VOD00041606', + 'title': '花千骨', + }, + 'playlist_count': 50, + }, { + 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', + 'info_dict': { + 'id': 'VOD00041610', + 'ext': 'mp4', + 'title': '花千骨第1集', + 'thumbnail': 're:https?://.*\.jpg$', + 'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f', + 'episode_number': 1, + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, # m3u8 download + }, + 'skip': 'Georestricted to Taiwan', + }] + + def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True): + episode_title = view_data['title'] + content_id = season_list['contentId'] + + if prompt: 
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id)) + + all_episodes = [ + self.url_result(smuggle_url( + self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']), + {'force_noplaylist': True})) # To prevent infinite recursion + for episode in season_list['episode']] + + return self.playlist_result(all_episodes, content_id, episode_title) + + def _real_extract(self, url): + url, data = unsmuggle_url(url, {}) + + video_id = self._match_id(url) + + noplaylist = self._downloader.params.get('noplaylist') + noplaylist_prompt = True + if 'force_noplaylist' in data: + noplaylist = data['force_noplaylist'] + noplaylist_prompt = False + + webpage = self._download_webpage(url, video_id) + + view_data = dict(map(lambda t: (t[0], t[2]), re.findall( + r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2', + webpage))) + + vod_data = self._parse_json(self._search_regex( + 'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), + video_id) + + season_list = list(vod_data.get('seasonList', {}).values()) + if season_list: + if not noplaylist: + return self._extract_playlist( + season_list[0], video_id, vod_data, view_data, + prompt=noplaylist_prompt) + + if noplaylist_prompt: + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + + # In browsers `getMainUrl` request is always issued. Usually this + # endpoint gives the same result as the data embedded in the webpage. 
+ # If georestricted, there are no embedded data, so an extra request is + # necessary to get the error code + video_data = self._parse_json(self._search_regex( + r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', + webpage, 'video data', default='{}'), video_id) + if not video_data: + payload = { + 'assetId': view_data['assetId'], + 'watchDevices': vod_data['watchDevices'], + 'contentType': view_data['contentType'], + } + video_data = self._download_json( + 'https://www.litv.tv/vod/getMainUrl', video_id, + data=json.dumps(payload).encode('utf-8'), + headers={'Content-Type': 'application/json'}) + + if not video_data.get('fullpath'): + error_msg = video_data.get('errorMessage') + if error_msg == 'vod.error.outsideregionerror': + self.raise_geo_restricted('This video is available in Taiwan only') + if error_msg: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True) + raise ExtractorError('Unexpected result from %s' % self.IE_NAME) + + formats = self._extract_m3u8_formats( + video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls') + for a_format in formats: + # LiTV HLS segments doesn't like compressions + a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True + + title = view_data['title'] + view_data.get('secondaryMark', '') + description = view_data.get('description') + thumbnail = view_data.get('imageFile') + categories = [item['name'] for item in vod_data.get('category', [])] + episode = int_or_none(view_data.get('episode')) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'categories': categories, + 'episode_number': episode, + } From f23a92a0cecac0d4db60e086e429793556347271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 20:02:54 +0600 Subject: [PATCH 169/501] [mva] Add extractor (Closes #6667) --- youtube_dl/extractor/extractors.py | 4 + .../extractor/microsoftvirtualacademy.py | 192 
++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 youtube_dl/extractor/microsoftvirtualacademy.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7bacef184..a0bb3d4c2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -409,6 +409,10 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE from .mgtv import MGTVIE +from .microsoftvirtualacademy import ( + MicrosoftVirtualAcademyIE, + MicrosoftVirtualAcademyCourseIE, +) from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .minoto import MinotoIE diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py new file mode 100644 index 000000000..b7fea47ee --- /dev/null +++ b/youtube_dl/extractor/microsoftvirtualacademy.py @@ -0,0 +1,192 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_xpath, +) +from ..utils import ( + int_or_none, + parse_duration, + smuggle_url, + unsmuggle_url, + xpath_text, +) + + +class MicrosoftVirtualAcademyBaseIE(InfoExtractor): + def _extract_base_url(self, course_id, display_id): + return self._download_json( + 'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id, + display_id, 'Downloading course base URL') + + def _extract_chapter_and_title(self, title): + if not title: + return None, None + m = re.search(r'(?P\d+)\s*\|\s*(?P.+)', title) + return (int(m.group('chapter')), m.group('title')) if m else (None, title) + + +class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): + IE_NAME = 'mva' + IE_DESC = 'Microsoft Virtual Academy videos' + _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME + + _TESTS = [{ + 'url': 
'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382', + 'md5': '7826c44fc31678b12ad8db11f6b5abb9', + 'info_dict': { + 'id': 'gfVXISmEB_6804984382', + 'ext': 'mp4', + 'title': 'Course Introduction', + 'formats': 'mincount:3', + 'subtitles': { + 'en': [{ + 'ext': 'ttml', + }], + }, + } + }, { + 'url': 'mva:11788:gfVXISmEB_6804984382', + 'only_matching': True, + }] + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + mobj = re.match(self._VALID_URL, url) + course_id = mobj.group('course_id') + video_id = mobj.group('id') + + base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id) + + settings = self._download_xml( + '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id), + video_id, 'Downloading video settings XML') + + _, title = self._extract_chapter_and_title(xpath_text( + settings, './/Title', 'title', fatal=True)) + + formats = [] + + for sources in settings.findall(compat_xpath('.//MediaSources')): + if sources.get('videoType') == 'smoothstreaming': + continue + for source in sources.findall(compat_xpath('./MediaSource')): + video_url = source.text + if not video_url or not video_url.startswith('http'): + continue + video_mode = source.get('videoMode') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', video_mode or '', 'height', default=None)) + codec = source.get('codec') + acodec, vcodec = [None] * 2 + if codec: + codecs = codec.split(',') + if len(codecs) == 2: + acodec, vcodec = codecs + elif len(codecs) == 1: + vcodec = codecs[0] + formats.append({ + 'url': video_url, + 'format_id': video_mode, + 'height': height, + 'acodec': acodec, + 'vcodec': vcodec, + }) + self._sort_formats(formats) + + subtitles = {} + for source in settings.findall(compat_xpath('.//MarkerResourceSource')): + subtitle_url = source.text + if not subtitle_url: + continue + subtitles.setdefault('en', []).append({ + 'url': '%s/%s' 
% (base_url, subtitle_url), + 'ext': source.get('type'), + }) + + return { + 'id': video_id, + 'title': title, + 'subtitles': subtitles, + 'formats': formats + } + + +class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE): + IE_NAME = 'mva:course' + IE_DESC = 'Microsoft Virtual Academy courses' + _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME + + _TESTS = [{ + 'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', + 'info_dict': { + 'id': '11788', + 'title': 'Microsoft Azure Fundamentals: Virtual Machines', + }, + 'playlist_count': 36, + }, { + # with emphasized chapters + 'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335', + 'info_dict': { + 'id': '16335', + 'title': 'Developing Windows 10 Games with Construct 2', + }, + 'playlist_count': 10, + }, { + 'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', + 'only_matching': True, + }, { + 'url': 'mva:course:11788', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if MicrosoftVirtualAcademyIE.suitable(url) else super( + MicrosoftVirtualAcademyCourseIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + course_id = mobj.group('id') + display_id = mobj.group('display_id') + + base_url = self._extract_base_url(course_id, display_id) + + manifest = self._download_json( + '%s/imsmanifestlite.json' % base_url, + display_id, 'Downloading course manifest JSON')['manifest'] + + organization = manifest['organizations']['organization'][0] + + entries = [] + for chapter in organization['item']: + chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title')) + chapter_id = chapter.get('@identifier') + for item in 
chapter.get('item', []): + item_id = item.get('@identifier') + if not item_id: + continue + metadata = item.get('resource', {}).get('metadata') or {} + if metadata.get('learningresourcetype') != 'Video': + continue + _, title = self._extract_chapter_and_title(item.get('title')) + duration = parse_duration(metadata.get('duration')) + description = metadata.get('description') + entries.append({ + '_type': 'url_transparent', + 'url': smuggle_url( + 'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}), + 'title': title, + 'description': description, + 'duration': duration, + 'chapter': chapter_title, + 'chapter_number': chapter_number, + 'chapter_id': chapter_id, + }) + + title = organization.get('title') or manifest.get('metadata', {}).get('title') + + return self.playlist_result(entries, course_id, title) From c52f4efaee2386a72c3f6b694fb4f4c3132ced55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 May 2016 20:10:20 +0600 Subject: [PATCH 170/501] [mva] Improve _VALID_URLs --- youtube_dl/extractor/microsoftvirtualacademy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py index b7fea47ee..afd3e98ec 100644 --- a/youtube_dl/extractor/microsoftvirtualacademy.py +++ b/youtube_dl/extractor/microsoftvirtualacademy.py @@ -31,7 +31,7 @@ class MicrosoftVirtualAcademyBaseIE(InfoExtractor): class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): IE_NAME = 'mva' IE_DESC = 'Microsoft Virtual Academy videos' - _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME + _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME _TESTS = [{ 'url': 
'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382', @@ -118,7 +118,7 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE): IE_NAME = 'mva:course' IE_DESC = 'Microsoft Virtual Academy courses' - _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME + _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME _TESTS = [{ 'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', From f1f6f5aa5e2a6d66fa54d35bf3e8b3626e85ee73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Cech?= <sleep_walker@suse.cz> Date: Sat, 7 May 2016 20:15:49 +0200 Subject: [PATCH 171/501] [ceskatelevize] Add support for live streams Live streams has no playlist title, use title of the stream containing TV channel name. Internal m3u8 handler doesn't seem to handle well continuous streams. Add test for live stream. Remove no longer reachable test. 
--- youtube_dl/extractor/ceskatelevize.py | 35 +++++++++++++++++++-------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 6652c8e42..b41888531 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -33,14 +33,13 @@ class CeskaTelevizeIE(InfoExtractor): 'skip_download': True, }, }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', + # live stream + 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 'info_dict': { - 'id': '61924494876844374', + 'id': 402, 'ext': 'mp4', - 'title': 'První republika: Zpěvačka z Dupárny Bobina', - 'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.', - 'thumbnail': 're:^https?://.*\.jpg', - 'duration': 88.4, + 'title': 're:ČT Sport.*', + 'is_live': True, }, 'params': { # m3u8 download @@ -118,19 +117,21 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage) - playlist_description = self._og_search_description(webpage) + playlist_title = self._og_search_title(webpage, default=None) + playlist_description = self._og_search_description(webpage, default=None) playlist = self._download_json(req, playlist_id)['playlist'] playlist_len = len(playlist) entries = [] for item in playlist: + is_live = item['type'] == 'LIVE' formats = [] for format_id, stream_url in item['streamUrls'].items(): formats.extend(self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', - entry_protocol='m3u8_native', fatal=False)) + entry_protocol='m3u8' if is_live else 'm3u8_native', + fatal=False)) self._sort_formats(formats) item_id = item.get('id') or item['assetId'] @@ -145,14 +146,28 @@ class CeskaTelevizeIE(InfoExtractor): if subs: subtitles = self.extract_subtitles(episode_id, subs) + if 
playlist_len == 1: + if is_live: + # live streams has channel name in title + final_title = self._live_title(title) + elif playlist_title: + # title is always set (no KeyError caught) + # and gives good fallback + final_title = title + else: + final_title = playlist_title + else: + final_title = '%s (%s)' % (playlist_title, title) + entries.append({ 'id': item_id, - 'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title), + 'title': final_title, 'description': playlist_description if playlist_len == 1 else None, 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, }) return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) From 3951e7eb9305448aab6395f4303ed7ab19248c52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 20:37:20 +0600 Subject: [PATCH 172/501] [ceskatelevize] Simplify, restore bonus video test and skip georestricted test (Closes #9431) --- youtube_dl/extractor/ceskatelevize.py | 29 ++++++++++++++++++--------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index b41888531..5a58d1777 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -32,19 +32,34 @@ class CeskaTelevizeIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', + 'info_dict': { + 'id': '61924494877028507', + 'ext': 'mp4', + 'title': 'Hyde Park Civilizace: Bonus 01 - En', + 'description': 'English Subtittles', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 81.3, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { # live stream 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 'info_dict': { 'id': 402, 'ext': 'mp4', - 
'title': 're:ČT Sport.*', + 'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, + 'skip': 'Georestricted to Czech Republic', }, { # video with 18+ caution trailer 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', @@ -125,7 +140,7 @@ class CeskaTelevizeIE(InfoExtractor): entries = [] for item in playlist: - is_live = item['type'] == 'LIVE' + is_live = item.get('type') == 'LIVE' formats = [] for format_id, stream_url in item['streamUrls'].items(): formats.extend(self._extract_m3u8_formats( @@ -147,15 +162,9 @@ class CeskaTelevizeIE(InfoExtractor): subtitles = self.extract_subtitles(episode_id, subs) if playlist_len == 1: + final_title = playlist_title or title if is_live: - # live streams has channel name in title - final_title = self._live_title(title) - elif playlist_title: - # title is always set (no KeyError caught) - # and gives good fallback - final_title = title - else: - final_title = playlist_title + final_title = self._live_title(final_title) else: final_title = '%s (%s)' % (playlist_title, title) From 965fefdcd879405c3e4b5604513719353ba8474a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 20:38:33 +0600 Subject: [PATCH 173/501] Credit @sleep-walker for #9431 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index bf860b7f7..5ca71ace7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -171,3 +171,4 @@ Philip Huppert blahgeek Kevin Deldycke inondle +Tomáš Čech From c15c47d19bfeeacd42f44dd7736f175711a91346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 20:45:03 +0600 Subject: [PATCH 174/501] [downloader/hls] Remove EXT-X-MEDIA-SEQUENCE from unsupported features for hlsnative --- youtube_dl/downloader/hls.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py 
b/youtube_dl/downloader/hls.py index d7b34bde3..dcedc9a64 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,7 +23,9 @@ class HlsFD(FragmentFD): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] - r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + # Live streams heuristic does not always work (e.g. geo restricted to Germany + # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) + #r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 From 6104cc2985c36e996df1aae7cfcc686f3bae0b82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 20:55:37 +0600 Subject: [PATCH 175/501] [downloader/hls] Add event media playlists to unsupported features of hlsnative --- youtube_dl/downloader/hls.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index dcedc9a64..a8279718b 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -26,9 +26,12 @@ class HlsFD(FragmentFD): # Live streams heuristic does not always work (e.g. geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) #r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # event media playlists [4] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 + # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 ) return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) From fe40f9eef2483748ed83c9749f35220143d8cc9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 21:55:03 +0600 Subject: [PATCH 176/501] [compat] Add compat_setenv --- test/test_compat.py | 8 ++++++++ youtube_dl/compat.py | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/test/test_compat.py b/test/test_compat.py index 618668210..0d751a856 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.utils import get_filesystem_encoding from youtube_dl.compat import ( compat_getenv, + compat_setenv, compat_etree_fromstring, compat_expanduser, compat_shlex_split, @@ -31,6 +32,13 @@ class TestCompat(unittest.TestCase): else test_str.encode(get_filesystem_encoding())) self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) + def test_compat_setenv(self): + test_var = 'YOUTUBE-DL-TEST' + test_str = 'тест' + compat_setenv(test_var, test_str) + compat_getenv(test_var) + self.assertEqual(compat_getenv(test_var), test_str) + def test_compat_expanduser(self): old_home = os.environ.get('HOME') test_str = 'C:\Documents and Settings\тест\Application Data' diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0b6c5ca7a..12b53cdc8 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -373,6 +373,9 @@ compat_os_name = os._name if os.name == 'java' else os.name if sys.version_info >= (3, 0): compat_getenv = os.getenv compat_expanduser = os.path.expanduser + + def compat_setenv(key, value, env=os.environ): + env[key] = value else: # Environment variables 
should be decoded with filesystem encoding. # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) @@ -384,6 +387,12 @@ else: env = env.decode(get_filesystem_encoding()) return env + def compat_setenv(key, value, env=os.environ): + def encode(v): + from .utils import get_filesystem_encoding + return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v + env[encode(key)] = encode(value) + # HACK: The default implementations of os.path.expanduser from cpython do not decode # environment variables with filesystem encoding. We will work around this by # providing adjusted implementations. @@ -604,6 +613,7 @@ __all__ = [ 'compat_os_name', 'compat_parse_qs', 'compat_print', + 'compat_setenv', 'compat_shlex_split', 'compat_socket_create_connection', 'compat_str', From 129263875403841da485ac74b09960d862d23f63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 21:58:38 +0600 Subject: [PATCH 177/501] [test_compat] Use compat_setenv --- test/test_compat.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index 0d751a856..afe6bd528 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -27,9 +27,7 @@ from youtube_dl.compat import ( class TestCompat(unittest.TestCase): def test_compat_getenv(self): test_str = 'тест' - os.environ['YOUTUBE-DL-TEST'] = ( - test_str if sys.version_info >= (3, 0) - else test_str.encode(get_filesystem_encoding())) + compat_setenv('YOUTUBE-DL-TEST', test_str) self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) def test_compat_setenv(self): @@ -42,11 +40,9 @@ class TestCompat(unittest.TestCase): def test_compat_expanduser(self): old_home = os.environ.get('HOME') test_str = 'C:\Documents and Settings\тест\Application Data' - os.environ['HOME'] = ( - test_str if sys.version_info >= (3, 0) - else test_str.encode(get_filesystem_encoding())) + compat_setenv('HOME', 
test_str) self.assertEqual(compat_expanduser('~'), test_str) - os.environ['HOME'] = old_home + compat_setenv('HOME', old_home) def test_all_present(self): import youtube_dl.compat From 20cfdcc910d0bc2ee4b0ee38bdf5e6ecb67e5731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 22:00:14 +0600 Subject: [PATCH 178/501] [test_compat] Avoid None values for compat_setenv --- test/test_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_compat.py b/test/test_compat.py index afe6bd528..b20814249 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -42,7 +42,7 @@ class TestCompat(unittest.TestCase): test_str = 'C:\Documents and Settings\тест\Application Data' compat_setenv('HOME', test_str) self.assertEqual(compat_expanduser('~'), test_str) - compat_setenv('HOME', old_home) + compat_setenv('HOME', old_home or '') def test_all_present(self): import youtube_dl.compat From e62d9c5caaa972ef4b1ed5d6ab5ee4a087a4ba95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 22:05:12 +0600 Subject: [PATCH 179/501] [downloader/external] Call ffmpeg with with HTTP_PROXY env variable set (#9437) --- youtube_dl/downloader/external.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 45f49c350..3a73cee1c 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -6,6 +6,7 @@ import sys import re from .common import FileDownloader +from ..compat import compat_setenv from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( cli_option, @@ -198,6 +199,18 @@ class FFmpegFD(ExternalFD): '-headers', ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] + env = None + proxy = self.params.get('proxy') + if proxy: + if not re.match(r'^[\da-zA-Z]+://', proxy): + proxy = 
'http://%s' % proxy + # Since December 2015 ffmpeg supports -http_proxy option (see + # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) + # We could switch to the following code if we are able to detect version properly + # args += ['-http_proxy', proxy] + env = os.environ.copy() + compat_setenv('HTTP_PROXY', proxy, env=env) + protocol = info_dict.get('protocol') if protocol == 'rtmp': @@ -239,7 +252,7 @@ class FFmpegFD(ExternalFD): self._debug_cmd(args) - proc = subprocess.Popen(args, stdin=subprocess.PIPE) + proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) try: retval = proc.wait() except KeyboardInterrupt: From fad7bbec3a1fb62964c8e6637dfd535fabe9c133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 22:15:55 +0600 Subject: [PATCH 180/501] [test_compat] Remove unused import --- test/test_compat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_compat.py b/test/test_compat.py index b20814249..9adf75763 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -10,7 +10,6 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.utils import get_filesystem_encoding from youtube_dl.compat import ( compat_getenv, compat_setenv, From 2937590e8b70384ef91bdadbb56a55897aab0837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 22:16:33 +0600 Subject: [PATCH 181/501] [downloader/hls] PEP 8 --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index a8279718b..62136ee54 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -25,7 +25,7 @@ class HlsFD(FragmentFD): r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # Live streams heuristic does not always work (e.g. 
geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) - #r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # event media playlists [4] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 From 4350b74545ea3d3ce072444655613bc6974d5516 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 23 Apr 2016 15:44:34 +0800 Subject: [PATCH 182/501] [socks] Add socks.py from @bluec0re's public domain implementation https://gist.github.com/bluec0re/cafd3764412967417fd3 --- youtube_dl/socks.py | 336 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 336 insertions(+) create mode 100644 youtube_dl/socks.py diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py new file mode 100644 index 000000000..b0c36a189 --- /dev/null +++ b/youtube_dl/socks.py @@ -0,0 +1,336 @@ +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# For more information, please refer to <http://unlicense.org/> +# +# Example: +# import socks +# import ftplib +# import socket +# +# socks.patch_socket() +# +# f = ftplib.FTP('ftp.kernel.org') +# f.login() +# print f.retrlines('LIST') +# f.quit() +# +# s = socket.create_connection(('www.google.com', 80)) +# s.sendall('HEAD / HTTP/1.0\r\n\r\n') +# print s.recv(1024) +# s.close() +from __future__ import unicode_literals +import os +import struct +import socket +import time + +__author__ = 'Timo Schmid <coding@timoschmid.de>' + +_orig_socket = socket.socket + +try: + from collections import namedtuple +except ImportError: + from Collections import namedtuple + +try: + from urllib.parse import urlparse +except: + from urlparse import urlparse + +try: + from enum import Enum +except ImportError: + Enum = object + + +class ProxyError(IOError): pass +class Socks4Error(ProxyError): + CODES = { + 0x5B: 'request rejected or failed', + 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', + 0x5D: 'request rejected because the client program and identd report different user-ids' + } + def __init__(self, code=None, msg=None): + if code is not None and msg is None: + msg = self.CODES.get(code) + if msg is None: + msg = 'unknown error' + super(Socks4Error, self).__init__(code, msg) + +class Socks5Error(Socks4Error): + CODES = { + 0x01: 'general SOCKS server failure', + 0x02: 'connection not allowed by ruleset', + 0x03: 'Network unreachable', + 0x04: 'Host unreachable', + 0x05: 
'Connection refused', + 0x06: 'TTL expired', + 0x07: 'Command not supported', + 0x08: 'Address type not supported', + 0xFE: 'unknown username or invalid password', + 0xFF: 'all offered authentication methods were rejected' + } + +class ProxyType(Enum): + SOCKS4 = 0 + SOCKS4A = 1 + SOCKS5 = 2 + +Proxy = namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) + +_default_proxy = None + +def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None, allow_env_override=True): + global _default_proxy + if allow_env_override: + all_proxy = os.environ.get('ALL_PROXY', os.environ.get('all_proxy')) + if all_proxy: + all_proxy = urlparse(all_proxy) + if all_proxy.scheme.startswith('socks'): + if all_proxy.scheme == 'socks' or all_proxy.scheme == 'socks4': + proxytype = ProxyType.SOCKS4 + elif all_proxy.scheme == 'socks4a': + proxytype = ProxyType.SOCKS4A + elif all_proxy.scheme == 'socks5': + proxytype = ProxyType.SOCKS5 + addr = all_proxy.hostname + port = all_proxy.port + username = all_proxy.username + password = all_proxy.password + + if proxytype is not None: + _default_proxy = Proxy(proxytype, addr, port, username, password, rdns) + + +def wrap_socket(sock): + return socksocket(_sock=sock._sock) + +def wrap_module(module): + if hasattr(module, 'socket'): + sock = module.socket + if isinstance(sock, socket.socket): + module.socket = sockssocket + elif hasattr(socket, 'socket'): + socket.socket = sockssocket + +def patch_socket(): + import sys + if 'socket' not in sys.modules: + import socket + sys.modules['socket'].socket = sockssocket + + +class sockssocket(socket.socket): + def __init__(self, *args, **kwargs): + self.__proxy = None + if 'proxy' in kwargs: + self.__proxy = kwargs['proxy'] + del kwargs['proxy'] + super(sockssocket, self).__init__(*args, **kwargs) + + @property + def _proxy(self): + if self.__proxy: + return self.__proxy + return _default_proxy + + @property + def _proxy_port(self): + if 
self._proxy: + if self._proxy.port: + return self._proxy.port + return 1080 + return None + + def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None): + if proxytype is None: + self.__proxy = None + else: + self.__proxy = Proxy(proxytype, addr, port, username, password, rdns) + + def recvall(self, cnt): + data = b'' + while len(data) < cnt: + cur = self.recv(cnt - len(data)) + if not cur: + raise IOError("{0} bytes missing".format(cnt-len(data))) + data += cur + return data + + def _setup_socks4(self, address, is_4a=False): + destaddr, port = address + + try: + ipaddr = socket.inet_aton(destaddr) + except socket.error: + if is_4a and self._proxy.remote_dns: + ipaddr = struct.pack('!BBBB', 0, 0, 0, 0xFF) + else: + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + + packet = struct.pack('!BBH', 0x4, 0x1, port) + ipaddr + if self._proxy.username: + username = self._proxy.username + if hasattr(username, 'encode'): + username = username.encode() + packet += struct.pack('!{0}s'.format(len(username)+1), username) + else: + packet += b'\x00' + + if is_4a and self._proxy.remote_dns: + if hasattr(destaddr, 'encode'): + destaddr = destaddr.encode() + packet += struct.pack('!{0}s'.format(len(destaddr)+1), destaddr) + + self.sendall(packet) + + packet = self.recvall(8) + nbyte, resp_code, dstport, dsthost = struct.unpack('!BBHI', packet) + + # check valid response + if nbyte != 0x00: + self.close() + raise ProxyError(0, "Invalid response from server. 
Expected {0:02x} got {1:02x}".format(0, nbyte)) + + # access granted + if resp_code != 0x5a: + self.close() + raise Socks4Error(resp_code) + + def _setup_socks5(self, address): + destaddr, port = address + + try: + ipaddr = socket.inet_aton(destaddr) + except socket.error: + if self._proxy.remote_dns: + ipaddr = None + else: + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + + auth_methods = 1 + if self._proxy.username and self._proxy.password: + # two auth methods available + auth_methods = 2 + packet = struct.pack('!BBB', 0x5, auth_methods, 0x00) # no auth + if self._proxy.username and self._proxy.password: + packet += struct.pack('!B', 0x02) # user/pass auth + + self.sendall(packet) + + packet = self.recvall(2) + version, method = struct.unpack('!BB', packet) + + # check valid response + if version != 0x05: + self.close() + raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + + # no auth methods + if method == 0xFF: + self.close() + raise Socks5Error(method) + + # user/pass auth + if method == 0x01: + username = self._proxy.username + if hasattr(username, 'encode'): + username = username.encode() + password = self._proxy.password + if hasattr(password, 'encode'): + password = password.encode() + packet = struct.pack('!BB', 1, len(username)) + username + packet += struct.pack('!B', len(password)) + password + self.sendall(packet) + + packet = self.recvall(2) + version, status = struct.unpack('!BB', packet) + + if version != 0x01: + self.close() + raise ProxyError(0, "Invalid response from server. 
Expected {0:02x} got {1:02x}".format(1, version)) + + if status != 0x00: + self.close() + raise Socks5Error(1) + elif method == 0x00: # no auth + pass + + + packet = struct.pack('!BBB', 5, 1, 0) + if ipaddr is None: + if hasattr(destaddr, 'encode'): + destaddr = destaddr.encode() + packet += struct.pack('!BB', 3, len(destaddr)) + destaddr + else: + packet += struct.pack('!B', 1) + ipaddr + packet += struct.pack('!H', port) + + self.sendall(packet) + + packet = self.recvall(4) + version, status, _, atype = struct.unpack('!BBBB', packet) + + if version != 0x05: + self.close() + raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + + if status != 0x00: + self.close() + raise Socks5Error(status) + + if atype == 0x01: + destaddr = self.recvall(4) + elif atype == 0x03: + alen = struct.unpack('!B', self.recv(1))[0] + destaddr = self.recvall(alen) + elif atype == 0x04: + destaddr = self.recvall(16) + destport = struct.unpack('!H', self.recvall(2))[0] + + def _make_proxy(self, connect_func, address): + if self._proxy.type == ProxyType.SOCKS4: + result = connect_func(self, (self._proxy.host, self._proxy_port)) + if result != 0 and result is not None: + return result + self._setup_socks4(address) + elif self._proxy.type == ProxyType.SOCKS4A: + result = connect_func(self, (self._proxy.host, self._proxy_port)) + if result != 0 and result is not None: + return result + self._setup_socks4(address, is_4a=True) + elif self._proxy.type == ProxyType.SOCKS5: + result = connect_func(self, (self._proxy.host, self._proxy_port)) + if result != 0 and result is not None: + return result + self._setup_socks5(address) + else: + return connect_func(self, address) + + def connect(self, address): + self._make_proxy(_orig_socket.connect, address) + + def connect_ex(self, address): + return self._make_proxy(_orig_socket.connect_ex, address) From dab0daeeb0929b9b560d2b9a5f39c1e2e6dfa449 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Sat, 23 Apr 2016 18:28:49 +0800 Subject: [PATCH 183/501] [utils,compat] Move struct_pack and struct_unpack to compat.py --- test/test_compat.py | 5 +++++ test/test_utils.py | 4 ---- youtube_dl/compat.py | 23 +++++++++++++++++++++++ youtube_dl/downloader/f4m.py | 4 ++-- youtube_dl/extractor/rtve.py | 4 +++- youtube_dl/swfinterp.py | 6 ++++-- youtube_dl/utils.py | 20 +------------------- 7 files changed, 38 insertions(+), 28 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index 9adf75763..dd62a5d6b 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -20,6 +20,7 @@ from youtube_dl.compat import ( compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, + struct_unpack, ) @@ -102,5 +103,9 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) + def test_struct_unpack(self): + self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 00ada95ec..5702ffa97 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -55,7 +55,6 @@ from youtube_dl.utils import ( smuggle_url, str_to_int, strip_jsonp, - struct_unpack, timeconvert, unescapeHTML, unified_strdate, @@ -457,9 +456,6 @@ class TestUtil(unittest.TestCase): testPL(5, 2, (2, 99), [2, 3, 4]) testPL(5, 2, (20, 99), []) - def test_struct_unpack(self): - self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) - def test_read_batch_urls(self): f = io.StringIO('''\xef\xbb\xbf foo bar\r diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 12b53cdc8..f697bee7e 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -11,6 +11,7 @@ import re import shlex import shutil import socket +import struct import subprocess import sys import itertools @@ -592,6 +593,26 @@ if sys.version_info >= (3, 0): else: from 
tokenize import generate_tokens as compat_tokenize_tokenize + +try: + struct.pack('!I', 0) +except TypeError: + # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument + # See https://bugs.python.org/issue19099 + def struct_pack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.pack(spec, *args) + + def struct_unpack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.unpack(spec, *args) +else: + struct_pack = struct.pack + struct_unpack = struct.unpack + + __all__ = [ 'compat_HTMLParser', 'compat_HTTPError', @@ -634,6 +655,8 @@ __all__ = [ 'compat_xml_parse_error', 'compat_xpath', 'shlex_quote', + 'struct_pack', + 'struct_unpack', 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 664d87543..b282fe3d6 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,13 +12,13 @@ from ..compat import ( compat_urlparse, compat_urllib_error, compat_urllib_parse_urlparse, + struct_pack, + struct_unpack, ) from ..utils import ( encodeFilename, fix_xml_ampersands, sanitize_open, - struct_pack, - struct_unpack, xpath_text, ) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 79af47715..f59040877 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -6,6 +6,9 @@ import re import time from .common import InfoExtractor +from ..compat import ( + struct_unpack, +) from ..utils import ( ExtractorError, float_or_none, @@ -13,7 +16,6 @@ from ..utils import ( remove_start, sanitized_Request, std_headers, - struct_unpack, ) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 06c1d6cc1..86b28716c 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -4,10 +4,12 @@ import collections import io import zlib -from .compat import compat_str +from .compat import ( + compat_str, + struct_unpack, +) 
from .utils import ( ExtractorError, - struct_unpack, ) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6e4573784..fa16a42ad 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -26,7 +26,6 @@ import platform import re import socket import ssl -import struct import subprocess import sys import tempfile @@ -53,6 +52,7 @@ from .compat import ( compat_urlparse, compat_xpath, shlex_quote, + struct_pack, ) @@ -1761,24 +1761,6 @@ def escape_url(url): fragment=escape_rfc3986(url_parsed.fragment) ).geturl() -try: - struct.pack('!I', 0) -except TypeError: - # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument - # See https://bugs.python.org/issue19099 - def struct_pack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.pack(spec, *args) - - def struct_unpack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.unpack(spec, *args) -else: - struct_pack = struct.pack - struct_unpack = struct.unpack - def read_batch_urls(batch_fd): def fixup(url): From 71aff18809a70b7fa32d8fd07f4fb2f64641aea5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 23 Apr 2016 21:30:06 +0800 Subject: [PATCH 184/501] [socks] Support SOCKS proxies --- youtube_dl/socks.py | 196 ++++++++++++-------------------------------- youtube_dl/utils.py | 63 +++++++++++++- 2 files changed, 115 insertions(+), 144 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index b0c36a189..95795b5a9 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -1,77 +1,30 @@ -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. 
-# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# For more information, please refer to <http://unlicense.org/> -# -# Example: -# import socks -# import ftplib -# import socket -# -# socks.patch_socket() -# -# f = ftplib.FTP('ftp.kernel.org') -# f.login() -# print f.retrlines('LIST') -# f.quit() -# -# s = socket.create_connection(('www.google.com', 80)) -# s.sendall('HEAD / HTTP/1.0\r\n\r\n') -# print s.recv(1024) -# s.close() +# Public Domain SOCKS proxy protocol implementation +# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3 + from __future__ import unicode_literals -import os -import struct + +import collections import socket -import time + +from .compat import ( + struct_pack, + struct_unpack, +) __author__ = 'Timo Schmid <coding@timoschmid.de>' -_orig_socket = socket.socket -try: - from collections import namedtuple -except ImportError: - from Collections import namedtuple - -try: - from urllib.parse import urlparse -except: - from urlparse import urlparse - -try: - from enum import Enum -except ImportError: - Enum = object +class ProxyError(IOError): + pass -class ProxyError(IOError): 
pass class Socks4Error(ProxyError): CODES = { 0x5B: 'request rejected or failed', 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', 0x5D: 'request rejected because the client program and identd report different user-ids' } + def __init__(self, code=None, msg=None): if code is not None and msg is None: msg = self.CODES.get(code) @@ -79,6 +32,7 @@ class Socks4Error(ProxyError): msg = 'unknown error' super(Socks4Error, self).__init__(code, msg) + class Socks5Error(Socks4Error): CODES = { 0x01: 'general SOCKS server failure', @@ -93,68 +47,19 @@ class Socks5Error(Socks4Error): 0xFF: 'all offered authentication methods were rejected' } -class ProxyType(Enum): - SOCKS4 = 0 + +class ProxyType(object): + SOCKS4 = 0 SOCKS4A = 1 - SOCKS5 = 2 + SOCKS5 = 2 -Proxy = namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) - -_default_proxy = None - -def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None, allow_env_override=True): - global _default_proxy - if allow_env_override: - all_proxy = os.environ.get('ALL_PROXY', os.environ.get('all_proxy')) - if all_proxy: - all_proxy = urlparse(all_proxy) - if all_proxy.scheme.startswith('socks'): - if all_proxy.scheme == 'socks' or all_proxy.scheme == 'socks4': - proxytype = ProxyType.SOCKS4 - elif all_proxy.scheme == 'socks4a': - proxytype = ProxyType.SOCKS4A - elif all_proxy.scheme == 'socks5': - proxytype = ProxyType.SOCKS5 - addr = all_proxy.hostname - port = all_proxy.port - username = all_proxy.username - password = all_proxy.password - - if proxytype is not None: - _default_proxy = Proxy(proxytype, addr, port, username, password, rdns) - - -def wrap_socket(sock): - return socksocket(_sock=sock._sock) - -def wrap_module(module): - if hasattr(module, 'socket'): - sock = module.socket - if isinstance(sock, socket.socket): - module.socket = sockssocket - elif hasattr(socket, 'socket'): - socket.socket = sockssocket - -def 
patch_socket(): - import sys - if 'socket' not in sys.modules: - import socket - sys.modules['socket'].socket = sockssocket +Proxy = collections.namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) class sockssocket(socket.socket): - def __init__(self, *args, **kwargs): - self.__proxy = None - if 'proxy' in kwargs: - self.__proxy = kwargs['proxy'] - del kwargs['proxy'] - super(sockssocket, self).__init__(*args, **kwargs) - @property def _proxy(self): - if self.__proxy: - return self.__proxy - return _default_proxy + return self.__proxy @property def _proxy_port(self): @@ -175,7 +80,7 @@ class sockssocket(socket.socket): while len(data) < cnt: cur = self.recv(cnt - len(data)) if not cur: - raise IOError("{0} bytes missing".format(cnt-len(data))) + raise IOError('{0} bytes missing'.format(cnt - len(data))) data += cur return data @@ -186,39 +91,42 @@ class sockssocket(socket.socket): ipaddr = socket.inet_aton(destaddr) except socket.error: if is_4a and self._proxy.remote_dns: - ipaddr = struct.pack('!BBBB', 0, 0, 0, 0xFF) + ipaddr = struct_pack('!BBBB', 0, 0, 0, 0xFF) else: ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) - packet = struct.pack('!BBH', 0x4, 0x1, port) + ipaddr + packet = struct_pack('!BBH', 0x4, 0x1, port) + ipaddr if self._proxy.username: username = self._proxy.username if hasattr(username, 'encode'): username = username.encode() - packet += struct.pack('!{0}s'.format(len(username)+1), username) + packet += struct_pack('!{0}s'.format(len(username) + 1), username) else: packet += b'\x00' if is_4a and self._proxy.remote_dns: if hasattr(destaddr, 'encode'): destaddr = destaddr.encode() - packet += struct.pack('!{0}s'.format(len(destaddr)+1), destaddr) + packet += struct_pack('!{0}s'.format(len(destaddr) + 1), destaddr) self.sendall(packet) packet = self.recvall(8) - nbyte, resp_code, dstport, dsthost = struct.unpack('!BBHI', packet) + nbyte, resp_code, dstport, dsthost = struct_unpack('!BBHI', packet) # check 
valid response if nbyte != 0x00: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(0, nbyte)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(0, nbyte)) # access granted if resp_code != 0x5a: self.close() raise Socks4Error(resp_code) + return (dsthost, dstport) + def _setup_socks5(self, address): destaddr, port = address @@ -234,19 +142,20 @@ class sockssocket(socket.socket): if self._proxy.username and self._proxy.password: # two auth methods available auth_methods = 2 - packet = struct.pack('!BBB', 0x5, auth_methods, 0x00) # no auth + packet = struct_pack('!BBB', 0x5, auth_methods, 0x00) # no auth if self._proxy.username and self._proxy.password: - packet += struct.pack('!B', 0x02) # user/pass auth + packet += struct_pack('!B', 0x02) # user/pass auth self.sendall(packet) packet = self.recvall(2) - version, method = struct.unpack('!BB', packet) + version, method = struct_unpack('!BB', packet) # check valid response if version != 0x05: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(5, version)) # no auth methods if method == 0xFF: @@ -261,41 +170,42 @@ class sockssocket(socket.socket): password = self._proxy.password if hasattr(password, 'encode'): password = password.encode() - packet = struct.pack('!BB', 1, len(username)) + username - packet += struct.pack('!B', len(password)) + password + packet = struct_pack('!BB', 1, len(username)) + username + packet += struct_pack('!B', len(password)) + password self.sendall(packet) packet = self.recvall(2) - version, status = struct.unpack('!BB', packet) + version, status = struct_unpack('!BB', packet) if version != 0x01: self.close() - raise ProxyError(0, "Invalid response from server. 
Expected {0:02x} got {1:02x}".format(1, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(1, version)) if status != 0x00: self.close() raise Socks5Error(1) - elif method == 0x00: # no auth + elif method == 0x00: # no auth pass - - packet = struct.pack('!BBB', 5, 1, 0) + packet = struct_pack('!BBB', 5, 1, 0) if ipaddr is None: if hasattr(destaddr, 'encode'): destaddr = destaddr.encode() - packet += struct.pack('!BB', 3, len(destaddr)) + destaddr + packet += struct_pack('!BB', 3, len(destaddr)) + destaddr else: - packet += struct.pack('!B', 1) + ipaddr - packet += struct.pack('!H', port) + packet += struct_pack('!B', 1) + ipaddr + packet += struct_pack('!H', port) self.sendall(packet) packet = self.recvall(4) - version, status, _, atype = struct.unpack('!BBBB', packet) + version, status, _, atype = struct_unpack('!BBBB', packet) if version != 0x05: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + raise ProxyError( + 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(5, version)) if status != 0x00: self.close() @@ -304,11 +214,13 @@ class sockssocket(socket.socket): if atype == 0x01: destaddr = self.recvall(4) elif atype == 0x03: - alen = struct.unpack('!B', self.recv(1))[0] + alen = struct_unpack('!B', self.recv(1))[0] destaddr = self.recvall(alen) elif atype == 0x04: destaddr = self.recvall(16) - destport = struct.unpack('!H', self.recvall(2))[0] + destport = struct_unpack('!H', self.recvall(2))[0] + + return (destaddr, destport) def _make_proxy(self, connect_func, address): if self._proxy.type == ProxyType.SOCKS4: @@ -330,7 +242,7 @@ class sockssocket(socket.socket): return connect_func(self, address) def connect(self, address): - self._make_proxy(_orig_socket.connect, address) + self._make_proxy(socket.socket.connect, address) def connect_ex(self, address): - return self._make_proxy(_orig_socket.connect_ex, address) + return self._make_proxy(socket.socket.connect_ex, address) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fa16a42ad..b2e4a2dfb 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -55,6 +55,11 @@ from .compat import ( struct_pack, ) +from .socks import ( + ProxyType, + sockssocket, +) + # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) @@ -752,8 +757,15 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): self._params = params def http_open(self, req): + conn_class = compat_http_client.HTTPConnection + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + return self.do_open(functools.partial( - _create_http_connection, self, compat_http_client.HTTPConnection, False), + _create_http_connection, self, conn_class, False), req) @staticmethod @@ -849,6 +861,41 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_response = http_response +def make_socks_conn_class(base_class, 
socks_proxy): + assert issubclass(base_class, ( + compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection)) + + url_components = compat_urlparse.urlparse(socks_proxy) + if url_components.scheme.lower() == 'socks5': + socks_type = ProxyType.SOCKS5 + elif url_components.scheme.lower() in ('socks', 'socks4'): + socks_type = ProxyType.SOCKS4 + + proxy_args = ( + socks_type, + url_components.hostname, url_components.port or 1080, + True, # Remote DNS + url_components.username, url_components.password + ) + + class SocksConnection(base_class): + def connect(self): + self.sock = sockssocket() + self.sock.setproxy(*proxy_args) + if type(self.timeout) in (int, float): + self.sock.settimeout(self.timeout) + self.sock.connect((self.host, self.port)) + + if isinstance(self, compat_http_client.HTTPSConnection): + if hasattr(self, '_context'): # Python > 2.6 + self.sock = self._context.wrap_socket( + self.sock, server_hostname=self.host) + else: + self.sock = ssl.wrap_socket(self.sock) + + return SocksConnection + + class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def __init__(self, params, https_conn_class=None, *args, **kwargs): compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs) @@ -857,12 +904,20 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def https_open(self, req): kwargs = {} + conn_class = self._https_conn_class + if hasattr(self, '_context'): # python > 2.6 kwargs['context'] = self._context if hasattr(self, '_check_hostname'): # python 3.x kwargs['check_hostname'] = self._check_hostname + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + return self.do_open(functools.partial( - _create_http_connection, self, self._https_conn_class, True), + _create_http_connection, self, conn_class, True), req, **kwargs) @@ -2683,6 +2738,10 @@ class 
PerRequestProxyHandler(compat_urllib_request.ProxyHandler): if proxy == '__noproxy__': return None # No Proxy + if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks5'): + req.add_header('Ytdl-socks-proxy', proxy) + # youtube-dl's http/https handlers do wrapping the socket with socks + return None return compat_urllib_request.ProxyHandler.proxy_open( self, req, proxy, type) From 72f3289ac48d8dbfe1ee3fd2d82a23f1bff045df Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 23 Apr 2016 21:30:44 +0800 Subject: [PATCH 185/501] [test/test_socks] Add tests for SOCKS proxies --- .gitignore | 1 + Makefile | 2 +- test/helper.py | 5 ++++ test/test_socks.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++ tox.ini | 1 + 5 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 test/test_socks.py diff --git a/.gitignore b/.gitignore index 72c10425d..0e7128551 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ updates_key.pem *.part *.swp test/testdata +test/local_parameters.json .tox youtube-dl.zsh .idea diff --git a/Makefile b/Makefile index c9ce216d1..5d7cd5a7e 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ test: ot: offlinetest offlinetest: codetest - $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py + $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py tar: youtube-dl.tar.gz diff --git a/test/helper.py b/test/helper.py index b8e22c5cb..dfee217a9 100644 --- a/test/helper.py +++ b/test/helper.py @@ -24,8 +24,13 @@ from youtube_dl.utils import ( def get_params(override=None): PARAMETERS_FILE = 
os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") + LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "local_parameters.json") with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) + if os.path.exists(LOCAL_PARAMETERS_FILE): + with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: + parameters.update(json.load(pf)) if override: parameters.update(override) return parameters diff --git a/test/test_socks.py b/test/test_socks.py new file mode 100644 index 000000000..92574c6fd --- /dev/null +++ b/test/test_socks.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import (FakeYDL, get_params) +from youtube_dl.compat import compat_urllib_request + + +class TestSocks(unittest.TestCase): + @staticmethod + def _check_params(attrs): + params = get_params() + for attr in attrs: + if attr not in params: + print('Missing %s. Skipping.' 
% attr) + return + return params + + def test_proxy_http(self): + params = self._check_params(['primary_proxy', 'primary_server_ip']) + if params is None: + return + ydl = FakeYDL({ + 'proxy': params['primary_proxy'] + }) + self.assertEqual( + ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'), + params['primary_server_ip']) + + def test_proxy_https(self): + params = self._check_params(['primary_proxy', 'primary_server_ip']) + if params is None: + return + ydl = FakeYDL({ + 'proxy': params['primary_proxy'] + }) + self.assertEqual( + ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'), + params['primary_server_ip']) + + def test_secondary_proxy_http(self): + params = self._check_params(['secondary_proxy', 'secondary_server_ip']) + if params is None: + return + ydl = FakeYDL() + req = compat_urllib_request.Request('http://yt-dl.org/ip') + req.add_header('Ytdl-request-proxy', params['secondary_proxy']) + self.assertEqual( + ydl.urlopen(req).read().decode('utf-8'), + params['secondary_server_ip']) + + def test_secondary_proxy_https(self): + params = self._check_params(['secondary_proxy', 'secondary_server_ip']) + if params is None: + return + ydl = FakeYDL() + req = compat_urllib_request.Request('https://yt-dl.org/ip') + req.add_header('Ytdl-request-proxy', params['secondary_proxy']) + self.assertEqual( + ydl.urlopen(req).read().decode('utf-8'), + params['secondary_server_ip']) + + +if __name__ == '__main__': + unittest.main() diff --git a/tox.ini b/tox.ini index 2d7134005..9c4e4a3d1 100644 --- a/tox.ini +++ b/tox.ini @@ -9,5 +9,6 @@ passenv = HOME defaultargs = test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py + --exclude test_socks.py commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html # test.test_download:TestDownload.test_NowVideo 
From 9e9cd7248d387954d1009087ac300ee3ff6a9766 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 15:11:05 +0800 Subject: [PATCH 186/501] [socks] Eliminate magic constants and improve --- youtube_dl/socks.py | 289 ++++++++++++++++++++++++-------------------- 1 file changed, 157 insertions(+), 132 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 95795b5a9..0e3dd7893 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -3,37 +3,87 @@ from __future__ import unicode_literals +# References: +# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol +# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol +# SOCKS5 protocol https://tools.ietf.org/html/rfc1928 +# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929 + import collections import socket from .compat import ( + compat_ord, struct_pack, struct_unpack, ) __author__ = 'Timo Schmid <coding@timoschmid.de>' +SOCKS4_VERSION = 4 +SOCKS4_REPLY_VERSION = 0x00 +# Excerpt from SOCKS4A protocol: +# if the client cannot resolve the destination host's domain name to find its +# IP address, it should set the first three bytes of DSTIP to NULL and the last +# byte to a non-zero value. 
+SOCKS4_DEFAULT_DSTIP = struct_pack('!BBBB', 0, 0, 0, 0xFF) + +SOCKS5_VERSION = 5 +SOCKS5_USER_AUTH_VERSION = 0x01 +SOCKS5_USER_AUTH_SUCCESS = 0x00 + + +class Socks4Command(object): + CMD_CONNECT = 0x01 + CMD_BIND = 0x02 + + +class Socks5Command(Socks4Command): + CMD_UDP_ASSOCIATE = 0x03 + + +class Socks5Auth(object): + AUTH_NONE = 0x00 + AUTH_GSSAPI = 0x01 + AUTH_USER_PASS = 0x02 + AUTH_NO_ACCEPTABLE = 0xFF # For server response + + +class Socks5AddressType(object): + ATYP_IPV4 = 0x01 + ATYP_DOMAINNAME = 0x03 + ATYP_IPV6 = 0x04 + class ProxyError(IOError): - pass - - -class Socks4Error(ProxyError): - CODES = { - 0x5B: 'request rejected or failed', - 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', - 0x5D: 'request rejected because the client program and identd report different user-ids' - } + ERR_SUCCESS = 0x00 def __init__(self, code=None, msg=None): if code is not None and msg is None: - msg = self.CODES.get(code) - if msg is None: - msg = 'unknown error' - super(Socks4Error, self).__init__(code, msg) + msg = self.CODES.get(code) and 'unknown error' + super(ProxyError, self).__init__(code, msg) -class Socks5Error(Socks4Error): +class InvalidVersionError(ProxyError): + def __init__(self, expected_version, got_version): + msg = ('Invalid response version from server. 
Expected {0:02x} got ' + '{1:02x}'.format(expected_version, got_version)) + super(InvalidVersionError, self).__init__(0, msg) + + +class Socks4Error(ProxyError): + ERR_SUCCESS = 90 + + CODES = { + 91: 'request rejected or failed', + 92: 'request rejected becasue SOCKS server cannot connect to identd on the client', + 93: 'request rejected because the client program and identd report different user-ids' + } + + +class Socks5Error(ProxyError): + ERR_GENERAL_FAILURE = 0x01 + CODES = { 0x01: 'general SOCKS server failure', 0x02: 'connection not allowed by ruleset', @@ -53,27 +103,19 @@ class ProxyType(object): SOCKS4A = 1 SOCKS5 = 2 -Proxy = collections.namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) +Proxy = collections.namedtuple('Proxy', ( + 'type', 'host', 'port', 'username', 'password', 'remote_dns')) class sockssocket(socket.socket): - @property - def _proxy(self): - return self.__proxy + def __init__(self, *args, **kwargs): + self._proxy = None + super(sockssocket, self).__init__(*args, **kwargs) - @property - def _proxy_port(self): - if self._proxy: - if self._proxy.port: - return self._proxy.port - return 1080 - return None + def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None): + assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5) - def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None): - if proxytype is None: - self.__proxy = None - else: - self.__proxy = Proxy(proxytype, addr, port, username, password, rdns) + self._proxy = Proxy(proxytype, addr, port, username, password, rdns) def recvall(self, cnt): data = b'' @@ -84,163 +126,146 @@ class sockssocket(socket.socket): data += cur return data + def _recv_bytes(self, cnt): + data = self.recvall(cnt) + return struct_unpack('!{0}B'.format(cnt), data) + + @staticmethod + def _len_and_data(data): + return struct_pack('!B', len(data)) + data + + def _check_response_version(self, 
expected_version, got_version): + if got_version != expected_version: + self.close() + raise InvalidVersionError(expected_version, got_version) + + def _resolve_address(self, destaddr, default, use_remote_dns): + try: + return socket.inet_aton(destaddr) + except socket.error: + if use_remote_dns and self._proxy.remote_dns: + return default + else: + return socket.inet_aton(socket.gethostbyname(destaddr)) + def _setup_socks4(self, address, is_4a=False): destaddr, port = address - try: - ipaddr = socket.inet_aton(destaddr) - except socket.error: - if is_4a and self._proxy.remote_dns: - ipaddr = struct_pack('!BBBB', 0, 0, 0, 0xFF) - else: - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) - packet = struct_pack('!BBH', 0x4, 0x1, port) + ipaddr - if self._proxy.username: - username = self._proxy.username - if hasattr(username, 'encode'): - username = username.encode() - packet += struct_pack('!{0}s'.format(len(username) + 1), username) - else: - packet += b'\x00' + packet = struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr + + username = (self._proxy.username or '').encode('utf-8') + packet += username + b'\x00' if is_4a and self._proxy.remote_dns: - if hasattr(destaddr, 'encode'): - destaddr = destaddr.encode() - packet += struct_pack('!{0}s'.format(len(destaddr) + 1), destaddr) + packet += destaddr.encode('utf-8') + b'\x00' self.sendall(packet) - packet = self.recvall(8) - nbyte, resp_code, dstport, dsthost = struct_unpack('!BBHI', packet) + version, resp_code, dstport, dsthost = struct_unpack('!BBHI', self.recvall(8)) - # check valid response - if nbyte != 0x00: - self.close() - raise ProxyError( - 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(0, nbyte)) + self._check_response_version(SOCKS4_REPLY_VERSION, version) - # access granted - if resp_code != 0x5a: + if resp_code != Socks4Error.ERR_SUCCESS: self.close() raise Socks4Error(resp_code) return (dsthost, dstport) - def _setup_socks5(self, address): - destaddr, port = address + def _setup_socks4a(self, address): + self._setup_socks4(address, is_4a=True) - try: - ipaddr = socket.inet_aton(destaddr) - except socket.error: - if self._proxy.remote_dns: - ipaddr = None - else: - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + def _socks5_auth(self): + packet = struct_pack('!B', SOCKS5_VERSION) - auth_methods = 1 + auth_methods = [Socks5Auth.AUTH_NONE] if self._proxy.username and self._proxy.password: - # two auth methods available - auth_methods = 2 - packet = struct_pack('!BBB', 0x5, auth_methods, 0x00) # no auth - if self._proxy.username and self._proxy.password: - packet += struct_pack('!B', 0x02) # user/pass auth + auth_methods.append(Socks5Auth.AUTH_USER_PASS) + + packet += struct_pack('!B', len(auth_methods)) + packet += struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) self.sendall(packet) - packet = self.recvall(2) - version, method = struct_unpack('!BB', packet) + version, method = self._recv_bytes(2) - # check valid response - if version != 0x05: - self.close() - raise ProxyError( - 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(5, version)) + self._check_response_version(SOCKS5_VERSION, version) - # no auth methods - if method == 0xFF: + if method == Socks5Auth.AUTH_NO_ACCEPTABLE: self.close() raise Socks5Error(method) - # user/pass auth - if method == 0x01: - username = self._proxy.username - if hasattr(username, 'encode'): - username = username.encode() - password = self._proxy.password - if hasattr(password, 'encode'): - password = password.encode() - packet = struct_pack('!BB', 1, len(username)) + username - packet += struct_pack('!B', len(password)) + password + if method == Socks5Auth.AUTH_USER_PASS: + username = self._proxy.username.encode('utf-8') + password = self._proxy.password.encode('utf-8') + packet = struct_pack('!B', SOCKS5_USER_AUTH_VERSION) + packet += self._len_and_data(username) + self._len_and_data(password) self.sendall(packet) - packet = self.recvall(2) - version, status = struct_unpack('!BB', packet) + version, status = self._recv_bytes(2) - if version != 0x01: - self.close() - raise ProxyError( - 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(1, version)) + self._check_response_version(SOCKS5_USER_AUTH_VERSION, version) - if status != 0x00: + if status != SOCKS5_USER_AUTH_SUCCESS: self.close() - raise Socks5Error(1) - elif method == 0x00: # no auth + raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE) + elif method == Socks5Auth.AUTH_NONE: pass - packet = struct_pack('!BBB', 5, 1, 0) + def _setup_socks5(self, address): + destaddr, port = address + + ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True) + + self._socks5_auth() + + reserved = 0 + packet = struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) if ipaddr is None: - if hasattr(destaddr, 'encode'): - destaddr = destaddr.encode() - packet += struct_pack('!BB', 3, len(destaddr)) + destaddr + destaddr = destaddr.encode('utf-8') + packet += struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) + packet += self._len_and_data(destaddr) else: - packet += struct_pack('!B', 1) + ipaddr + packet += struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr packet += struct_pack('!H', port) self.sendall(packet) - packet = self.recvall(4) - version, status, _, atype = struct_unpack('!BBBB', packet) + version, status, reserved, atype = self._recv_bytes(4) - if version != 0x05: - self.close() - raise ProxyError( - 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(5, version)) + self._check_response_version(SOCKS5_VERSION, version) - if status != 0x00: + if status != Socks5Error.ERR_SUCCESS: self.close() raise Socks5Error(status) - if atype == 0x01: + if atype == Socks5AddressType.ATYP_IPV4: destaddr = self.recvall(4) - elif atype == 0x03: - alen = struct_unpack('!B', self.recv(1))[0] + elif atype == Socks5AddressType.ATYP_DOMAINNAME: + alen = compat_ord(self.recv(1)) destaddr = self.recvall(alen) - elif atype == 0x04: + elif atype == Socks5AddressType.ATYP_IPV6: destaddr = self.recvall(16) destport = struct_unpack('!H', self.recvall(2))[0] return (destaddr, destport) def _make_proxy(self, connect_func, address): - if self._proxy.type == ProxyType.SOCKS4: - result = connect_func(self, (self._proxy.host, self._proxy_port)) - if result != 0 and result is not None: - return result - self._setup_socks4(address) - elif self._proxy.type == ProxyType.SOCKS4A: - result = connect_func(self, (self._proxy.host, self._proxy_port)) - if result != 0 and result is not None: - return result - self._setup_socks4(address, is_4a=True) - elif self._proxy.type == ProxyType.SOCKS5: - result = connect_func(self, (self._proxy.host, self._proxy_port)) - if result != 0 and result is not None: - return result - self._setup_socks5(address) - else: + if not self._proxy: return connect_func(self, address) + result = connect_func(self, (self._proxy.host, self._proxy.port)) + if result != 0 and result is not None: + return result + setup_funcs = { + ProxyType.SOCKS4: self._setup_socks4, + ProxyType.SOCKS4A: self._setup_socks4a, + ProxyType.SOCKS5: self._setup_socks5, + } + setup_funcs[self._proxy.type](address) + return result + def connect(self, address): self._make_proxy(socket.socket.connect, address) From 51fb4995a5242c0edca09167cf8c4b050cf5a186 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 15:15:32 +0800 Subject: [PATCH 187/501] [utils] Register SOCKS protocols in urllib 
and support SOCKS4A --- youtube_dl/YoutubeDL.py | 3 +++ youtube_dl/utils.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a96482e68..34eeb77c5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -64,6 +64,7 @@ from .utils import ( PostProcessingError, preferredencoding, prepend_extension, + register_socks_protocols, render_table, replace_extension, SameFileError, @@ -361,6 +362,8 @@ class YoutubeDL(object): for ph in self.params.get('progress_hooks', []): self.add_progress_hook(ph) + register_socks_protocols() + def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b2e4a2dfb..c9702fd93 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -61,6 +61,13 @@ from .socks import ( ) +def register_socks_protocols(): + # "Register" SOCKS protocols + for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): + if scheme not in compat_urlparse.uses_netloc: + compat_urlparse.uses_netloc.append(scheme) + + # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) @@ -870,6 +877,8 @@ def make_socks_conn_class(base_class, socks_proxy): socks_type = ProxyType.SOCKS5 elif url_components.scheme.lower() in ('socks', 'socks4'): socks_type = ProxyType.SOCKS4 + elif url_components.scheme.lower() == 'socks4a': + socks_type = ProxyType.SOCKS4A proxy_args = ( socks_type, @@ -2738,7 +2747,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): if proxy == '__noproxy__': return None # No Proxy - if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks5'): + if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): req.add_header('Ytdl-socks-proxy', proxy) # youtube-dl's http/https handlers do wrapping the socket with socks return None From d5ae6bb50124f8320f2b492380480038c487a6d2 Mon Sep 17 
00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 15:37:30 +0800 Subject: [PATCH 188/501] [utils] Add rationale for register_socks_protocols --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c9702fd93..dc73f3407 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -63,6 +63,8 @@ from .socks import ( def register_socks_protocols(): # "Register" SOCKS protocols + # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 + # URLs with protocols not in urlparse.uses_netloc are not handled correctly for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): if scheme not in compat_urlparse.uses_netloc: compat_urlparse.uses_netloc.append(scheme) From edaa23f822a1e4a62771422fb598c7bd8ae0a152 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 16:50:16 +0800 Subject: [PATCH 189/501] [compat] Rename struct_(un)pack to compat_struct_(un)pack --- test/test_compat.py | 4 ++-- youtube_dl/compat.py | 12 ++++++------ youtube_dl/downloader/f4m.py | 14 +++++++------- youtube_dl/extractor/rtve.py | 4 ++-- youtube_dl/socks.py | 32 ++++++++++++++++---------------- youtube_dl/swfinterp.py | 14 +++++++------- youtube_dl/utils.py | 4 ++-- 7 files changed, 42 insertions(+), 42 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index dd62a5d6b..539b30540 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -17,10 +17,10 @@ from youtube_dl.compat import ( compat_expanduser, compat_shlex_split, compat_str, + compat_struct_unpack, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, - struct_unpack, ) @@ -104,7 +104,7 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) def test_struct_unpack(self): - self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) + self.assertEqual(compat_struct_unpack('!B', b'\x00'), 
(0,)) if __name__ == '__main__': diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index f697bee7e..e48c761a6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -599,18 +599,18 @@ try: except TypeError: # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument # See https://bugs.python.org/issue19099 - def struct_pack(spec, *args): + def compat_struct_pack(spec, *args): if isinstance(spec, compat_str): spec = spec.encode('ascii') return struct.pack(spec, *args) - def struct_unpack(spec, *args): + def compat_struct_unpack(spec, *args): if isinstance(spec, compat_str): spec = spec.encode('ascii') return struct.unpack(spec, *args) else: - struct_pack = struct.pack - struct_unpack = struct.unpack + compat_struct_pack = struct.pack + compat_struct_unpack = struct.unpack __all__ = [ @@ -638,6 +638,8 @@ __all__ = [ 'compat_shlex_split', 'compat_socket_create_connection', 'compat_str', + 'compat_struct_pack', + 'compat_struct_unpack', 'compat_subprocess_get_DEVNULL', 'compat_tokenize_tokenize', 'compat_urllib_error', @@ -655,8 +657,6 @@ __all__ = [ 'compat_xml_parse_error', 'compat_xpath', 'shlex_quote', - 'struct_pack', - 'struct_unpack', 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index b282fe3d6..3d9337afa 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,8 +12,8 @@ from ..compat import ( compat_urlparse, compat_urllib_error, compat_urllib_parse_urlparse, - struct_pack, - struct_unpack, + compat_struct_pack, + compat_struct_unpack, ) from ..utils import ( encodeFilename, @@ -31,13 +31,13 @@ class FlvReader(io.BytesIO): # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return struct_unpack('!Q', self.read(8))[0] + return compat_struct_unpack('!Q', self.read(8))[0] def read_unsigned_int(self): - return struct_unpack('!I', self.read(4))[0] + return compat_struct_unpack('!I', 
self.read(4))[0] def read_unsigned_char(self): - return struct_unpack('!B', self.read(1))[0] + return compat_struct_unpack('!B', self.read(1))[0] def read_string(self): res = b'' @@ -194,11 +194,11 @@ def build_fragments_list(boot_info): def write_unsigned_int(stream, val): - stream.write(struct_pack('!I', val)) + stream.write(compat_struct_pack('!I', val)) def write_unsigned_int_24(stream, val): - stream.write(struct_pack('!I', val)[1:]) + stream.write(compat_struct_pack('!I', val)[1:]) def write_flv_header(stream): diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index f59040877..edd0d108e 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -7,7 +7,7 @@ import time from .common import InfoExtractor from ..compat import ( - struct_unpack, + compat_struct_unpack, ) from ..utils import ( ExtractorError, @@ -23,7 +23,7 @@ def _decrypt_url(png): encrypted_data = base64.b64decode(png.encode('utf-8')) text_index = encrypted_data.find(b'tEXt') text_chunk = encrypted_data[text_index - 4:] - length = struct_unpack('!I', text_chunk[:4])[0] + length = compat_struct_unpack('!I', text_chunk[:4])[0] # Use bytearray to get integers when iterating in both python 2.x and 3.x data = bytearray(text_chunk[8:8 + length]) data = [chr(b) for b in data if b != 0] diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 0e3dd7893..a5b27fea7 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -14,8 +14,8 @@ import socket from .compat import ( compat_ord, - struct_pack, - struct_unpack, + compat_struct_pack, + compat_struct_unpack, ) __author__ = 'Timo Schmid <coding@timoschmid.de>' @@ -26,7 +26,7 @@ SOCKS4_REPLY_VERSION = 0x00 # if the client cannot resolve the destination host's domain name to find its # IP address, it should set the first three bytes of DSTIP to NULL and the last # byte to a non-zero value. 
-SOCKS4_DEFAULT_DSTIP = struct_pack('!BBBB', 0, 0, 0, 0xFF) +SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF) SOCKS5_VERSION = 5 SOCKS5_USER_AUTH_VERSION = 0x01 @@ -128,11 +128,11 @@ class sockssocket(socket.socket): def _recv_bytes(self, cnt): data = self.recvall(cnt) - return struct_unpack('!{0}B'.format(cnt), data) + return compat_struct_unpack('!{0}B'.format(cnt), data) @staticmethod def _len_and_data(data): - return struct_pack('!B', len(data)) + data + return compat_struct_pack('!B', len(data)) + data def _check_response_version(self, expected_version, got_version): if got_version != expected_version: @@ -153,7 +153,7 @@ class sockssocket(socket.socket): ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) - packet = struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr + packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr username = (self._proxy.username or '').encode('utf-8') packet += username + b'\x00' @@ -163,7 +163,7 @@ class sockssocket(socket.socket): self.sendall(packet) - version, resp_code, dstport, dsthost = struct_unpack('!BBHI', self.recvall(8)) + version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8)) self._check_response_version(SOCKS4_REPLY_VERSION, version) @@ -177,14 +177,14 @@ class sockssocket(socket.socket): self._setup_socks4(address, is_4a=True) def _socks5_auth(self): - packet = struct_pack('!B', SOCKS5_VERSION) + packet = compat_struct_pack('!B', SOCKS5_VERSION) auth_methods = [Socks5Auth.AUTH_NONE] if self._proxy.username and self._proxy.password: auth_methods.append(Socks5Auth.AUTH_USER_PASS) - packet += struct_pack('!B', len(auth_methods)) - packet += struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) + packet += compat_struct_pack('!B', len(auth_methods)) + packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) self.sendall(packet) @@ -199,7 +199,7 @@ 
class sockssocket(socket.socket): if method == Socks5Auth.AUTH_USER_PASS: username = self._proxy.username.encode('utf-8') password = self._proxy.password.encode('utf-8') - packet = struct_pack('!B', SOCKS5_USER_AUTH_VERSION) + packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION) packet += self._len_and_data(username) + self._len_and_data(password) self.sendall(packet) @@ -221,14 +221,14 @@ class sockssocket(socket.socket): self._socks5_auth() reserved = 0 - packet = struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) + packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) if ipaddr is None: destaddr = destaddr.encode('utf-8') - packet += struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) packet += self._len_and_data(destaddr) else: - packet += struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr - packet += struct_pack('!H', port) + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr + packet += compat_struct_pack('!H', port) self.sendall(packet) @@ -247,7 +247,7 @@ class sockssocket(socket.socket): destaddr = self.recvall(alen) elif atype == Socks5AddressType.ATYP_IPV6: destaddr = self.recvall(16) - destport = struct_unpack('!H', self.recvall(2))[0] + destport = compat_struct_unpack('!H', self.recvall(2))[0] return (destaddr, destport) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 86b28716c..7cf490aa4 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -6,7 +6,7 @@ import zlib from .compat import ( compat_str, - struct_unpack, + compat_struct_unpack, ) from .utils import ( ExtractorError, @@ -25,17 +25,17 @@ def _extract_tags(file_contents): file_contents[:1]) # Determine number of bits in framesize rectangle - framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3 + framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3 framesize_len = (5 + 4 * 
framesize_nbits + 7) // 8 pos = framesize_len + 2 + 2 while pos < len(content): - header16 = struct_unpack('<H', content[pos:pos + 2])[0] + header16 = compat_struct_unpack('<H', content[pos:pos + 2])[0] pos += 2 tag_code = header16 >> 6 tag_len = header16 & 0x3f if tag_len == 0x3f: - tag_len = struct_unpack('<I', content[pos:pos + 4])[0] + tag_len = compat_struct_unpack('<I', content[pos:pos + 4])[0] pos += 4 assert pos + tag_len <= len(content), \ ('Tag %d ends at %d+%d - that\'s longer than the file (%d)' @@ -103,7 +103,7 @@ def _read_int(reader): for _ in range(5): buf = reader.read(1) assert len(buf) == 1 - b = struct_unpack('<B', buf)[0] + b = compat_struct_unpack('<B', buf)[0] res = res | ((b & 0x7f) << shift) if b & 0x80 == 0: break @@ -129,7 +129,7 @@ def _s24(reader): bs = reader.read(3) assert len(bs) == 3 last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00' - return struct_unpack('<i', bs + last_byte)[0] + return compat_struct_unpack('<i', bs + last_byte)[0] def _read_string(reader): @@ -148,7 +148,7 @@ def _read_bytes(count, reader): def _read_byte(reader): resb = _read_bytes(1, reader=reader) - res = struct_unpack('<B', resb)[0] + res = compat_struct_unpack('<B', resb)[0] return res diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index dc73f3407..dbac38b55 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -44,6 +44,7 @@ from .compat import ( compat_parse_qs, compat_socket_create_connection, compat_str, + compat_struct_pack, compat_urllib_error, compat_urllib_parse, compat_urllib_parse_urlencode, @@ -52,7 +53,6 @@ from .compat import ( compat_urlparse, compat_xpath, shlex_quote, - struct_pack, ) from .socks import ( @@ -1259,7 +1259,7 @@ def bytes_to_intlist(bs): def intlist_to_bytes(xs): if not xs: return b'' - return struct_pack('%dB' % len(xs), *xs) + return compat_struct_pack('%dB' % len(xs), *xs) # Cross-platform file locking From e21f17fc86aab0ac7f1f4cee28f64e7b9b954f71 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Thu, 5 May 2016 17:09:13 +0800 Subject: [PATCH 190/501] [test/test_socks] Test with local SOCKS servers --- .gitignore | 1 + .travis.yml | 3 +++ devscripts/install_srelay.sh | 8 +++++++ test/test_socks.py | 42 +++++++++++++++++++++++++++++++++--- 4 files changed, 51 insertions(+), 3 deletions(-) create mode 100755 devscripts/install_srelay.sh diff --git a/.gitignore b/.gitignore index 0e7128551..d5f216b5f 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ test/local_parameters.json youtube-dl.zsh .idea .idea/* +tmp/ diff --git a/.travis.yml b/.travis.yml index cc21fae8f..998995845 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,9 @@ python: - "3.4" - "3.5" sudo: false +install: + - bash ./devscripts/install_srelay.sh + - export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6 script: nosetests test --verbose notifications: email: diff --git a/devscripts/install_srelay.sh b/devscripts/install_srelay.sh new file mode 100755 index 000000000..33ce8a3f7 --- /dev/null +++ b/devscripts/install_srelay.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +mkdir -p tmp && cd tmp +wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz +tar zxvf srelay-0.4.8b6.tar.gz +cd srelay-0.4.8b6 +./configure +make diff --git a/test/test_socks.py b/test/test_socks.py index 92574c6fd..dc9b8d276 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -8,11 +8,20 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import (FakeYDL, get_params) -from youtube_dl.compat import compat_urllib_request +import random +import subprocess + +from test.helper import ( + FakeYDL, + get_params, +) +from youtube_dl.compat import ( + compat_str, + compat_urllib_request, +) -class TestSocks(unittest.TestCase): +class TestMultipleSocks(unittest.TestCase): @staticmethod def _check_params(attrs): params = get_params() @@ -67,5 +76,32 @@ class 
TestSocks(unittest.TestCase): params['secondary_server_ip']) +class TestSocks(unittest.TestCase): + def setUp(self): + self.port = random.randint(49152, 65535) + self.server_process = subprocess.Popen([ + 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + def tearDown(self): + self.server_process.terminate() + self.server_process.communicate() + + def _get_ip(self, protocol): + ydl = FakeYDL({ + 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port), + }) + return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8') + + def test_socks4(self): + self.assertTrue(isinstance(self._get_ip('socks4'), compat_str)) + + def test_socks4a(self): + self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str)) + + def test_socks5(self): + self.assertTrue(isinstance(self._get_ip('socks5'), compat_str)) + + if __name__ == '__main__': unittest.main() From fa5cb8d0212918657cb58b4d5791ed3de831bd74 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 8 May 2016 15:14:56 +0800 Subject: [PATCH 191/501] [socks] Remove a superfluous clause --- youtube_dl/socks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index a5b27fea7..fd49d7435 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -210,8 +210,6 @@ class sockssocket(socket.socket): if status != SOCKS5_USER_AUTH_SUCCESS: self.close() raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE) - elif method == Socks5Auth.AUTH_NONE: - pass def _setup_socks5(self, address): destaddr, port = address From 6ddb4888d2610df3bbb5024440caddde50fe9ad8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 8 May 2016 15:15:58 +0800 Subject: [PATCH 192/501] [options] Update --proxy description for SOCKS proxies --- youtube_dl/options.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 
d1f8d1331..38efd292d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -188,7 +188,10 @@ def parseOpts(overrideArguments=None): network.add_option( '--proxy', dest='proxy', default=None, metavar='URL', - help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') + help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable experimental ' + 'SOCKS proxy, specify a proper scheme. For example ' + 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") ' + 'for direct connection') network.add_option( '--socket-timeout', dest='socket_timeout', type=float, default=None, metavar='SECONDS', From c2876afafef392220cdb2baebace1d6d533f8d63 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 8 May 2016 15:16:32 +0800 Subject: [PATCH 193/501] [test/test_socks] Use a different port range Seems on Travis CI, ports in the original range are often used. --- test/test_socks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_socks.py b/test/test_socks.py index dc9b8d276..d07003ceb 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -78,7 +78,7 @@ class TestMultipleSocks(unittest.TestCase): class TestSocks(unittest.TestCase): def setUp(self): - self.port = random.randint(49152, 65535) + self.port = random.randint(20000, 30000) self.server_process = subprocess.Popen([ 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) From 28b4f73620c82e7007b3154e4d5f437cf6fb2608 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 10 May 2016 09:08:08 +0200 Subject: [PATCH 194/501] release 2016.05.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- README.md | 8 +++++--- docs/supportedsites.md | 11 +++++++++-- youtube_dl/version.py | 2 +- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a26ff1de4..1fb878b59 100644 
--- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.01 +[debug] youtube-dl version 2016.05.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index 50acb26a0..4ef6b6d5a 100644 --- a/README.md +++ b/README.md @@ -85,9 +85,11 @@ which means you can modify it, redistribute it or use it however you like. --no-color Do not emit color codes in output ## Network Options: - --proxy URL Use the specified HTTP/HTTPS proxy. 
Pass in - an empty string (--proxy "") for direct - connection + --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. + To enable experimental SOCKS proxy, specify + a proper scheme. For example + socks5://127.0.0.1:1080/. Pass in an empty + string (--proxy "") for direct connection --socket-timeout SECONDS Time to wait before giving up, in seconds --source-address IP Client-side IP address to bind to (experimental) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9fb43671f..de84e5c84 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -77,6 +77,7 @@ - **Bild**: Bild.de - **BiliBili** - **BioBioChileTV** + - **BIQLE** - **BleacherReport** - **BleacherReportCMS** - **blinkx** @@ -145,6 +146,7 @@ - **culturebox.francetvinfo.fr** - **CultureUnplugged** - **CWTV** + - **DailyMail** - **dailymotion** - **dailymotion:playlist** - **dailymotion:user** @@ -325,6 +327,7 @@ - **limelight** - **limelight:channel** - **limelight:channel_list** + - **LiTV** - **LiveLeak** - **livestream** - **livestream:original** @@ -374,6 +377,8 @@ - **mtvservices:embedded** - **MuenchenTV**: münchen.tv - **MusicPlayOn** + - **mva**: Microsoft Virtual Academy videos + - **mva:course**: Microsoft Virtual Academy courses - **Mwave** - **MwaveMeetGreet** - **MySpace** @@ -463,7 +468,8 @@ - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE 
Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), 
Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **pcmag** - **People** - - **Periscope**: Periscope + - **periscope**: Periscope + - **periscope:user**: Periscope user videos - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** @@ -700,6 +706,7 @@ - **Vessel** - **Vesti**: Вести.Ru - **Vevo** + - **VevoPlaylist** - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **Vice** @@ -772,7 +779,7 @@ - **WSJ**: Wall Street Journal - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 551160897..45e40c0d1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.01' +__version__ = '2016.05.10' From 702ccf2dc08603fed98d2672f86af1a0e300d83e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 10 May 2016 15:58:25 +0800 Subject: [PATCH 195/501] [compat] Rename shlex_quote and remove unused subprocess_check_output --- youtube_dl/compat.py | 19 +++---------------- youtube_dl/postprocessor/execafterdownload.py | 4 ++-- youtube_dl/utils.py | 4 ++-- 3 files changed, 7 insertions(+), 20 
deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index e48c761a6..1392361a1 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -341,9 +341,9 @@ except ImportError: # Python 2 return parsed_result try: - from shlex import quote as shlex_quote + from shlex import quote as compat_shlex_quote except ImportError: # Python < 3.3 - def shlex_quote(s): + def compat_shlex_quote(s): if re.match(r'^[-_\w./]+$', s): return s else: @@ -466,18 +466,6 @@ else: print(s) -try: - subprocess_check_output = subprocess.check_output -except AttributeError: - def subprocess_check_output(*args, **kwargs): - assert 'input' not in kwargs - p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs) - output, _ = p.communicate() - ret = p.poll() - if ret: - raise subprocess.CalledProcessError(ret, p.args, output=output) - return output - if sys.version_info < (3, 0) and sys.platform == 'win32': def compat_getpass(prompt, *args, **kwargs): if isinstance(prompt, compat_str): @@ -635,6 +623,7 @@ __all__ = [ 'compat_parse_qs', 'compat_print', 'compat_setenv', + 'compat_shlex_quote', 'compat_shlex_split', 'compat_socket_create_connection', 'compat_str', @@ -656,7 +645,5 @@ __all__ = [ 'compat_urlretrieve', 'compat_xml_parse_error', 'compat_xpath', - 'shlex_quote', - 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 74f66d669..90630c2d7 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import subprocess from .common import PostProcessor -from ..compat import shlex_quote +from ..compat import compat_shlex_quote from ..utils import PostProcessingError @@ -17,7 +17,7 @@ class ExecAfterDownloadPP(PostProcessor): if '{}' not in cmd: cmd += ' {}' - cmd = cmd.replace('{}', shlex_quote(information['filepath'])) + cmd = 
cmd.replace('{}', compat_shlex_quote(information['filepath'])) self._downloader.to_screen('[exec] Executing command: %s' % cmd) retCode = subprocess.call(cmd, shell=True) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index dbac38b55..e8b09e9db 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -42,6 +42,7 @@ from .compat import ( compat_http_client, compat_kwargs, compat_parse_qs, + compat_shlex_quote, compat_socket_create_connection, compat_str, compat_struct_pack, @@ -52,7 +53,6 @@ from .compat import ( compat_urllib_request, compat_urlparse, compat_xpath, - shlex_quote, ) from .socks import ( @@ -1977,7 +1977,7 @@ def ytdl_is_updateable(): def args_to_str(args): # Get a short string representation for a subprocess command - return ' '.join(shlex_quote(a) for a in args) + return ' '.join(compat_shlex_quote(a) for a in args) def error_to_compat_str(err): From e73b9c65e279f283b28d14be5b7173eae46d4364 Mon Sep 17 00:00:00 2001 From: teemuy <z0rs4m37tAlL> Date: Wed, 11 May 2016 18:10:30 +0300 Subject: [PATCH 196/501] Bugfix: Allow colons in custom HTTP header values. 
--- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cbd84c3af..740a1904b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -69,7 +69,7 @@ def _real_main(argv=None): for h in opts.headers: if h.find(':', 1) < 0: parser.error('wrong header formatting, it should be key:value, not "%s"' % h) - key, value = h.split(':', 2) + key, value = h.split(':', 1) if opts.verbose: write_string('[debug] Adding header from command line option %s:%s\n' % (key, value)) std_headers[key] = value From e0741fd4496c85ef447e72df935cb6edd1af53ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 11 May 2016 22:03:30 +0600 Subject: [PATCH 197/501] [__init__] Simplify colon presence check --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 740a1904b..5df965191 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -67,7 +67,7 @@ def _real_main(argv=None): # Custom HTTP headers if opts.headers is not None: for h in opts.headers: - if h.find(':', 1) < 0: + if ':' not in h: parser.error('wrong header formatting, it should be key:value, not "%s"' % h) key, value = h.split(':', 1) if opts.verbose: From 4540515cb3daa0716fa94e54cacb566ef1461ab3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 18:48:27 +0800 Subject: [PATCH 198/501] [iqiyi] Fix 1080P extraction (closes #9446) --- youtube_dl/extractor/iqiyi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index ffb8008ce..ddcb3c916 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -505,7 +505,10 @@ class IqiyiIE(InfoExtractor): 'enc': md5_text(enc_key + tail), 'qyid': _uuid, 'tn': random.random(), - 'um': 0, + # In iQiyi's flash player, um 
is set to 1 if there's a logged user + # Some 1080P formats are only available with a logged user. + # Here force um=1 to trick the iQiyi server + 'um': 1, 'authkey': md5_text(md5_text('') + tail), 'k_tag': 1, } From 778a1ccca7d6cce06faf17867f20b87883d84e98 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 19:48:48 +0800 Subject: [PATCH 199/501] =?UTF-8?q?[utils]=20Add=20=C5=92=20and=20=C5=93?= =?UTF-8?q?=20found=20in=20French=20to=20ACCENT=5FCHARS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #9463 --- test/test_utils.py | 4 ++-- youtube_dl/utils.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 5702ffa97..ca254779f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -155,8 +155,8 @@ class TestUtil(unittest.TestCase): self.assertTrue(sanitize_filename(':', restricted=True) != '') self.assertEqual(sanitize_filename( - 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True), - 'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy') + 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True), + 'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e8b09e9db..6592c8ec2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -104,9 +104,9 @@ KNOWN_EXTENSIONS = ( 'f4f', 'f4m', 'm3u8', 'smil') # needed for sanitizing filenames in restricted mode -ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', - itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'], - 'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy'))) +ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', + 
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'], + 'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy'))) def preferredencoding(): From 7e8ddca1bb10068356d1ec43cf66e7627b76fce7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 19:56:58 +0800 Subject: [PATCH 200/501] [vevo] Delay the georestriction check to prevent false alerts Fixes #9408 --- youtube_dl/extractor/vevo.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c0632cd6a..388b4debe 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -213,19 +213,17 @@ class VevoIE(VevoBaseIE): formats = [] if not video_info: - if response and response.get('statusCode') != 909: + try: + self._initialize_api(video_id) + except ExtractorError: ytid = response.get('errorInfo', {}).get('ytid') if ytid: self.report_warning( 'Video is geoblocked, trying with the YouTube video %s' % ytid) return self.url_result(ytid, 'Youtube', ytid) - if 'statusMessage' in response: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, response['statusMessage']), expected=True) - raise ExtractorError('Unable to extract videos') + raise - self._initialize_api(video_id) video_info = self._call_api( 'video/%s' % video_id, video_id, 'Downloading api video info', 'Failed to download video info') From 1b405bb47d91119cc612a90d26f27f2b93f7c7b4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 18:06:50 +0800 Subject: [PATCH 201/501] [downloader/f4m] Tolerate truncate segments when testing Replaces #9216 Fixes #9214 and test_Bloomberg partially --- youtube_dl/downloader/f4m.py | 42 +++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 3d9337afa..314def4cb 100644 --- a/youtube_dl/downloader/f4m.py +++ 
b/youtube_dl/downloader/f4m.py @@ -23,26 +23,38 @@ from ..utils import ( ) +class DataTruncatedError(Exception): + pass + + class FlvReader(io.BytesIO): """ Reader for Flv files The file format is documented in https://www.adobe.com/devnet/f4v.html """ + def read_bytes(self, n): + data = self.read(n) + if len(data) < n: + raise DataTruncatedError( + 'FlvReader error: need %d bytes while only %d bytes got' % ( + n, len(data))) + return data + # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return compat_struct_unpack('!Q', self.read(8))[0] + return compat_struct_unpack('!Q', self.read_bytes(8))[0] def read_unsigned_int(self): - return compat_struct_unpack('!I', self.read(4))[0] + return compat_struct_unpack('!I', self.read_bytes(4))[0] def read_unsigned_char(self): - return compat_struct_unpack('!B', self.read(1))[0] + return compat_struct_unpack('!B', self.read_bytes(1))[0] def read_string(self): res = b'' while True: - char = self.read(1) + char = self.read_bytes(1) if char == b'\x00': break res += char @@ -53,18 +65,18 @@ class FlvReader(io.BytesIO): Read a box and return the info as a tuple: (box_size, box_type, box_data) """ real_size = size = self.read_unsigned_int() - box_type = self.read(4) + box_type = self.read_bytes(4) header_end = 8 if size == 1: real_size = self.read_unsigned_long_long() header_end = 16 - return real_size, box_type, self.read(real_size - header_end) + return real_size, box_type, self.read_bytes(real_size - header_end) def read_asrt(self): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount for i in range(quality_entry_count): @@ -85,7 +97,7 @@ class FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) # time scale self.read_unsigned_int() @@ -119,7 +131,7 @@ class FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + 
self.read_bytes(3) self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved @@ -374,7 +386,17 @@ class F4mFD(FragmentFD): down.close() reader = FlvReader(down_data) while True: - _, box_type, box_data = reader.read_box_info() + try: + _, box_type, box_data = reader.read_box_info() + except DataTruncatedError: + if test: + # In tests, segments may be truncated, and thus + # FlvReader may not be able to parse the whole + # chunk. If so, write the segment as is + # See https://github.com/rg3/youtube-dl/issues/9214 + dest_stream.write(down_data) + break + raise if box_type == b'mdat': dest_stream.write(box_data) break From a3fa6024d676ec20a06fe618f5c3d6e064f49336 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 20:05:43 +0800 Subject: [PATCH 202/501] [bloomberg] Fix test_Bloomberg In this test case, sometimes HLS is the best format while sometimes HDS is. To prevent occasional test failures, force HDS to be the best format. In the past, testing against HDS formats causes the same error as #9214, which is fixed as #9377 landed. 
--- youtube_dl/extractor/bloomberg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 13343bc25..bd538be50 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -17,6 +17,9 @@ class BloombergIE(InfoExtractor): 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', 'description': 'md5:a8ba0302912d03d246979735c17d2761', }, + 'params': { + 'format': 'best[format_id^=hds]', + }, }, { 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', 'only_matching': True, From f388f616c1f2ad9d2c906c4183cf996c845b2858 Mon Sep 17 00:00:00 2001 From: TRox1972 <archcr8@gmail.com> Date: Thu, 12 May 2016 16:48:12 +0200 Subject: [PATCH 203/501] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ef6b6d5a..a2febab2c 100644 --- a/README.md +++ b/README.md @@ -417,7 +417,7 @@ which means you can modify it, redistribute it or use it however you like. # CONFIGURATION -You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. +You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. 
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory: ``` From 7581bfc958c8de77adbf8a502564d2263d17479d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 18:57:53 +0800 Subject: [PATCH 204/501] [utils] Unquote crendentials passed to SOCKS proxies Fixes #9450 --- youtube_dl/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6592c8ec2..d6f94f8cd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -50,6 +50,7 @@ from .compat import ( compat_urllib_parse, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, + compat_urllib_parse_unquote_plus, compat_urllib_request, compat_urlparse, compat_xpath, @@ -886,7 +887,8 @@ def make_socks_conn_class(base_class, socks_proxy): socks_type, url_components.hostname, url_components.port or 1080, True, # Remote DNS - url_components.username, url_components.password + compat_urllib_parse_unquote_plus(url_components.username), + compat_urllib_parse_unquote_plus(url_components.password), ) class SocksConnection(base_class): From 0db3a66162cf1059dbfccd60db350596f7c5b469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 12 May 2016 23:57:52 +0600 Subject: [PATCH 205/501] [twitch] Skip dead tests --- youtube_dl/extractor/twitch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 36ee1adff..68f50487b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -171,6 +171,7 @@ class TwitchVideoIE(TwitchItemBaseIE): 'title': 'Worlds Semifinals - Star Horn Royal Club vs. 
OMG', }, 'playlist_mincount': 12, + 'skip': 'HTTP Error 404: Not Found', } @@ -187,6 +188,7 @@ class TwitchChapterIE(TwitchItemBaseIE): 'title': 'ACRL Off Season - Sports Cars @ Nordschleife', }, 'playlist_mincount': 3, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361', 'only_matching': True, @@ -368,6 +370,7 @@ class TwitchBookmarksIE(TwitchPlaylistBaseIE): 'title': 'Ognos', }, 'playlist_mincount': 3, + 'skip': 'HTTP Error 404: Not Found', } def _extract_playlist_page(self, response): From 0df79d552a6d528ac5bb1a9cce99199aafe79144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 May 2016 00:14:30 +0600 Subject: [PATCH 206/501] [twitch:bookmarks] Remove extractor Bookmarks no longer available --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/twitch.py | 26 -------------------------- 2 files changed, 27 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a0bb3d4c2..f2bd4fe97 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -834,7 +834,6 @@ from .twitch import ( TwitchVodIE, TwitchProfileIE, TwitchPastBroadcastsIE, - TwitchBookmarksIE, TwitchStreamIE, ) from .twitter import ( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 68f50487b..f7b98e190 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -357,32 +357,6 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE): } -class TwitchBookmarksIE(TwitchPlaylistBaseIE): - IE_NAME = 'twitch:bookmarks' - _VALID_URL = r'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE - _PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE - _PLAYLIST_TYPE = 'bookmarks' - - _TEST = { - 'url': 'http://www.twitch.tv/ognos/profile/bookmarks', - 'info_dict': { - 'id': 'ognos', - 'title': 'Ognos', - }, - 
'playlist_mincount': 3, - 'skip': 'HTTP Error 404: Not Found', - } - - def _extract_playlist_page(self, response): - entries = [] - for bookmark in response.get('bookmarks', []): - video = bookmark.get('video') - if not video: - continue - entries.append(video['url']) - return entries - - class TwitchStreamIE(TwitchBaseIE): IE_NAME = 'twitch:stream' _VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE From d8d540cf0d11dbf7b3d9de611470fc7114c8d1ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 May 2016 02:07:12 +0600 Subject: [PATCH 207/501] [nrk] Rework extractor (Closes #9470) --- youtube_dl/extractor/nrk.py | 435 ++++++++++++++++-------------------- 1 file changed, 196 insertions(+), 239 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 51dfc27ac..f0fbdd8be 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -4,91 +4,224 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_urllib_parse_unquote, -) +from ..compat import compat_urllib_parse_unquote from ..utils import ( - determine_ext, ExtractorError, - float_or_none, + int_or_none, + parse_age_limit, parse_duration, - unified_strdate, ) -class NRKIE(InfoExtractor): - _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)' - - _TESTS = [ - { - 'url': 'http://www.nrk.no/video/PS*150533', - # MD5 is unstable - 'info_dict': { - 'id': '150533', - 'ext': 'flv', - 'title': 'Dompap og andre fugler i Piip-Show', - 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', - 'duration': 263, - } - }, - { - 'url': 'http://www.nrk.no/video/PS*154915', - # MD5 is unstable - 'info_dict': { - 'id': '154915', - 'ext': 'flv', - 'title': 'Slik høres internett ut når du er blind', - 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', - 'duration': 20, - } - }, - ] +class 
NRKBaseIE(InfoExtractor): + def _extract_formats(self, manifest_url, video_id, fatal=True): + return self._extract_f4m_formats( + manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', + video_id, f4m_id='hds', fatal=fatal) def _real_extract(self, url): video_id = self._match_id(url) data = self._download_json( - 'http://v8.psapi.nrk.no/mediaelement/%s' % video_id, - video_id, 'Downloading media JSON') + 'http://%s/mediaelement/%s' % (self._API_HOST, video_id), + video_id, 'Downloading mediaelement JSON') - media_url = data.get('mediaUrl') + title = data.get('fullTitle') or data.get('mainTitle') or data['title'] + video_id = data.get('id') or video_id - if not media_url: - if data['usageRights']['isGeoBlocked']: + entries = [] + + media_assets = data.get('mediaAssets') + if media_assets and isinstance(media_assets, list): + def video_id_and_title(idx): + return ((video_id, title) if len(media_assets) == 1 + else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) + for num, asset in enumerate(media_assets, 1): + asset_url = asset.get('url') + if not asset_url: + continue + formats = self._extract_formats(asset_url, video_id, fatal=False) + if not formats: + continue + self._sort_formats(formats) + entry_id, entry_title = video_id_and_title(num) + duration = parse_duration(asset.get('duration')) + subtitles = {} + for subtitle in ('webVtt', 'timedText'): + subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) + if subtitle_url: + subtitles.setdefault('no', []).append({'url': subtitle_url}) + entries.append({ + 'id': asset.get('carrierId') or entry_id, + 'title': entry_title, + 'duration': duration, + 'subtitles': subtitles, + 'formats': formats, + }) + + if not entries: + media_url = data.get('mediaUrl') + if media_url: + formats = self._extract_formats(media_url, video_id) + self._sort_formats(formats) + duration = parse_duration(data.get('duration')) + entries = [{ + 'id': video_id, + 'title': title, + 'duration': duration, + 'formats': formats, + }] + + 
if not entries: + if data.get('usageRights', {}).get('isGeoBlocked'): raise ExtractorError( 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', expected=True) - if determine_ext(media_url) == 'f4m': - formats = self._extract_f4m_formats( - media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds') - self._sort_formats(formats) - else: - formats = [{ - 'url': media_url, - 'ext': 'flv', - }] - - duration = parse_duration(data.get('duration')) + conviva = data.get('convivaStatistics') or {} + series = conviva.get('seriesName') or data.get('seriesTitle') + episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') + thumbnails = None images = data.get('images') - if images: - thumbnails = images['webImages'] - thumbnails.sort(key=lambda image: image['pixelWidth']) - thumbnail = thumbnails[-1]['imageUrl'] - else: - thumbnail = None + if images and isinstance(images, dict): + web_images = images.get('webImages') + if isinstance(web_images, list): + thumbnails = [{ + 'url': image['imageUrl'], + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + } for image in web_images if image.get('imageUrl')] - return { - 'id': video_id, - 'title': data['title'], - 'description': data['description'], - 'duration': duration, - 'thumbnail': thumbnail, - 'formats': formats, + description = data.get('description') + + common_info = { + 'description': description, + 'series': series, + 'episode': episode, + 'age_limit': parse_age_limit(data.get('legalAge')), + 'thumbnails': thumbnails, } + vcodec = 'none' if data.get('mediaType') == 'Audio' else None + + # TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged + + for entry in entries: + entry.update(common_info) + for f in entry['formats']: + f['vcodec'] = vcodec + + return self.playlist_result(entries, video_id, title, description) + + +class NRKIE(NRKBaseIE): + _VALID_URL = 
r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)' + _API_HOST = 'v8.psapi.nrk.no' + _TESTS = [{ + # video + 'url': 'http://www.nrk.no/video/PS*150533', + # MD5 is unstable + 'info_dict': { + 'id': '150533', + 'ext': 'flv', + 'title': 'Dompap og andre fugler i Piip-Show', + 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', + 'duration': 263, + } + }, { + # audio + 'url': 'http://www.nrk.no/video/PS*154915', + # MD5 is unstable + 'info_dict': { + 'id': '154915', + 'ext': 'flv', + 'title': 'Slik høres internett ut når du er blind', + 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', + 'duration': 20, + } + }] + + +class NRKTVIE(NRKBaseIE): + IE_DESC = 'NRK TV and NRK Radio' + _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' + _API_HOST = 'psapi-we.nrk.no' + + _TESTS = [{ + 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', + 'info_dict': { + 'id': 'MUHH48000314', + 'ext': 'mp4', + 'title': '20 spørsmål', + 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', + 'upload_date': '20140523', + 'duration': 1741.52, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://tv.nrk.no/program/mdfp15000514', + 'info_dict': { + 'id': 'mdfp15000514', + 'ext': 'mp4', + 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', + 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', + 'upload_date': '20140524', + 'duration': 4605.08, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # single playlist video + 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', + 'md5': 'adbd1dbd813edaf532b0a253780719c2', + 'info_dict': { + 'id': 'MSPO40010515-part2', + 'ext': 'flv', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + }, + 'skip': 
'Only works from Norway', + }, { + 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', + 'playlist': [{ + 'md5': '9480285eff92d64f06e02a5367970a7a', + 'info_dict': { + 'id': 'MSPO40010515-part1', + 'ext': 'flv', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + }, + }, { + 'md5': 'adbd1dbd813edaf532b0a253780719c2', + 'info_dict': { + 'id': 'MSPO40010515-part2', + 'ext': 'flv', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + }, + }], + 'info_dict': { + 'id': 'MSPO40010515', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + 'duration': 6947.52, + }, + 'skip': 'Only works from Norway', + }, { + 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', + 'only_matching': True, + }] + class NRKPlaylistIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)' @@ -159,179 +292,3 @@ class NRKSkoleIE(InfoExtractor): nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id') return self.url_result('nrk:%s' % nrk_id) - - -class NRKTVIE(InfoExtractor): - IE_DESC = 'NRK TV and NRK Radio' - _VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' 
- - _TESTS = [ - { - 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', - 'info_dict': { - 'id': 'MUHH48000314', - 'ext': 'mp4', - 'title': '20 spørsmål', - 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', - 'upload_date': '20140523', - 'duration': 1741.52, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'https://tv.nrk.no/program/mdfp15000514', - 'info_dict': { - 'id': 'mdfp15000514', - 'ext': 'mp4', - 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', - 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', - 'upload_date': '20140524', - 'duration': 4605.08, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - # single playlist video - 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', - 'md5': 'adbd1dbd813edaf532b0a253780719c2', - 'info_dict': { - 'id': 'MSPO40010515-part2', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - }, - 'skip': 'Only works from Norway', - }, - { - 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', - 'playlist': [ - { - 'md5': '9480285eff92d64f06e02a5367970a7a', - 'info_dict': { - 'id': 'MSPO40010515-part1', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - }, - }, - { - 'md5': 'adbd1dbd813edaf532b0a253780719c2', - 'info_dict': { - 'id': 'MSPO40010515-part2', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - }, - }, - ], - 'info_dict': { - 'id': 'MSPO40010515', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 
'upload_date': '20150106', - 'duration': 6947.5199999999995, - }, - 'skip': 'Only works from Norway', - }, - { - 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', - 'only_matching': True, - } - ] - - def _extract_f4m(self, manifest_url, video_id): - return self._extract_f4m_formats( - manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds') - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - part_id = mobj.group('part_id') - base_url = mobj.group('baseurl') - - webpage = self._download_webpage(url, video_id) - - title = self._html_search_meta( - 'title', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description') - - thumbnail = self._html_search_regex( - r'data-posterimage="([^"]+)"', - webpage, 'thumbnail', fatal=False) - upload_date = unified_strdate(self._html_search_meta( - 'rightsfrom', webpage, 'upload date', fatal=False)) - duration = float_or_none(self._html_search_regex( - r'data-duration="([^"]+)"', - webpage, 'duration', fatal=False)) - - # playlist - parts = re.findall( - r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage) - if parts: - entries = [] - for current_part_id, stream_url, part_title in parts: - if part_id and current_part_id != part_id: - continue - video_part_id = '%s-part%s' % (video_id, current_part_id) - formats = self._extract_f4m(stream_url, video_part_id) - entries.append({ - 'id': video_part_id, - 'title': part_title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'formats': formats, - }) - if part_id: - if entries: - return entries[0] - else: - playlist = self.playlist_result(entries, video_id, title, description) - playlist.update({ - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - }) - return playlist - - formats = [] - - f4m_url = re.search(r'data-media="([^"]+)"', webpage) - if f4m_url: - 
formats.extend(self._extract_f4m(f4m_url.group(1), video_id)) - - m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage) - if m3u8_url: - formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls')) - self._sort_formats(formats) - - subtitles_url = self._html_search_regex( - r'data-subtitlesurl\s*=\s*(["\'])(?P<url>.+?)\1', - webpage, 'subtitle URL', default=None, group='url') - subtitles = {} - if subtitles_url: - subtitles['no'] = [{ - 'ext': 'ttml', - 'url': compat_urlparse.urljoin(base_url, subtitles_url), - }] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } From b9e7bc55da1c1275737b356efadc06435b8bfa2c Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 12 May 2016 22:45:54 +0100 Subject: [PATCH 208/501] [mgtv] extract http formats --- youtube_dl/extractor/mgtv.py | 43 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index a14d176a5..9fbc74f5d 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -11,7 +11,7 @@ class MGTVIE(InfoExtractor): _TEST = { 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', - 'md5': '', + 'md5': '1bdadcf760a0b90946ca68ee9a2db41a', 'info_dict': { 'id': '3116640', 'ext': 'mp4', @@ -20,15 +20,6 @@ class MGTVIE(InfoExtractor): 'duration': 7461, 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - 'skip_download': True, # m3u8 download - }, - } - - _FORMAT_MAP = { - '标清': ('Standard', 0), - '高清': ('High', 1), - '超清': ('SuperHigh', 2), } def _real_extract(self, url): @@ -40,17 +31,27 @@ class MGTVIE(InfoExtractor): formats = [] for idx, stream in enumerate(api_data['stream']): - format_name = stream.get('name') - format_id, preference = self._FORMAT_MAP.get(format_name, (None, 
None)) - format_info = self._download_json( - stream['url'], video_id, - note='Download video info for format %s' % format_id or '#%d' % idx) - formats.append({ - 'format_id': format_id, - 'url': format_info['info'], - 'ext': 'mp4', # These are m3u8 playlists - 'preference': preference, - }) + stream_url = stream.get('url') + if not stream_url: + continue + tbr = int_or_none(self._search_regex( + r'(\d+)\.mp4', stream_url, 'tbr', default=None)) + + def extract_format(stream_url, format_id, idx, query={}): + format_info = self._download_json( + stream_url, video_id, + note='Download video info for format %s' % format_id or '#%d' % idx, query=query) + return { + 'format_id': format_id, + 'url': format_info['info'], + 'ext': 'mp4', + 'tbr': tbr, + } + + formats.append(extract_format( + stream_url, 'hls-%d' % tbr if tbr else None, idx * 2)) + formats.append(extract_format(stream_url.replace( + '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031})) self._sort_formats(formats) return { From 99d79b8692ae8981aff91cf5b1475516b60eb765 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 05:21:45 +0100 Subject: [PATCH 209/501] [ustudio] add support ustudio app/embed urls --- youtube_dl/extractor/ustudio.py | 66 +++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ustudio.py b/youtube_dl/extractor/ustudio.py index cafc082b6..3484a2046 100644 --- a/youtube_dl/extractor/ustudio.py +++ b/youtube_dl/extractor/ustudio.py @@ -6,10 +6,12 @@ from .common import InfoExtractor from ..utils import ( int_or_none, unified_strdate, + unescapeHTML, ) class UstudioIE(InfoExtractor): + IE_NAME = 'ustudio' _VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)' _TEST = { 'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge', @@ -27,9 +29,7 @@ class UstudioIE(InfoExtractor): } def 
_real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') + video_id, display_id = re.match(self._VALID_URL, url).groups() config = self._download_xml( 'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id, @@ -37,7 +37,7 @@ class UstudioIE(InfoExtractor): def extract(kind): return [{ - 'url': item.attrib['url'], + 'url': unescapeHTML(item.attrib['url']), 'width': int_or_none(item.get('width')), 'height': int_or_none(item.get('height')), } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')] @@ -65,3 +65,61 @@ class UstudioIE(InfoExtractor): 'uploader': uploader, 'formats': formats, } + + +class UstudioEmbedIE(InfoExtractor): + IE_NAME = 'ustudio:embed' + _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T', + 'md5': '47c0be52a09b23a7f40de9469cec58f4', + 'info_dict': { + 'id': 'Uw7G1kMCe65T', + 'ext': 'mp4', + 'title': '5 Things IT Should Know About Video', + 'description': 'md5:93d32650884b500115e158c5677d25ad', + 'uploader_id': 'DeN7VdYRDKhP', + } + } + + def _real_extract(self, url): + uploader_id, video_id = re.match(self._VALID_URL, url).groups() + video_data = self._download_json( + 'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id), + video_id)['videos'][0] + title = video_data['name'] + + formats = [] + for ext, qualities in video_data.get('transcodes', {}).items(): + for quality in qualities: + quality_url = quality.get('url') + if not quality_url: + continue + height = int_or_none(quality.get('height')) + formats.append({ + 'format_id': '%s-%dp' % (ext, height) if height else ext, + 'url': quality_url, + 'width': int_or_none(quality.get('width')), + 'height': height, + }) + self._sort_formats(formats) + + thumbnails = [] + for image in video_data.get('images', []): + image_url = image.get('url') + if not 
image_url: + continue + thumbnails.append({ + 'url': image_url, + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'uploader_id': uploader_id, + 'tags': video_data.get('keywords'), + 'thumbnails': thumbnails, + 'formats': formats, + } From cdf32ff15d6fc9d1902bfb3ed10a582070d20cd9 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 05:25:32 +0100 Subject: [PATCH 210/501] [extractors] add import for UstudioEmbedIE --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f2bd4fe97..50d2204f2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -851,7 +851,10 @@ from .unistra import UnistraIE from .urort import UrortIE from .usatoday import USATodayIE from .ustream import UstreamIE, UstreamChannelIE -from .ustudio import UstudioIE +from .ustudio import ( + UstudioIE, + UstudioEmbedIE, +) from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE From 18cf6381f6b140431f3a747fc2d222be08ab2e23 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 08:05:28 +0100 Subject: [PATCH 211/501] [nrk] extract m3u8 formats --- youtube_dl/extractor/nrk.py | 39 +++++++++++++++---------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index f0fbdd8be..7532f40c1 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -15,9 +15,14 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): def _extract_formats(self, manifest_url, video_id, fatal=True): - return self._extract_f4m_formats( + formats = [] + formats.extend(self._extract_f4m_formats( manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', - video_id, 
f4m_id='hds', fatal=fatal) + video_id, f4m_id='hds', fatal=fatal)) + formats.extend(self._extract_m3u8_formats(manifest_url.replace( + 'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), + video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal)) + return formats def _real_extract(self, url): video_id = self._match_id(url) @@ -121,10 +126,10 @@ class NRKIE(NRKBaseIE): _TESTS = [{ # video 'url': 'http://www.nrk.no/video/PS*150533', - # MD5 is unstable + 'md5': '2f7f6eeb2aacdd99885f355428715cfa', 'info_dict': { 'id': '150533', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Dompap og andre fugler i Piip-Show', 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', 'duration': 263, @@ -150,32 +155,24 @@ class NRKTVIE(NRKBaseIE): _TESTS = [{ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', + 'md5': '4e9ca6629f09e588ed240fb11619922a', 'info_dict': { - 'id': 'MUHH48000314', + 'id': 'MUHH48000314AA', 'ext': 'mp4', - 'title': '20 spørsmål', + 'title': '20 spørsmål 23.05.2014', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', - 'upload_date': '20140523', 'duration': 1741.52, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', + 'md5': '43d0be26663d380603a9cf0c24366531', 'info_dict': { - 'id': 'mdfp15000514', + 'id': 'MDFP15000514CA', 'ext': 'mp4', - 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', - 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', - 'upload_date': '20140524', + 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', + 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db', 'duration': 4605.08, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { # single playlist video 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', @@ -185,7 +182,6 @@ class NRKTVIE(NRKBaseIE): 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', }, 'skip': 'Only works from Norway', }, { @@ -197,7 +193,6 @@ class NRKTVIE(NRKBaseIE): 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', }, }, { 'md5': 'adbd1dbd813edaf532b0a253780719c2', @@ -206,14 +201,12 @@ class NRKTVIE(NRKBaseIE): 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', }, }], 'info_dict': { 'id': 'MSPO40010515', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', 'duration': 6947.52, }, 'skip': 'Only works from Norway', From ad55e101651edc732acac22cfb25d276d6c8bdca Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 08:35:38 +0100 Subject: [PATCH 212/501] [brightcove] change the protocol for m3u8 formats to m3u8_native --- youtube_dl/extractor/brightcove.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f0781fc27..fc7fc5b16 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -307,9 +307,10 @@ class BrightcoveLegacyIE(InfoExtractor): playlist_title=playlist_info['mediaCollectionDTO']['displayName']) def _extract_video_info(self, video_info): + video_id = compat_str(video_info['id']) publisher_id = video_info.get('publisherId') info = { - 'id': compat_str(video_info['id']), + 'id': video_id, 'title': video_info['displayName'].strip(), 'description': video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), @@ -331,7 +332,8 @@ class BrightcoveLegacyIE(InfoExtractor): url_comp = 
compat_urllib_parse_urlparse(url) if url_comp.path.endswith('.m3u8'): formats.extend( - self._extract_m3u8_formats(url, info['id'], 'mp4')) + self._extract_m3u8_formats( + url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) continue elif 'akamaihd.net' in url_comp.netloc: # This type of renditions are served through @@ -365,7 +367,7 @@ class BrightcoveLegacyIE(InfoExtractor): a_format.update({ 'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''), 'ext': 'mp4', - 'protocol': 'm3u8', + 'protocol': 'm3u8_native', }) formats.append(a_format) @@ -395,7 +397,7 @@ class BrightcoveLegacyIE(InfoExtractor): return ad_info if 'url' not in info and not info.get('formats'): - raise ExtractorError('Unable to extract video url for %s' % info['id']) + raise ExtractorError('Unable to extract video url for %s' % video_id) return info @@ -527,7 +529,7 @@ class BrightcoveNewIE(InfoExtractor): if not src: continue formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', m3u8_id='hls', fatal=False)) + src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif source_type == 'application/dash+xml': if not src: continue From cc1028aa6d27aeec39617d1ff8d2edcf1ee989d7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 13 May 2016 18:11:08 +0800 Subject: [PATCH 213/501] [openload] Fix extraction (closes #9472) --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 456561bcc..5049b870e 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) code = self._search_regex( - r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>', + r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>', webpage, 'JS code') decoded = self.openload_decode(code) From f196508f7b872963d13bcff94c0105d743322f71 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 May 2016 22:19:00 +0600 Subject: [PATCH 214/501] [imdb] Relax _VALID_URL (Closes #9481) --- youtube_dl/extractor/imdb.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 8bed8ccd0..203156229 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -12,9 +12,9 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', 'info_dict': { 'id': '2524815897', @@ -22,7 +22,10 @@ class ImdbIE(InfoExtractor): 'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb', 'description': 'md5:9061c2219254e5d14e03c25c98e96a81', } - } + }, { + 'url': 'http://www.imdb.com/video/_/vi2524815897', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 96c2e3e909171d103beafd1fd88e9d6e215681c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 May 2016 23:25:05 +0600 Subject: [PATCH 215/501] [imdb] Improve extraction --- youtube_dl/extractor/imdb.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 203156229..3a2b7cec5 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( + mimetype2ext, qualities, ) @@ -51,13 +51,27 @@ class ImdbIE(InfoExtractor): json_data = self._search_regex( r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>', format_page, 
'json data', flags=re.DOTALL) - info = json.loads(json_data) - format_info = info['videoPlayerObject']['video'] - f_id = format_info['ffname'] + info = self._parse_json(json_data, video_id, fatal=False) + if not info: + continue + format_info = info.get('videoPlayerObject', {}).get('video', {}) + if not format_info: + continue + video_info_list = format_info.get('videoInfoList') + if not video_info_list or not isinstance(video_info_list, list): + continue + video_info = video_info_list[0] + if not video_info or not isinstance(video_info, dict): + continue + video_url = video_info.get('videoUrl') + if not video_url: + continue + format_id = format_info.get('ffname') formats.append({ - 'format_id': f_id, - 'url': format_info['videoInfoList'][0]['videoUrl'], - 'quality': quality(f_id), + 'format_id': format_id, + 'url': video_url, + 'ext': mimetype2ext(video_info.get('videoMimeType')), + 'quality': quality(format_id), }) self._sort_formats(formats) From 0730be9022b415738e917c4cf72c2347ff0008e0 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 20:24:36 +0100 Subject: [PATCH 216/501] [sina] fix extraction(fixes #1146) --- youtube_dl/extractor/sina.py | 124 ++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index d03f1b1d4..8fc66732a 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -4,28 +4,35 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import sanitized_Request +from ..utils import ( + HEADRequest, + ExtractorError, + int_or_none, + update_url_query, + qualities, + get_element_by_attribute, + clean_html, +) class SinaIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/ - ( - (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-)))) - | + 
_VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/ + (?: + (?:view/|.*\#)(?P<video_id>\d+)| + .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)| # This is used by external sites like Weibo - (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf) + api/sinawebApi/outplay.php/(?P<token>.+?)\.swf ) ''' _TESTS = [ { - 'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898', - 'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f', + 'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622', + 'md5': 'd38433e2fc886007729735650ae4b3e9', 'info_dict': { - 'id': '110028898', - 'ext': 'flv', - 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员', + 'id': '250576622', + 'ext': 'mp4', + 'title': '现场:克鲁兹宣布退选 特朗普将稳获提名', } }, { @@ -35,37 +42,74 @@ class SinaIE(InfoExtractor): 'ext': 'flv', 'title': '军方提高对朝情报监视级别', }, + 'skip': 'the page does not exist or has been deleted', + }, + { + 'url': 'http://video.sina.com.cn/view/250587748.html', + 'md5': '3d1807a25c775092aab3bc157fff49b4', + 'info_dict': { + 'id': '250587748', + 'ext': 'mp4', + 'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光', + }, }, ] - def _extract_video(self, video_id): - data = compat_urllib_parse_urlencode({'vid': video_id}) - url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, - video_id, 'Downloading video url') - image_page = self._download_webpage( - 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, - video_id, 'Downloading thumbnail info') - - return {'id': video_id, - 'url': url_doc.find('./durl/url').text, - 'ext': 'flv', - 'title': url_doc.find('./vname').text, - 'thumbnail': image_page.split('=')[1], - } - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - if mobj.group('token') is not None: - # The video id is in the redirected url - self.to_screen('Getting video id') - request = sanitized_Request(url) - request.get_method = lambda: 'HEAD' - (_, urlh) = self._download_webpage_handle(request, 
'NA', False) - return self._real_extract(urlh.geturl()) - elif video_id is None: - pseudo_id = mobj.group('pseudo_id') - webpage = self._download_webpage(url, pseudo_id) - video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id') - return self._extract_video(video_id) + video_id = mobj.group('video_id') + if not video_id: + if mobj.group('token') is not None: + # The video id is in the redirected url + self.to_screen('Getting video id') + request = HEADRequest(url) + (_, urlh) = self._download_webpage_handle(request, 'NA', False) + return self._real_extract(urlh.geturl()) + else: + pseudo_id = mobj.group('pseudo_id') + webpage = self._download_webpage(url, pseudo_id) + error = get_element_by_attribute('class', 'errtitle', webpage) + if error: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, clean_html(error)), expected=True) + video_id = self._search_regex( + r"video_id\s*:\s*'(\d+)'", webpage, 'video id') + + video_data = self._download_json( + 'http://s.video.sina.com.cn/video/h5play', + video_id, query={'video_id': video_id}) + if video_data['code'] != 1: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, video_data['message']), expected=True) + else: + video_data = video_data['data'] + title = video_data['title'] + description = video_data.get('description') + if description: + description = description.strip() + + preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd']) + formats = [] + for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items(): + file_api = quality.get('file_api') + file_id = quality.get('file_id') + if not file_api or not file_id: + continue + formats.append({ + 'format_id': quality_id, + 'url': update_url_query(file_api, {'vid': file_id}), + 'preference': preference(quality_id), + 'ext': 'mp4', + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': video_data.get('image'), + 'duration': 
int_or_none(video_data.get('length')), + 'timestamp': int_or_none(video_data.get('create_time')), + 'formats': formats, + } From 134c6ea856be472f253bffbe99b88546fe417806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 04:46:38 +0600 Subject: [PATCH 217/501] [YoutubeDL] Sanitize url for url and url_transparent extraction results --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 34eeb77c5..03a6a1890 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -720,6 +720,7 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): + ie_result['url'] = sanitize_url(ie_result['url']) extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or extract_flat is True): From b5abf8614898cc728488d7ecc7a55a4c5c92758f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 04:53:14 +0600 Subject: [PATCH 218/501] [cinemassacre] Remove extractor (Closes #9457) It now uses jwplatform --- youtube_dl/extractor/cinemassacre.py | 119 --------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 120 deletions(-) delete mode 100644 youtube_dl/extractor/cinemassacre.py diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py deleted file mode 100644 index 042c4f2f1..000000000 --- a/youtube_dl/extractor/cinemassacre.py +++ /dev/null @@ -1,119 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ExtractorError -from .screenwavemedia import ScreenwaveMediaIE - - -class CinemassacreIE(InfoExtractor): - _VALID_URL = 
'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' - _TESTS = [ - { - 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', - 'md5': 'fde81fbafaee331785f58cd6c0d46190', - 'info_dict': { - 'id': 'Cinemassacre-19911', - 'ext': 'mp4', - 'upload_date': '20121110', - 'title': '“Angry Video Game Nerd: The Movie” – Trailer', - 'description': 'md5:fb87405fcb42a331742a0dce2708560b', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', - 'md5': 'd72f10cd39eac4215048f62ab477a511', - 'info_dict': { - 'id': 'Cinemassacre-521be8ef82b16', - 'ext': 'mp4', - 'upload_date': '20131002', - 'title': 'The Mummy’s Hand (1940)', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - # Youtube embedded video - 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/', - 'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9', - 'info_dict': { - 'id': 'OEVzPCY2T-g', - 'ext': 'webm', - 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles', - 'upload_date': '20061207', - 'uploader': 'Cinemassacre', - 'uploader_id': 'JamesNintendoNerd', - 'description': 'md5:784734696c2b8b7f4b8625cc799e07f6', - } - }, - { - # Youtube embedded video - 'url': 'http://cinemassacre.com/2006/09/01/mckids/', - 'md5': '7393c4e0f54602ad110c793eb7a6513a', - 'info_dict': { - 'id': 'FnxsNhuikpo', - 'ext': 'webm', - 'upload_date': '20060901', - 'uploader': 'Cinemassacre Extra', - 'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53', - 'uploader_id': 'Cinemassacre', - 'title': 'AVGN: McKids', - } - }, - { - 'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/', - 'md5': '1376908e49572389e7b06251a53cdd08', - 'info_dict': { - 'id': 'Cinemassacre-555779690c440', - 'ext': 'mp4', - 'description': 'Let’s Play Mario 
Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!', - 'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays', - 'upload_date': '20150525', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d') - - webpage = self._download_webpage(url, display_id) - - playerdata_url = self._search_regex( - [ - ScreenwaveMediaIE.EMBED_PATTERN, - r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', - ], - webpage, 'player data URL', default=None, group='url') - if not playerdata_url: - raise ExtractorError('Unable to find player data') - - video_title = self._html_search_regex( - r'<title>(?P<title>.+?)\|', webpage, 'title') - video_description = self._html_search_regex( - r'<div class="entry-content">(?P<description>.+?)</div>', - webpage, 'description', flags=re.DOTALL, fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': playerdata_url, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 50d2204f2..b6f4ccc5d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -124,7 +124,6 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE -from .cinemassacre import CinemassacreIE from .cliprs import ClipRsIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE From 98d560f205e6aeddc767844d142b00525a9eaff9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 14 May 2016 18:48:36 +0800 
Subject: [PATCH 219/501] [test/test_socks] Skip SOCKS tests They occasional trigger errors or blocks (https://travis-ci.org/rg3/youtube-dl/jobs/130184883) --- test/test_socks.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_socks.py b/test/test_socks.py index d07003ceb..1e68eb0da 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -77,17 +77,28 @@ class TestMultipleSocks(unittest.TestCase): class TestSocks(unittest.TestCase): + _SKIP_SOCKS_TEST = True + def setUp(self): + if self._SKIP_SOCKS_TEST: + return + self.port = random.randint(20000, 30000) self.server_process = subprocess.Popen([ 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) def tearDown(self): + if self._SKIP_SOCKS_TEST: + return + self.server_process.terminate() self.server_process.communicate() def _get_ip(self, protocol): + if self._SKIP_SOCKS_TEST: + return '127.0.0.1' + ydl = FakeYDL({ 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port), }) From 791ff52f753ee123426766aaa5320eb63a874b7b Mon Sep 17 00:00:00 2001 From: Jakub Wilk <jwilk@jwilk.net> Date: Sat, 14 May 2016 13:19:54 +0200 Subject: [PATCH 220/501] [teamcoco] Fix base64 regexp --- youtube_dl/extractor/teamcoco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index b49ab5f5b..79a778920 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -88,7 +88,7 @@ class TeamcocoIE(InfoExtractor): preload_codes = self._html_search_regex( r'(function.+)setTimeout\(function\(\)\{playlist', webpage, 'preload codes') - base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes) + base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes) base64_fragments.remove('init') def _check_sequence(cur_fragments): From 66e7ace17a36ed0f761ae620801e9e27d5c3cb3f Mon Sep 17 00:00:00 2001 From: Jakub Wilk <jwilk@jwilk.net> 
Date: Sat, 14 May 2016 13:41:41 +0200 Subject: [PATCH 221/501] Don't hardcode errno constant The value of ENOENT is architecture-dependent, so don't assume it's always 2. --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 03a6a1890..3917ca9dc 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -326,7 +326,7 @@ class YoutubeDL(object): ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: - if ose.errno == 2: + if ose.errno == errno.ENOENT: self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise From bd1e484448c84904ce0d99fe05c3721053aa3c00 Mon Sep 17 00:00:00 2001 From: felix <felix.von.s@posteo.de> Date: Sun, 13 Mar 2016 12:29:15 +0100 Subject: [PATCH 222/501] [utils] js_to_json: various improvements now JS object literals like { /* " */ 0: ",]\xaa<\/p>", } will be correctly converted to JSON. 
--- test/test_utils.py | 12 ++++++++++++ youtube_dl/utils.py | 30 ++++++++++++++++-------------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index ca254779f..ab2842f3b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -640,6 +640,18 @@ class TestUtil(unittest.TestCase): on = js_to_json('{"abc": "def",}') self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{ 0: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + + on = js_to_json(r'["<p>x<\/p>"]') + self.assertEqual(json.loads(on), ['<p>x</p>']) + + on = js_to_json(r'["\xaa"]') + self.assertEqual(json.loads(on), ['\u00aa']) + + on = js_to_json("['a\\\nb']") + self.assertEqual(json.loads(on), ['ab']) + def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d6f94f8cd..52a20632f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1914,24 +1914,26 @@ def js_to_json(code): v = m.group(0) if v in ('true', 'false', 'null'): return v - if v.startswith('"'): - v = re.sub(r"\\'", "'", v[1:-1]) - elif v.startswith("'"): - v = v[1:-1] - v = re.sub(r"\\\\|\\'|\"", lambda m: { - '\\\\': '\\\\', - "\\'": "'", + elif v.startswith('/*') or v == ',': + return "" + + if v[0] in ("'", '"'): + v = re.sub(r'(?s)\\.|"', lambda m: { '"': '\\"', - }[m.group(0)], v) + "\\'": "'", + '\\\n': '', + '\\x': '\\u00', + }.get(m.group(0), m.group(0)), v[1:-1]) + return '"%s"' % v - res = re.sub(r'''(?x) - "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"| - '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| - [a-zA-Z_][.a-zA-Z_0-9]* + return re.sub(r'''(?sx) + "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| + '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| + /\*.*?\*/|,(?=\s*[\]}])| + [a-zA-Z_][.a-zA-Z_0-9]*| + [0-9]+(?=\s*:) ''', fix_kv, code) - res = re.sub(r',(\s*[\]}])', lambda m: 
m.group(1), res) - return res def qualities(quality_ids): From 640eea0a0cf7ae589126f7762e1cfc7bdd2250d9 Mon Sep 17 00:00:00 2001 From: felix <felix.von.s@posteo.de> Date: Sun, 20 Mar 2016 12:17:57 +0100 Subject: [PATCH 223/501] [ora] minimise fragile regex shenanigans; recognise unsafespeech.com URLs --- youtube_dl/extractor/ora.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py index 8545fb1b8..cfae71bcc 100644 --- a/youtube_dl/extractor/ora.py +++ b/youtube_dl/extractor/ora.py @@ -6,13 +6,14 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( get_element_by_attribute, + js_to_json, qualities, unescapeHTML, ) class OraTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P<id>[^/\?#]+)' + _VALID_URL = r'https?://(?:www\.)?(ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' _TEST = { 'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq', 'md5': 'fa33717591c631ec93b04b0e330df786', @@ -28,10 +29,13 @@ class OraTVIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_data = self._search_regex( - r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video') - m3u8_url = self._search_regex( - r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None) + ora_meta = self._parse_json(self._search_regex( + r'(?s);\s*ora_meta = ({.*?});</script>', webpage, 'ora_meta'), display_id, + transform_source=lambda data: js_to_json(re.sub('":(document|\().*?(:false|\(\)),', '":null,', data))) + + video_data = ora_meta.get('video', ora_meta.get('current')) + m3u8_url = video_data['hls_stream'] + if m3u8_url: formats = self._extract_m3u8_formats( m3u8_url, display_id, 'mp4', 'm3u8_native', @@ -60,13 +64,11 @@ class OraTVIE(InfoExtractor): r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube 
id'), 'Youtube') return { - 'id': self._search_regex( - r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id), + 'id': video_data.get('id', display_id), 'display_id': display_id, 'title': unescapeHTML(self._og_search_title(webpage)), 'description': get_element_by_attribute( 'class', 'video_txt_decription', webpage), - 'thumbnail': self._proto_relative_url(self._search_regex( - r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)), + 'thumbnail': self._proto_relative_url(video_data.get('thumb')), 'formats': formats, } From 89ac4a19e658203db85c6a1d4b267a2eeb47a38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 20:39:58 +0600 Subject: [PATCH 224/501] [utils] Process non-base 10 integers in js_to_json --- test/test_utils.py | 19 +++++++++++++++++++ youtube_dl/utils.py | 12 ++++++++++++ 2 files changed, 31 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index ab2842f3b..26f66bff6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -617,6 +617,15 @@ class TestUtil(unittest.TestCase): json_code = js_to_json(inp) self.assertEqual(json.loads(json_code), json.loads(inp)) + inp = '''{ + 0:{src:'skipped', type: 'application/dash+xml'}, + 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'}, + }''' + self.assertEqual(js_to_json(inp), '''{ + "0":{"src":"skipped", "type": "application/dash+xml"}, + "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} + }''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) @@ -652,6 +661,16 @@ class TestUtil(unittest.TestCase): on = js_to_json("['a\\\nb']") self.assertEqual(json.loads(on), ['ab']) + on = js_to_json('{0xff:0xff}') + self.assertEqual(json.loads(on), {'255': 255}) + + on = js_to_json('{077:077}') + self.assertEqual(json.loads(on), {'63': 63}) + + on = js_to_json('{42:42}') + self.assertEqual(json.loads(on), {'42': 
42}) + + def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 52a20632f..25a9f33c0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1925,6 +1925,17 @@ def js_to_json(code): '\\x': '\\u00', }.get(m.group(0), m.group(0)), v[1:-1]) + INTEGER_TABLE = ( + (r'^(0[xX][0-9a-fA-F]+)', 16), + (r'^(0+[0-7]+)', 8), + ) + + for regex, base in INTEGER_TABLE: + im = re.match(regex, v) + if im: + i = int(im.group(1), base) + return '"%d":' % i if v.endswith(':') else '%d' % i + return '"%s"' % v return re.sub(r'''(?sx) @@ -1932,6 +1943,7 @@ def js_to_json(code): '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| /\*.*?\*/|,(?=\s*[\]}])| [a-zA-Z_][.a-zA-Z_0-9]*| + (?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code) From ca950f49e909baf6672034ffc2c1c2ee7133cf23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 20:45:18 +0600 Subject: [PATCH 225/501] [ora] Revert extraction to regexes It's less fragile than using js_to_json with ora js --- youtube_dl/extractor/ora.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py index cfae71bcc..1d42be39b 100644 --- a/youtube_dl/extractor/ora.py +++ b/youtube_dl/extractor/ora.py @@ -6,15 +6,14 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( get_element_by_attribute, - js_to_json, qualities, unescapeHTML, ) class OraTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' + _TESTS = [{ 'url': 
'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq', 'md5': 'fa33717591c631ec93b04b0e330df786', 'info_dict': { @@ -23,19 +22,19 @@ class OraTVIE(InfoExtractor): 'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!', 'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1', } - } + }, { + 'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - ora_meta = self._parse_json(self._search_regex( - r'(?s);\s*ora_meta = ({.*?});</script>', webpage, 'ora_meta'), display_id, - transform_source=lambda data: js_to_json(re.sub('":(document|\().*?(:false|\(\)),', '":null,', data))) - - video_data = ora_meta.get('video', ora_meta.get('current')) - m3u8_url = video_data['hls_stream'] - + video_data = self._search_regex( + r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video') + m3u8_url = self._search_regex( + r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None) if m3u8_url: formats = self._extract_m3u8_formats( m3u8_url, display_id, 'mp4', 'm3u8_native', @@ -64,11 +63,13 @@ class OraTVIE(InfoExtractor): r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube') return { - 'id': video_data.get('id', display_id), + 'id': self._search_regex( + r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id), 'display_id': display_id, 'title': unescapeHTML(self._og_search_title(webpage)), 'description': get_element_by_attribute( 'class', 'video_txt_decription', webpage), - 'thumbnail': self._proto_relative_url(video_data.get('thumb')), + 'thumbnail': self._proto_relative_url(self._search_regex( + r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)), 'formats': formats, } From 364cf465dd53e8006f5523c348f127f8df657bc3 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 20:46:33 +0600 Subject: [PATCH 226/501] [test_utils] PEP 8 --- test/test_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 26f66bff6..520d32ff5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -670,7 +670,6 @@ class TestUtil(unittest.TestCase): on = js_to_json('{42:42}') self.assertEqual(json.loads(on), {'42': 42}) - def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) From 5c86bfe70ff0048e59c6e890af14a055522fd3fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 23:35:03 +0600 Subject: [PATCH 227/501] [3qsdn] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/threeqsdn.py | 132 +++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 youtube_dl/extractor/threeqsdn.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b6f4ccc5d..2db3b3c3f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -766,6 +766,7 @@ from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE +from .threeqsdn import ThreeQSDNIE from .tinypic import TinyPicIE from .tlc import TlcDeIE from .tmz import ( diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py new file mode 100644 index 000000000..27a3de5c4 --- /dev/null +++ b/youtube_dl/extractor/threeqsdn.py @@ -0,0 +1,132 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + js_to_json, + mimetype2ext, +) + + +class ThreeQSDNIE(InfoExtractor): + IE_NAME = '3qsdn' + IE_DESC = '3Q SDN' + _VALID_URL = 
r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TESTS = [{ + # ondemand from http://www.philharmonie.tv/veranstaltung/26/ + 'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http', + 'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd', + 'info_dict': { + 'id': '0280d6b9-1215-11e6-b427-0cc47a188158', + 'ext': 'mp4', + 'title': '0280d6b9-1215-11e6-b427-0cc47a188158', + 'is_live': False, + }, + 'expected_warnings': ['Failed to download MPD manifest'], + }, { + # live video stream + 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true', + 'info_dict': { + 'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f', + 'ext': 'mp4', + 'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f', + 'is_live': False, + }, + }, { + # live audio stream + 'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48', + 'only_matching': True, + }, { + # live audio stream with some 404 URLs + 'url': 'http://playout.3qsdn.com/ac5c3186-777a-11e2-9c30-9acf09e2db48', + 'only_matching': True, + }, { + # geo restricted with 'This content is not available in your country' + 'url': 'http://playout.3qsdn.com/d63a3ffe-75e8-11e2-9c30-9acf09e2db48', + 'only_matching': True, + }, { + # geo restricted with 'playout.3qsdn.com/forbidden' + 'url': 'http://playout.3qsdn.com/8e330f26-6ae2-11e2-a16a-9acf09e2db48', + 'only_matching': True, + }, { + # live video with rtmp link + 'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + js = self._download_webpage( + 'http://playout.3qsdn.com/%s' % video_id, video_id, + query={'js': 'true'}) + + if any(p in js for p in ( + '>This content is not available in your country', + 'playout.3qsdn.com/forbidden')): + self.raise_geo_restricted() + + stream_content = self._search_regex( + r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js, + 'stream content', 
default='demand', group='content') + + live = stream_content == 'live' + + stream_type = self._search_regex( + r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js, + 'stream type', default='video', group='type') + + formats = [] + urls = set() + + def extract_formats(item_url, item={}): + if not item_url or item_url in urls: + return + urls.add(item_url) + type_ = item.get('type') + ext = determine_ext(item_url, default_ext=None) + if type_ == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + item_url, video_id, mpd_id='mpd', fatal=False)) + elif type_ in ('application/vnd.apple.mpegURL', 'application/x-mpegurl') or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + item_url, video_id, 'mp4', + entry_protocol='m3u8' if live else 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + item_url, video_id, f4m_id='hds', fatal=False)) + else: + if not self._is_valid_url(item_url, video_id): + return + formats.append({ + 'url': item_url, + 'format_id': item.get('quality'), + 'ext': 'mp4' if item_url.startswith('rtsp') else mimetype2ext(type_) or ext, + 'vcodec': 'none' if stream_type == 'audio' else None, + }) + + for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js): + f = self._parse_json( + item_js, video_id, transform_source=js_to_json, fatal=False) + if not f: + continue + extract_formats(f.get('src'), f) + + # More relaxed version to collect additional URLs and acting + # as a future-proof fallback + for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js): + extract_formats(src) + + self._sort_formats(formats) + + title = self._live_title(video_id) if live else video_id + + return { + 'id': video_id, + 'title': title, + 'is_live': live, + 'formats': formats, + } From 5d39176f6de8bab1e019ead7cd497659f3fc1a94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 
2016 23:40:34 +0600 Subject: [PATCH 228/501] [extractor/generic:3qsdn] Add support for embeds --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/threeqsdn.py | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0f1eb7fa6..b48ccfc97 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -61,6 +61,7 @@ from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE from .instagram import InstagramIE from .liveleak import LiveLeakIE +from .threeqsdn import ThreeQSDNIE class GenericIE(InfoExtractor): @@ -1983,6 +1984,11 @@ class GenericIE(InfoExtractor): if liveleak_url: return self.url_result(liveleak_url, 'LiveLeak') + # Look for 3Q SDN embeds + threeqsdn_url = ThreeQSDNIE._extract_url(webpage) + if threeqsdn_url: + return self.url_result(self._proto_relative_url(threeqsdn_url), ThreeQSDNIE.ie_key()) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py index 27a3de5c4..c77a07989 100644 --- a/youtube_dl/extractor/threeqsdn.py +++ b/youtube_dl/extractor/threeqsdn.py @@ -56,6 +56,13 @@ class ThreeQSDNIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): video_id = self._match_id(url) From cda6d47aad106a825f837c7a583fffc783c4b63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 23:41:57 +0600 Subject: [PATCH 229/501] [utils] Simplify integer conversion in js_to_json --- youtube_dl/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 25a9f33c0..a637563cb 100644 --- a/youtube_dl/utils.py +++ 
b/youtube_dl/utils.py @@ -1926,14 +1926,14 @@ def js_to_json(code): }.get(m.group(0), m.group(0)), v[1:-1]) INTEGER_TABLE = ( - (r'^(0[xX][0-9a-fA-F]+)', 16), - (r'^(0+[0-7]+)', 8), + (r'^0[xX][0-9a-fA-F]+', 16), + (r'^0+[0-7]+', 8), ) for regex, base in INTEGER_TABLE: im = re.match(regex, v) if im: - i = int(im.group(1), base) + i = int(im.group(0), base) return '"%d":' % i if v.endswith(':') else '%d' % i return '"%s"' % v From 6f41b2bcf16899f8c3f0ea705b2914cf1ae668a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 23:58:25 +0600 Subject: [PATCH 230/501] [extractor/generic] Improve 3qsdn embeds support (Closes #9453) --- youtube_dl/extractor/generic.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b48ccfc97..a6b1e23e3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1428,7 +1428,8 @@ class GenericIE(InfoExtractor): # Site Name | Video Title # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical - video_title = self._html_search_regex( + video_title = self._og_search_title( + webpage, default=None) or self._html_search_regex( r'(?s)<title>(.*?)', webpage, 'video title', default='video') @@ -1446,6 +1447,9 @@ class GenericIE(InfoExtractor): video_uploader = self._search_regex( r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') + video_description = self._og_search_description(webpage, default=None) + video_thumbnail = self._og_search_thumbnail(webpage, default=None) + # Helper method def _playlist_from_matches(matches, getter=None, ie=None): urlrs = orderedSet( @@ -1987,7 +1991,15 @@ class GenericIE(InfoExtractor): # Look for 3Q SDN embeds threeqsdn_url = ThreeQSDNIE._extract_url(webpage) if threeqsdn_url: - return self.url_result(self._proto_relative_url(threeqsdn_url), ThreeQSDNIE.ie_key()) + return { + '_type': 
'url_transparent', + 'ie_key': ThreeQSDNIE.ie_key(), + 'url': self._proto_relative_url(threeqsdn_url), + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + } def check_video(vurl): if YoutubeIE.suitable(vurl): From ed56f260399728f1975dd30f4c8ee110cf106d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 May 2016 03:34:35 +0600 Subject: [PATCH 231/501] [extractor/common] Improve name extraction for m3u8 formats --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0843d89af..8a8c07226 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1139,11 +1139,14 @@ class InfoExtractor(object): if m3u8_id: format_id.append(m3u8_id) last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None + # Despite specification does not mention NAME attribute for + # EXT-X-STREAM-INF it still sometimes may be present + stream_name = last_info.get('NAME') or last_media_name # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. 
if not live: - format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) + format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), 'url': format_url(line.strip()), From 69c9cc2716a4d076b023096c23b6f7646627824a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 May 2016 03:38:04 +0600 Subject: [PATCH 232/501] [xvideos] Extract html5 player formats (Closes #9495) --- youtube_dl/extractor/xvideos.py | 43 ++++++++++++++++----------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 710ad5041..1dfe031ca 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -8,7 +8,6 @@ from ..utils import ( clean_html, ExtractorError, determine_ext, - sanitized_Request, ) @@ -25,8 +24,6 @@ class XVideosIE(InfoExtractor): } } - _ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19' - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -35,31 +32,34 @@ class XVideosIE(InfoExtractor): if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - video_url = compat_urllib_parse_unquote( - self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) video_title = self._html_search_regex( r'(.*?)\s+-\s+XVID', webpage, 'title') video_thumbnail = self._search_regex( r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) - formats = [{ - 'url': video_url, - }] + formats = [] - android_req = sanitized_Request(url) - android_req.add_header('User-Agent', self._ANDROID_USER_AGENT) - android_webpage = self._download_webpage(android_req, video_id, fatal=False) + video_url = compat_urllib_parse_unquote(self._search_regex( + 
r'flv_url=(.+?)&', webpage, 'video URL', default='')) + if video_url: + formats.append({'url': video_url}) - if android_webpage is not None: - player_params_str = self._search_regex( - 'mobileReplacePlayerDivTwoQual\(([^)]+)\)', - android_webpage, 'player parameters', default='') - player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(','))) - if player_params: - formats.extend([{ - 'url': param, - 'preference': -10, - } for param in player_params if determine_ext(param) == 'mp4']) + player_args = self._search_regex( + r'(?s)new\s+HTML5Player\((.+?)\)', webpage, ' html5 player', default=None) + if player_args: + for arg in player_args.split(','): + format_url = self._search_regex( + r'(["\'])(?P<url>https?://.+?)\1', arg, 'url', + default=None, group='url') + if not format_url: + continue + ext = determine_ext(format_url) + if ext == 'mp4': + formats.append({'url': format_url}) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) @@ -67,7 +67,6 @@ class XVideosIE(InfoExtractor): 'id': video_id, 'formats': formats, 'title': video_title, - 'ext': 'flv', 'thumbnail': video_thumbnail, 'age_limit': 18, } From 79298173c5a957456cb17b2b26338a657f1aae1e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 15 May 2016 15:32:54 +0800 Subject: [PATCH 233/501] [utils] Fix getheader in urlhandle_detect_ext Fixes #7049, related to #9440 --- youtube_dl/utils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a637563cb..24e74428b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2031,11 +2031,7 @@ def mimetype2ext(mt): def urlhandle_detect_ext(url_handle): - try: - url_handle.headers - getheader = lambda h: url_handle.headers[h] - except AttributeError: # Python < 3 - getheader = url_handle.info().getheader + getheader = 
url_handle.headers.get cd = getheader('Content-Disposition') if cd: From cec9727c7f6a0dad8b10a51f0a6581ac5a1dbe86 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 15 May 2016 15:35:31 +0800 Subject: [PATCH 234/501] [hearthisat] Detect invalid download links (fixes #9440) --- youtube_dl/extractor/hearthisat.py | 38 +++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py index 7d8698655..ac42ef414 100644 --- a/youtube_dl/extractor/hearthisat.py +++ b/youtube_dl/extractor/hearthisat.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( HEADRequest, + KNOWN_EXTENSIONS, sanitized_Request, str_to_int, urlencode_postdata, @@ -17,7 +18,7 @@ from ..utils import ( class HearThisAtIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$' _PLAYLIST_URL = 'https://hearthis.at/playlist.php' - _TEST = { + _TESTS = [{ 'url': 'https://hearthis.at/moofi/dr-kreep', 'md5': 'ab6ec33c8fed6556029337c7885eb4e0', 'info_dict': { @@ -34,7 +35,25 @@ class HearThisAtIE(InfoExtractor): 'duration': 71, 'categories': ['Experimental'], } - } + }, { + # 'download' link redirects to the original webpage + 'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/', + 'md5': '5980ceb7c461605d30f1f039df160c6e', + 'info_dict': { + 'id': '811296', + 'ext': 'mp3', + 'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!', + 'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! 
by TwitchSF on hearthis.at - Dance', + 'upload_date': '20160328', + 'timestamp': 1459186146, + 'thumbnail': 're:^https?://.*\.jpg$', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'duration': 4360, + 'categories': ['Dance'], + }, + }] def _real_extract(self, url): m = re.match(self._VALID_URL, url) @@ -90,13 +109,14 @@ class HearThisAtIE(InfoExtractor): ext_handle = self._request_webpage( ext_req, display_id, note='Determining extension') ext = urlhandle_detect_ext(ext_handle) - formats.append({ - 'format_id': 'download', - 'vcodec': 'none', - 'ext': ext, - 'url': download_url, - 'preference': 2, # Usually better quality - }) + if ext in KNOWN_EXTENSIONS: + formats.append({ + 'format_id': 'download', + 'vcodec': 'none', + 'ext': ext, + 'url': download_url, + 'preference': 2, # Usually better quality + }) self._sort_formats(formats) return { From 5572d598a537998615c760ca06bd8d3894150c6a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 15 May 2016 15:44:04 +0800 Subject: [PATCH 235/501] [hearthisat] Update the first test --- youtube_dl/extractor/hearthisat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py index ac42ef414..256453882 100644 --- a/youtube_dl/extractor/hearthisat.py +++ b/youtube_dl/extractor/hearthisat.py @@ -27,7 +27,7 @@ class HearThisAtIE(InfoExtractor): 'title': 'Moofi - Dr. Kreep', 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1421564134, - 'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.', + 'description': 'Listen to Dr. 
Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP', 'upload_date': '20150118', 'comment_count': int, 'view_count': int, From a0a81918f18252805b161e4f7d0dc4924b672948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 15 May 2016 22:07:51 +0600 Subject: [PATCH 236/501] [collegehumor] Remove extractor It now uses brightcove --- youtube_dl/extractor/collegehumor.py | 101 --------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 102 deletions(-) delete mode 100644 youtube_dl/extractor/collegehumor.py diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py deleted file mode 100644 index 002b24037..000000000 --- a/youtube_dl/extractor/collegehumor.py +++ /dev/null @@ -1,101 +0,0 @@ -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class CollegeHumorIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' - - _TESTS = [ - { - 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', - 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd', - 'info_dict': { - 'id': '6902724', - 'ext': 'mp4', - 'title': 'Comic-Con Cosplay Catastrophe', - 'description': "Fans get creative this year at San Diego. Too creative. 
And yes, that's really Joss Whedon.", - 'age_limit': 13, - 'duration': 187, - }, - }, { - 'url': 'http://www.collegehumor.com/video/3505939/font-conference', - 'md5': '72fa701d8ef38664a4dbb9e2ab721816', - 'info_dict': { - 'id': '3505939', - 'ext': 'mp4', - 'title': 'Font Conference', - 'description': "This video wasn't long enough, so we made it double-spaced.", - 'age_limit': 10, - 'duration': 179, - }, - }, { - # embedded youtube video - 'url': 'http://www.collegehumor.com/embed/6950306', - 'info_dict': { - 'id': 'Z-bao9fg6Yc', - 'ext': 'mp4', - 'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', - 'uploader': 'Mark Dice', - 'uploader_id': 'MarkDice', - 'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', - 'upload_date': '20140127', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Youtube'], - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') - - jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json' - data = json.loads(self._download_webpage( - jsonUrl, video_id, 'Downloading info JSON')) - vdata = data['video'] - if vdata.get('youtubeId') is not None: - return { - '_type': 'url', - 'url': vdata['youtubeId'], - 'ie_key': 'Youtube', - } - - AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0} - rating = vdata.get('rating') - if rating: - age_limit = AGE_LIMITS.get(rating.lower()) - else: - age_limit = None # None = No idea - - PREFS = {'high_quality': 2, 'low_quality': 0} - formats = [] - for format_key in ('mp4', 'webm'): - for qname, qurl in vdata.get(format_key, {}).items(): - formats.append({ - 'format_id': format_key + '_' + qname, - 'url': qurl, - 'format': format_key, - 'preference': PREFS.get(qname), - }) - self._sort_formats(formats) - - duration = int_or_none(vdata.get('duration'), 1000) - like_count = int_or_none(vdata.get('likes')) - - return { - 'id': video_id, - 'title': vdata['title'], 
- 'description': vdata.get('description'), - 'thumbnail': vdata.get('thumbnail'), - 'formats': formats, - 'age_limit': age_limit, - 'duration': duration, - 'like_count': like_count, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2db3b3c3f..ca9d85e33 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -138,7 +138,6 @@ from .cnn import ( CNNBlogsIE, CNNArticleIE, ) -from .collegehumor import CollegeHumorIE from .collegerama import CollegeRamaIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comcarcoff import ComCarCoffIE From f7199423e542580cf8c30991d122673276113497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 16 May 2016 00:30:13 +0600 Subject: [PATCH 237/501] [groupon] Add support for Youtube embeds (Closes #9508) --- youtube_dl/extractor/groupon.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index f6b69662b..1dd0a81cc 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class GrouponIE(InfoExtractor): - _VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)' _TEST = { 'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf', @@ -15,18 +15,26 @@ class GrouponIE(InfoExtractor): }, 'playlist': [{ 'info_dict': { - 'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf', - 'ext': 'flv', - 'title': 'Bikram Yoga Huntington Beach | Orange County', + 'id': 'fk6OhWpXgIQ', + 'ext': 'mp4', + 'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', - 'duration': 44.961, + 'duration': 45, + 'upload_date': '20160405', + 
'uploader_id': 'groupon', + 'uploader': 'Groupon', }, }], 'params': { - 'skip_download': 'HDS', + 'skip_download': True, } } + _PROVIDERS = { + 'ooyala': ('ooyala:%s', 'Ooyala'), + 'youtube': ('%s', 'Youtube'), + } + def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) @@ -36,12 +44,17 @@ class GrouponIE(InfoExtractor): videos = payload['carousel'].get('dealVideos', []) entries = [] for v in videos: - if v.get('provider') != 'OOYALA': + provider = v.get('provider') + video_id = v.get('media') or v.get('id') or v.get('baseURL') + if not provider or not video_id: + continue + url_pattern, ie_key = self._PROVIDERS.get(provider.lower()) + if not url_pattern: self.report_warning( '%s: Unsupported video provider %s, skipping video' % - (playlist_id, v.get('provider'))) + (playlist_id, provider)) continue - entries.append(self.url_result('ooyala:%s' % v['media'])) + entries.append(self.url_result(url_pattern % video_id, ie_key)) return { '_type': 'playlist', From 36755d9d694f818ce8f367ce7eb41374f194893d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 16 May 2016 17:25:47 +0200 Subject: [PATCH 238/501] release 2016.05.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++---- youtube_dl/version.py | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1fb878b59..7024fc729 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.10 +[debug] youtube-dl version 2016.05.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index de84e5c84..29db13883 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -6,6 +6,7 @@ - **22tracks:genre** - **22tracks:track** - **24video** + - **3qsdn**: 3Q SDN - **3sat** - **4tube** - **56.com** @@ -114,7 +115,6 @@ - **chirbit** - **chirbit:profile** - **Cinchcast** - - **Cinemassacre** - **Clipfish** - **cliphunter** - **ClipRs** @@ -128,7 +128,6 @@ - **CNN** - **CNNArticle** - **CNNBlogs** - - **CollegeHumor** - **CollegeRama** - **ComCarCoff** - **ComedyCentral** @@ -680,7 +679,6 @@ - **tvp.pl:Series** - **TVPlay**: TV3Play and related services - **Tweakers** - - **twitch:bookmarks** - **twitch:chapter** - **twitch:past_broadcasts** - **twitch:profile** @@ -698,7 +696,8 @@ - 
**USAToday** - **ustream** - **ustream:channel** - - **Ustudio** + - **ustudio** + - **ustudio:embed** - **Varzesh3** - **Vbox7** - **VeeHD** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 45e40c0d1..5a0fdd6ce 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.10' +__version__ = '2016.05.16' From cdd94c2eae6c6f0a627d457c3a73894a62eb86c5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 17 May 2016 14:38:15 +0800 Subject: [PATCH 239/501] [utils] Check for None values in SOCKS proxy Originally reported at https://github.com/rg3/youtube-dl/pull/9287#issuecomment-219617864 --- youtube_dl/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 24e74428b..ac60ba18c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -883,12 +883,17 @@ def make_socks_conn_class(base_class, socks_proxy): elif url_components.scheme.lower() == 'socks4a': socks_type = ProxyType.SOCKS4A + def unquote_if_non_empty(s): + if not s: + return s + return compat_urllib_parse_unquote_plus(s) + proxy_args = ( socks_type, url_components.hostname, url_components.port or 1080, True, # Remote DNS - compat_urllib_parse_unquote_plus(url_components.username), - compat_urllib_parse_unquote_plus(url_components.password), + unquote_if_non_empty(url_components.username), + unquote_if_non_empty(url_components.password), ) class SocksConnection(base_class): From 055f0d3d0636e343354a19cd558a3aac3cf31399 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 17 May 2016 15:38:57 +0800 Subject: [PATCH 240/501] [abcnews] Added a new extractor (closes #3992) Related: #6108, #8664, #9459 --- youtube_dl/extractor/abcnews.py | 135 +++++++++++++++++++++++++++++ youtube_dl/extractor/amp.py | 4 +- youtube_dl/extractor/extractors.py | 4 + 3 files changed, 141 insertions(+), 2 
deletions(-) create mode 100644 youtube_dl/extractor/abcnews.py diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py new file mode 100644 index 000000000..b61a6327c --- /dev/null +++ b/youtube_dl/extractor/abcnews.py @@ -0,0 +1,135 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import calendar +import re +import time + +from .amp import AMPIE +from .common import InfoExtractor +from ..compat import compat_urlparse + + +class AbcNewsVideoIE(AMPIE): + IE_NAME = 'abcnews:video' + _VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', + 'info_dict': { + 'id': '20411932', + 'ext': 'mp4', + 'display_id': 'week-exclusive-irans-foreign-minister-zarif', + 'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif', + 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. 
Javad Zarif.', + 'duration': 180, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + video_id = mobj.group('id') + info_dict = self._extract_feed_info( + 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) + info_dict.update({ + 'id': video_id, + 'display_id': display_id, + }) + return info_dict + + +class AbcNewsIE(InfoExtractor): + IE_NAME = 'abcnews' + _VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY', + 'info_dict': { + 'id': '10498713', + 'ext': 'flv', + 'display_id': 'dramatic-video-rare-death-job-america', + 'title': 'Occupational Hazards', + 'description': 'Nightline investigates the dangers that lurk at various jobs.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20100428', + 'timestamp': 1272412800, + }, + 'add_ie': ['AbcNewsVideo'], + }, { + 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818', + 'info_dict': { + 'id': '39125818', + 'ext': 'mp4', + 'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016', + 'title': 'Justin Timberlake Drops Hints For Secret Single', + 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.', + 'upload_date': '20160515', + 'timestamp': 1463329500, + }, + 'params': { + # m3u8 download + 'skip_download': True, + # The embedded YouTube video is blocked due to copyright issues + 'playlist_items': '1', + }, + 'add_ie': ['AbcNewsVideo'], + }, { + 'url': 
'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + video_url = self._search_regex( + r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL') + full_video_url = compat_urlparse.urljoin(url, video_url) + + youtube_url = self._html_search_regex( + r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"', + webpage, 'YouTube URL', default=None) + + timestamp = None + date_str = self._html_search_regex( + r'<span[^>]+class="timestamp">([^<]+)</span>', + webpage, 'timestamp', fatal=False) + if date_str: + tz_offset = 0 + if date_str.endswith(' ET'): # Eastern Time + tz_offset = -5 + date_str = date_str[:-3] + date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p'] + for date_format in date_formats: + try: + timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format)) + except ValueError: + continue + if timestamp is not None: + timestamp -= tz_offset * 3600 + + entry = { + '_type': 'url_transparent', + 'ie_key': AbcNewsVideoIE.ie_key(), + 'url': full_video_url, + 'id': video_id, + 'display_id': display_id, + 'timestamp': timestamp, + } + + if youtube_url: + entries = [entry, self.url_result(youtube_url, 'Youtube')] + return self.playlist_result(entries) + + return entry diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index 138fa0808..8545681be 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -52,7 +52,7 @@ class AMPIE(InfoExtractor): for media_data in media_content: media = media_data['@attributes'] media_type = media['type'] - if media_type == 'video/f4m': + if media_type in ('video/f4m', 'application/f4m+xml'): formats.extend(self._extract_f4m_formats( media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', 
video_id, f4m_id='hds', fatal=False)) @@ -61,7 +61,7 @@ class AMPIE(InfoExtractor): media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)) else: formats.append({ - 'format_id': media_data['media-category']['@attributes']['label'], + 'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'), 'url': media['url'], 'tbr': int_or_none(media.get('bitrate')), 'filesize': int_or_none(media.get('fileSize')), diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ca9d85e33..861701f4c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals from .abc import ABCIE from .abc7news import Abc7NewsIE +from .abcnews import ( + AbcNewsIE, + AbcNewsVideoIE, +) from .academicearth import AcademicEarthCourseIE from .acast import ( ACastIE, From 15cda1ef774e9dbc538765f59dff5b10a492eca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 May 2016 23:46:47 +0600 Subject: [PATCH 241/501] [nfb] Fix uploader extraction --- youtube_dl/extractor/nfb.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 51e4a34f7..234e49047 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -37,8 +37,7 @@ class NFBIE(InfoExtractor): uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"', page, 'director id', fatal=False) - uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>', - page, 'director name', fatal=False) + uploader = self._og_search_property('video:director', page, 'director name') request = sanitized_Request( 'https://www.nfb.ca/film/%s/player_config' % video_id, From 11e6a0b64130f9b4aea1a6115a3ebaad73f2f5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 18 May 
2016 00:25:15 +0600 Subject: [PATCH 242/501] [nfb] Modernize and extract subtitles --- youtube_dl/extractor/nfb.py | 110 +++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 234e49047..adcc636bc 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -2,8 +2,12 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - sanitized_Request, + clean_html, + determine_ext, + int_or_none, + qualities, urlencode_postdata, + xpath_text, ) @@ -16,12 +20,12 @@ class NFBIE(InfoExtractor): 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', 'info_dict': { 'id': 'qallunaat_why_white_people_are_funny', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Qallunaat! Why White People Are Funny ', - 'description': 'md5:836d8aff55e087d04d9f6df554d4e038', + 'description': 'md5:6b8e32dde3abf91e58857b174916620c', 'duration': 3128, + 'creator': 'Mark Sandiford', 'uploader': 'Mark Sandiford', - 'uploader_id': 'mark-sandiford', }, 'params': { # rtmp download @@ -31,64 +35,78 @@ class NFBIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - page = self._download_webpage( - 'https://www.nfb.ca/film/%s' % video_id, video_id, - 'Downloading film page') - uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"', - page, 'director id', fatal=False) - uploader = self._og_search_property('video:director', page, 'director name') - - request = sanitized_Request( + config = self._download_xml( 'https://www.nfb.ca/film/%s/player_config' % video_id, - urlencode_postdata({'getConfig': 'true'})) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') + video_id, 'Downloading player config XML', + data=urlencode_postdata({'getConfig': 
'true'}), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf' + }) - config = self._download_xml(request, video_id, 'Downloading player config XML') - - title = None - description = None - thumbnail = None - duration = None - formats = [] - - def extract_thumbnail(media): - thumbnails = {} - for asset in media.findall('assets/asset'): - thumbnails[asset.get('quality')] = asset.find('default/url').text - if not thumbnails: - return None - if 'high' in thumbnails: - return thumbnails['high'] - return list(thumbnails.values())[0] + title, description, thumbnail, duration, uploader, author = [None] * 6 + thumbnails, formats = [[]] * 2 + subtitles = {} for media in config.findall('./player/stream/media'): if media.get('type') == 'posterImage': - thumbnail = extract_thumbnail(media) - elif media.get('type') == 'video': - duration = int(media.get('duration')) - title = media.find('title').text - description = media.find('description').text - # It seems assets always go from lower to better quality, so no need to sort + quality_key = qualities(('low', 'high')) + thumbnails = [] for asset in media.findall('assets/asset'): - for x in asset: + asset_url = xpath_text(asset, 'default/url', default=None) + if not asset_url: + continue + quality = asset.get('quality') + thumbnails.append({ + 'url': asset_url, + 'id': quality, + 'preference': quality_key(quality), + }) + elif media.get('type') == 'video': + title = xpath_text(media, 'title', fatal=True) + for asset in media.findall('assets/asset'): + quality = asset.get('quality') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', quality or '', 'height', default=None)) + for node in asset: + streamer = xpath_text(node, 'streamerURI', default=None) + if not streamer: + continue + play_path = xpath_text(node, 'url', default=None) + if not play_path: + continue formats.append({ - 'url': x.find('streamerURI').text, - 'app': 
x.find('streamerURI').text.split('/', 3)[3], - 'play_path': x.find('url').text, + 'url': streamer, + 'app': streamer.split('/', 3)[3], + 'play_path': play_path, 'rtmp_live': False, - 'ext': 'mp4', - 'format_id': '%s-%s' % (x.tag, asset.get('quality')), + 'ext': 'flv', + 'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag, + 'height': height, }) + self._sort_formats(formats) + description = clean_html(xpath_text(media, 'description')) + uploader = xpath_text(media, 'author') + duration = int_or_none(media.get('duration')) + for subtitle in media.findall('./subtitles/subtitle'): + subtitle_url = xpath_text(subtitle, 'url', default=None) + if not subtitle_url: + continue + lang = xpath_text(subtitle, 'lang', default='en') + subtitles.setdefault(lang, []).append({ + 'url': subtitle_url, + 'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(), + }) return { 'id': video_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'duration': duration, + 'creator': uploader, 'uploader': uploader, - 'uploader_id': uploader_id, 'formats': formats, + 'subtitles': subtitles, } From b78531a36abd765aa9c9df1dba1cf82dc23f8fec Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 18 May 2016 22:24:46 +0100 Subject: [PATCH 243/501] [formula1] Add new extractor(closes #3617) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/formula1.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 youtube_dl/extractor/formula1.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 861701f4c..efbe970fe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -242,6 +242,7 @@ from .fktv import FKTVIE from .flickr import FlickrIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE +from .formula1 import Formula1IE from .fourtube import FourTubeIE from .fox 
import FOXIE from .foxgay import FoxgayIE diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py new file mode 100644 index 000000000..726393fcc --- /dev/null +++ b/youtube_dl/extractor/formula1.py @@ -0,0 +1,25 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class Formula1IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html' + _TEST = { + 'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html', + 'md5': '8c79e54be72078b26b89e0e111c0502b', + 'info_dict': { + 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', + 'ext': 'flv', + 'title': 'Race highlights - Spain 2016', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + ooyala_embed_code = self._search_regex( + r'data-videoid="([^"]+)"', webpage, 'ooyala embed code') + return self.url_result( + 'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code) From 46bc9b7d7cea2e161670e65abe42ef01d39e8957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 May 2016 04:31:30 +0600 Subject: [PATCH 244/501] [utils] Allow None in remove_{start,end} --- test/test_utils.py | 12 ++++++++++++ youtube_dl/utils.py | 8 ++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 520d32ff5..a697232a8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -50,6 +50,8 @@ from youtube_dl.utils import ( sanitize_path, prepend_extension, replace_extension, + remove_start, + remove_end, remove_quotes, shell_quote, smuggle_url, @@ -215,6 +217,16 @@ class TestUtil(unittest.TestCase): self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + def test_remove_start(self): + 
self.assertEqual(remove_start(None, 'A - '), None) + self.assertEqual(remove_start('A - B', 'A - '), 'B') + self.assertEqual(remove_start('B - A', 'A - '), 'B - A') + + def test_remove_end(self): + self.assertEqual(remove_end(None, ' - B'), None) + self.assertEqual(remove_end('A - B', ' - B'), 'A') + self.assertEqual(remove_end('B - A', ' - B'), 'B - A') + def test_remove_quotes(self): self.assertEqual(remove_quotes(None), None) self.assertEqual(remove_quotes('"'), '"') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ac60ba18c..5301d0740 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1549,15 +1549,11 @@ def setproctitle(title): def remove_start(s, start): - if s.startswith(start): - return s[len(start):] - return s + return s[len(start):] if s is not None and s.startswith(start) else s def remove_end(s, end): - if s.endswith(end): - return s[:-len(end)] - return s + return s[:-len(end)] if s is not None and s.endswith(end) else s def remove_quotes(s): From dd81769c62661d168fb87b896ffb8a80dacbe45b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 May 2016 04:34:19 +0600 Subject: [PATCH 245/501] [ndtv] Fix extraction --- youtube_dl/extractor/ndtv.py | 40 ++++++++++-------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py index 2a1ca80df..96528f649 100644 --- a/youtube_dl/extractor/ndtv.py +++ b/youtube_dl/extractor/ndtv.py @@ -1,19 +1,18 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( - month_by_name, int_or_none, + remove_end, + unified_strdate, ) class NDTVIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)' _TEST = { - 'url': 
'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710', + 'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710', 'md5': '39f992dbe5fb531c395d8bbedb1e5e88', 'info_dict': { 'id': '300710', @@ -22,7 +21,7 @@ class NDTVIE(InfoExtractor): 'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02', 'upload_date': '20131208', 'duration': 1327, - 'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg', + 'thumbnail': 're:https?://.*\.jpg', }, } @@ -30,36 +29,19 @@ class NDTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + title = remove_end(self._og_search_title(webpage), ' - NDTV') + filename = self._search_regex( r"__filename='([^']+)'", webpage, 'video filename') - video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % - filename) + video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename duration = int_or_none(self._search_regex( r"__duration='([^']+)'", webpage, 'duration', fatal=False)) - date_m = re.search(r'''(?x) - <p\s+class="vod_dateline">\s* - Published\s+On:\s* - (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+) - ''', webpage) - upload_date = None + upload_date = unified_strdate(self._html_search_meta( + 'publish-date', webpage, 'upload date', fatal=False)) - if date_m is not None: - month = month_by_name(date_m.group('monthname')) - if month is not None: - upload_date = '%s%02d%02d' % ( - date_m.group('year'), month, int(date_m.group('day'))) - - description = self._og_search_description(webpage) - READ_MORE = ' (Read more)' - if description.endswith(READ_MORE): - description = description[:-len(READ_MORE)] - - title = self._og_search_title(webpage) - TITLE_SUFFIX = ' - NDTV' - if title.endswith(TITLE_SUFFIX): - title = title[:-len(TITLE_SUFFIX)] + description = 
remove_end(self._og_search_description(webpage), ' (Read more)') return { 'id': video_id, From 8585dc4cdc735eb8a077dffb68affa81e1a98693 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Thu, 19 May 2016 01:18:01 +0200 Subject: [PATCH 246/501] [Makefile] delete thumbnails --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5d7cd5a7e..d760e4576 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . 
-name "*.class" -delete From a00129670390c241d097afd873b4ee226ca7d550 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 19 May 2016 18:18:03 +0100 Subject: [PATCH 247/501] [learnr] Add new extractor(closes #4284) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/learnr.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/learnr.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index efbe970fe..74aba2d5c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -370,6 +370,7 @@ from .kuwo import ( ) from .la7 import LA7IE from .laola1tv import Laola1TvIE +from .learnr import LearnrIE from .lecture2go import Lecture2GoIE from .lemonde import LemondeIE from .leeco import ( diff --git a/youtube_dl/extractor/learnr.py b/youtube_dl/extractor/learnr.py new file mode 100644 index 000000000..1435e090e --- /dev/null +++ b/youtube_dl/extractor/learnr.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class LearnrIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript', + 'md5': '3719fdf0a68397f49899e82c308a89de', + 'info_dict': { + 'id': '51624', + 'ext': 'mp4', + 'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript', + 'description': 'md5:b36dbfa92350176cdf12b4d388485503', + 'uploader': 'LearnCode.academy', + 'uploader_id': 'learncodeacademy', + 'upload_date': '20131021', + }, + 'add_ie': ['Youtube'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + return { + '_type': 'url_transparent', + 'url': self._search_regex( + 
r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'), + 'id': video_id, + } From f6e588afc0b12ebec2bc65551e882e6d99467499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 08:53:04 +0600 Subject: [PATCH 248/501] [24video] Fix description extraction --- youtube_dl/extractor/twentyfourvideo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index e03e2dbaa..4025edf02 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -47,7 +47,8 @@ class TwentyFourVideoIE(InfoExtractor): title = self._og_search_title(webpage) description = self._html_search_regex( - r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False) + r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>', + webpage, 'description', fatal=False, group='description') thumbnail = self._og_search_thumbnail(webpage) duration = int_or_none(self._og_search_property( 'duration', webpage, 'duration', fatal=False)) From 52f7c75cff3d7f7923deda469f9d2a551742c193 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 20 May 2016 06:53:14 +0100 Subject: [PATCH 249/501] [cbc] extract http formats and update tests --- youtube_dl/extractor/cbc.py | 63 +++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 68a0633b6..581928f7d 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + js_to_json, + smuggle_url, +) class CBCIE(InfoExtractor): @@ -12,57 +15,54 @@ class CBCIE(InfoExtractor): _TESTS = [{ # with mediaId 'url': 
'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', + 'md5': '97e24d09672fc4cf56256d6faa6c25bc', 'info_dict': { 'id': '2682904050', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Don Cherry – All-Stars', 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.', - 'timestamp': 1454475540, + 'timestamp': 1454463000, 'upload_date': '20160203', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'uploader': 'CBCC-NEW', }, }, { # with clipId 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', 'info_dict': { 'id': '2487345465', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Robin Williams freestyles on 90 Minutes Live', 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.', - 'upload_date': '19700101', + 'upload_date': '19780210', 'uploader': 'CBCC-NEW', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'timestamp': 255977160, }, }, { # multiple iframes 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot', 'playlist': [{ + 'md5': '377572d0b49c4ce0c9ad77470e0b96b4', 'info_dict': { 'id': '2680832926', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'An Eagle\'s-Eye View Off Burrard Bridge', 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.', - 'upload_date': '19700101', + 'upload_date': '20160201', + 'timestamp': 1454342820, + 'uploader': 'CBCC-NEW', }, }, { + 'md5': '415a0e3f586113894174dfb31aa5bb1a', 'info_dict': { 'id': '2658915080', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Fly like an eagle!', 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower', - 'upload_date': '19700101', + 'upload_date': '20150315', + 'timestamp': 1426443984, + 'uploader': 'CBCC-NEW', 
}, }], - 'params': { - # rtmp download - 'skip_download': True, - }, }] @classmethod @@ -95,20 +95,23 @@ class CBCPlayerIE(InfoExtractor): 'url': 'http://www.cbc.ca/player/play/2683190193', 'info_dict': { 'id': '2683190193', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Gerry Runs a Sweat Shop', 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0', - 'timestamp': 1455067800, + 'timestamp': 1455071400, 'upload_date': '20160210', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'uploader': 'CBCC-NEW', }, } def _real_extract(self, url): video_id = self._match_id(url) - return self.url_result( - 'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id, - 'ThePlatformFeed', video_id) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, { + 'force_smil_url': True + }), + 'id': video_id, + } From 043dc9d36fea85a964bad3ec13f77d32c462115b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 20 May 2016 18:39:54 +0800 Subject: [PATCH 250/501] [cbc] Fix for old-styled URLs The URL http://www.cbc.ca/player/News/ID/2672225049/ (#6342) redirects to http://www.cbc.ca/player/play/2672224672, while youtube-dl wasn't able to handle it correctly. 
--- youtube_dl/extractor/cbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 581928f7d..daf237ca8 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -11,7 +11,7 @@ from ..utils import ( class CBCIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)' _TESTS = [{ # with mediaId 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', From ad96b4c8f56ba9873c62a2ce9916253f9b8a49ee Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 20 May 2016 19:02:53 +0800 Subject: [PATCH 251/501] [common] Extract audio formats in SMIL Found in http://www.cbc.ca/player/play/2657631896 Closes #5156 --- youtube_dl/extractor/common.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8a8c07226..9f22ee930 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1278,21 +1278,21 @@ class InfoExtractor(object): m3u8_count = 0 srcs = [] - videos = smil.findall(self._xpath_ns('.//video', namespace)) - for video in videos: - src = video.get('src') + media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace)) + for medium in media: + src = medium.get('src') if not src or src in srcs: continue srcs.append(src) - bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) - filesize = int_or_none(video.get('size') or video.get('fileSize')) - width = int_or_none(video.get('width')) - height = int_or_none(video.get('height')) - proto = video.get('proto') - ext = video.get('ext') + bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000) + filesize = 
int_or_none(medium.get('size') or medium.get('fileSize')) + width = int_or_none(medium.get('width')) + height = int_or_none(medium.get('height')) + proto = medium.get('proto') + ext = medium.get('ext') src_ext = determine_ext(src) - streamer = video.get('streamer') or base + streamer = medium.get('streamer') or base if proto == 'rtmp' or streamer.startswith('rtmp'): rtmp_count += 1 From 31a70191e730a2a963c8b2e4d19921cad573ad8a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 20 May 2016 19:04:50 +0800 Subject: [PATCH 252/501] [cbc] Add the test case from #5156 --- youtube_dl/extractor/cbc.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index daf237ca8..22d5e72d5 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -91,7 +91,7 @@ class CBCIE(InfoExtractor): class CBCPlayerIE(InfoExtractor): _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', 'info_dict': { 'id': '2683190193', @@ -102,7 +102,20 @@ class CBCPlayerIE(InfoExtractor): 'upload_date': '20160210', 'uploader': 'CBCC-NEW', }, - } + }, { + # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ + 'url': 'http://www.cbc.ca/player/play/2657631896', + 'md5': 'e5e708c34ae6fca156aafe17c43e8b75', + 'info_dict': { + 'id': '2657631896', + 'ext': 'mp3', + 'title': 'CBC Montreal is organizing its first ever community hackathon!', + 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. 
Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.', + 'timestamp': 1425704400, + 'upload_date': '20150307', + 'uploader': 'CBCC-NEW', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) From f0c96af9cb0edc69f9ba73d39e6e191994e31256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 20:55:10 +0600 Subject: [PATCH 253/501] [wistia] Add alias and modernize --- youtube_dl/extractor/wistia.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 8b14840a2..478c42833 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -3,16 +3,16 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( ExtractorError, - sanitized_Request, int_or_none, ) class WistiaIE(InfoExtractor): - _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' - _API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)' + _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' + _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' - _TEST = { + _TESTS = [{ 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 'md5': 'cafeb56ec0c53c18c97405eecb3133df', 'info_dict': { @@ -24,17 +24,25 @@ class WistiaIE(InfoExtractor): 'timestamp': 1386185018, 'duration': 117, }, - } + }, { + 'url': 'wistia:sh7fpupwlt', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - request = sanitized_Request(self._API_URL.format(video_id)) - request.add_header('Referer', url) # Some videos require this. - data_json = self._download_json(request, video_id) + data_json = self._download_json( + self._API_URL % video_id, video_id, + # Some videos require this. 
+ headers={ + 'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id, + }) + if data_json.get('error'): - raise ExtractorError('Error while getting the playlist', - expected=True) + raise ExtractorError( + 'Error while getting the playlist', expected=True) + data = data_json['media'] title = data['name'] From 36ca2c55db7939aff2dc700523843a9a0f82ae2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:04:01 +0600 Subject: [PATCH 254/501] [wistia] Skip storyboard and improve extraction --- youtube_dl/extractor/wistia.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 478c42833..6eb94fcab 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -49,19 +49,23 @@ class WistiaIE(InfoExtractor): formats = [] thumbnails = [] for a in data['assets']: + aurl = a.get('url') + if not aurl: + continue astatus = a.get('status') atype = a.get('type') - if (astatus is not None and astatus != 2) or atype == 'preview': + if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'): continue elif atype in ('still', 'still_image'): thumbnails.append({ - 'url': a['url'], - 'resolution': '%dx%d' % (a['width'], a['height']), + 'url': aurl, + 'width': int_or_none(a.get('width')), + 'height': int_or_none(a.get('height')), }) else: formats.append({ 'format_id': atype, - 'url': a['url'], + 'url': aurl, 'tbr': int_or_none(a.get('bitrate')), 'vbr': int_or_none(a.get('opt_vbitrate')), 'width': int_or_none(a.get('width')), From 45f160a43c5f103af7a843f1159a1f6e8f498f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:16:08 +0600 Subject: [PATCH 255/501] [wistia] Improve hls support --- youtube_dl/extractor/wistia.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wistia.py 
b/youtube_dl/extractor/wistia.py index 6eb94fcab..97139a35a 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -27,6 +27,10 @@ class WistiaIE(InfoExtractor): }, { 'url': 'wistia:sh7fpupwlt', 'only_matching': True, + }, { + # with hls video + 'url': 'wistia:807fafadvk', + 'only_matching': True, }] def _real_extract(self, url): @@ -63,6 +67,8 @@ class WistiaIE(InfoExtractor): 'height': int_or_none(a.get('height')), }) else: + aext = a.get('ext') + is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' formats.append({ 'format_id': atype, 'url': aurl, @@ -73,7 +79,8 @@ class WistiaIE(InfoExtractor): 'filesize': int_or_none(a.get('size')), 'vcodec': a.get('codec'), 'container': a.get('container'), - 'ext': a.get('ext'), + 'ext': 'mp4' if is_m3u8 else aext, + 'protocol': 'm3u8' if is_m3u8 else None, 'preference': 1 if atype == 'original' else None, }) From 64413f7563eb7a89e06ede91fc135de73bc57db4 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 20 May 2016 16:20:05 +0100 Subject: [PATCH 256/501] [cbc] fix extraction for flv only videos(fixes #5309) --- youtube_dl/extractor/cbc.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 22d5e72d5..ff663d079 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -28,6 +28,7 @@ class CBCIE(InfoExtractor): }, { # with clipId 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', + 'md5': '0274a90b51a9b4971fe005c63f592f12', 'info_dict': { 'id': '2487345465', 'ext': 'mp4', @@ -93,6 +94,7 @@ class CBCPlayerIE(InfoExtractor): _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', + 'md5': '64d25f841ddf4ddb28a235338af32e2c', 'info_dict': { 'id': '2683190193', 'ext': 'mp4', @@ -115,6 +117,19 @@ 
class CBCPlayerIE(InfoExtractor): 'upload_date': '20150307', 'uploader': 'CBCC-NEW', }, + }, { + # available only when we add `formats=MPEG4,FLV,MP3` to theplatform url + 'url': 'http://www.cbc.ca/player/play/2164402062', + 'md5': '17a61eb813539abea40618d6323a7f82', + 'info_dict': { + 'id': '2164402062', + 'ext': 'flv', + 'title': 'Cancer survivor four times over', + 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', + 'timestamp': 1320410746, + 'upload_date': '20111104', + 'uploader': 'CBCC-NEW', + }, }] def _real_extract(self, url): @@ -123,7 +138,7 @@ class CBCPlayerIE(InfoExtractor): '_type': 'url_transparent', 'ie_key': 'ThePlatform', 'url': smuggle_url( - 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, { + 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, { 'force_smil_url': True }), 'id': video_id, From aa5957ac49aad5165ce9ab5b9403539d61a09dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:33:31 +0600 Subject: [PATCH 257/501] [extractor/generic] Add support for async wistia embeds (Closes #9549) --- youtube_dl/extractor/generic.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a6b1e23e3..632d7b5f0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1563,6 +1563,15 @@ class GenericIE(InfoExtractor): 'id': match.group('id') } + match = re.search( + r'''(?sx) + <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? 
+ <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2 + ''', webpage) + if match: + return self.url_result(self._proto_relative_url( + 'wistia:%s' % match.group('id')), 'Wistia') + # Look for SVT player svt_url = SVTIE._extract_url(webpage) if svt_url: From 7ded6545edb18bb008e8277b42a21d60fb6cd653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:43:36 +0600 Subject: [PATCH 258/501] [extractor/generic] Add test for wistia standard embed --- youtube_dl/extractor/generic.py | 16 ++++++++++++++++ youtube_dl/extractor/wistia.py | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 632d7b5f0..9883cde61 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -742,6 +742,22 @@ class GenericIE(InfoExtractor): 'timestamp': 1401832161, }, }, + # Wistia standard embed (async) + { + 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/', + 'info_dict': { + 'id': '807fafadvk', + 'ext': 'mp4', + 'title': 'Drip Brennan Dunn Workshop', + 'description': 'a JV Webinars video from getdrip-1', + 'duration': 4986.95, + 'upload_date': '20160518', + 'timestamp': 1463607249, + }, + 'params': { + 'skip_download': True, + } + }, # Soundcloud embed { 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/', diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 97139a35a..c634b8dec 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + float_or_none, ) @@ -92,6 +93,6 @@ class WistiaIE(InfoExtractor): 'description': data.get('seoDescription'), 'formats': formats, 'thumbnails': thumbnails, - 'duration': int_or_none(data.get('duration')), + 'duration': 
float_or_none(data.get('duration')), 'timestamp': int_or_none(data.get('createdAt')), } From 6c114b12104e8c9d0713d1cb2cd6c4ddc7872b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:55:35 +0600 Subject: [PATCH 259/501] [extractor/generic] Remove generic id and title from wistia extractionand update tests --- youtube_dl/extractor/generic.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9883cde61..c368f08e1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -717,15 +717,18 @@ class GenericIE(InfoExtractor): }, # Wistia embed { - 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', - 'md5': '8788b683c777a5cf25621eaf286d0c23', + 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', + 'md5': '1953f3a698ab51cfc948ed3992a0b7ff', 'info_dict': { - 'id': '1cfaf6b7ea', + 'id': '6e2wtrbdaf', 'ext': 'mov', - 'title': 'md5:51364a8d3d009997ba99656004b5e20d', - 'duration': 643.0, - 'filesize': 182808282, - 'uploader': 'education-portal.com', + 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england', + 'description': 'a Paywall Videos video from Remilon', + 'duration': 644.072, + 'uploader': 'study.com', + 'timestamp': 1459678540, + 'upload_date': '20160403', + 'filesize': 24687186, }, }, { @@ -734,12 +737,12 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'uxjb0lwrcz', 'ext': 'mp4', - 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', + 'title': 'Conversation about Hexagonal Rails Part 1', 'description': 'a Martin Fowler video from ThoughtWorks', 'duration': 1715.0, 'uploader': 'thoughtworks.wistia.com', - 'upload_date': '20140603', 'timestamp': 1401832161, + 
'upload_date': '20140603', }, }, # Wistia standard embed (async) @@ -751,8 +754,8 @@ class GenericIE(InfoExtractor): 'title': 'Drip Brennan Dunn Workshop', 'description': 'a JV Webinars video from getdrip-1', 'duration': 4986.95, - 'upload_date': '20160518', 'timestamp': 1463607249, + 'upload_date': '20160518', }, 'params': { 'skip_download': True, @@ -1564,19 +1567,15 @@ class GenericIE(InfoExtractor): 'url': embed_url, 'ie_key': 'Wistia', 'uploader': video_uploader, - 'title': video_title, - 'id': video_id, } match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) if match: return { '_type': 'url_transparent', - 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')), + 'url': 'wistia:%s' % match.group('id'), 'ie_key': 'Wistia', 'uploader': video_uploader, - 'title': video_title, - 'id': match.group('id') } match = re.search( From 6756602be6b59c7bff57ccaeb33844cdc5636910 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Thu, 19 May 2016 03:42:09 +0200 Subject: [PATCH 260/501] [LocalNews8] add extractor (Closes #9200) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/localnews8.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/localnews8.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 74aba2d5c..5b96a086d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -396,6 +396,7 @@ from .livestream import ( LivestreamShortenerIE, ) from .lnkgo import LnkGoIE +from .localnews8 import LocalNews8IE from .lovehomeporn import LoveHomePornIE from .lrt import LRTIE from .lynda import ( diff --git a/youtube_dl/extractor/localnews8.py b/youtube_dl/extractor/localnews8.py new file mode 100644 index 000000000..b38d1d58a --- /dev/null +++ b/youtube_dl/extractor/localnews8.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from 
__future__ import unicode_literals + +from .common import InfoExtractor + + +class LocalNews8IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?localnews8\.com/.+?/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304', + 'md5': '477bdb188f177788c65db27ecb56649b', + 'info_dict': { + 'id': '35183304', + 'ext': 'mp4', + 'title': 'Rexburg business turns carbon fiber scraps into wedding ring', + 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.', + 'duration': '153', + 'timestamp': '1441844822', + 'uploader_id': 'api', + }} + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + partner_id = self._search_regex(r'partnerId\s*:\s*"(\d+)"', webpage, video_id) + kaltura_id = self._search_regex(r'var\s+videoIdString\s*=\s*"kaltura:(.+)";', webpage, video_id) + + return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') From 1846e9ade0fb9508459282a992539c700aa26f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 22:31:08 +0600 Subject: [PATCH 261/501] [localnews8] Fix extractor (Closes #9539) --- youtube_dl/extractor/localnews8.py | 38 ++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/localnews8.py b/youtube_dl/extractor/localnews8.py index b38d1d58a..aad396135 100644 --- a/youtube_dl/extractor/localnews8.py +++ b/youtube_dl/extractor/localnews8.py @@ -1,29 +1,47 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class LocalNews8IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?localnews8\.com/.+?/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)' _TEST = { 'url': 
'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304', - 'md5': '477bdb188f177788c65db27ecb56649b', + 'md5': 'be4d48aea61aa2bde7be2ee47691ad20', 'info_dict': { 'id': '35183304', + 'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings', 'ext': 'mp4', 'title': 'Rexburg business turns carbon fiber scraps into wedding ring', 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.', - 'duration': '153', - 'timestamp': '1441844822', + 'duration': 153, + 'timestamp': 1441844822, + 'upload_date': '20150910', 'uploader_id': 'api', - }} + } + } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - partner_id = self._search_regex(r'partnerId\s*:\s*"(\d+)"', webpage, video_id) - kaltura_id = self._search_regex(r'var\s+videoIdString\s*=\s*"kaltura:(.+)";', webpage, video_id) + webpage = self._download_webpage(url, display_id) - return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') + partner_id = self._search_regex( + r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1', + webpage, 'partner id', group='id') + kaltura_id = self._search_regex( + r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1', + webpage, 'videl id', group='id') + + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), + 'ie_key': 'Kaltura', + 'id': video_id, + 'display_id': display_id, + } From b219f5e51be520b2e23acd1ec08735fc733f9619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 21 May 2016 00:59:06 +0600 Subject: [PATCH 262/501] [brightcove:new] Improve error reporting --- youtube_dl/extractor/brightcove.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index fc7fc5b16..ef560b592 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -444,6 +444,10 @@ class BrightcoveNewIE(InfoExtractor): # non numeric ref: prefixed video id 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356', 'only_matching': True, + }, { + # unavailable video without message but with error_code + 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001', + 'only_matching': True, }] @staticmethod @@ -514,8 +518,9 @@ class BrightcoveNewIE(InfoExtractor): }) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id) - raise ExtractorError(json_data[0]['message'], expected=True) + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + raise ExtractorError( + json_data.get('message') or json_data['error_code'], expected=True) raise title = json_data['name'].strip() From c8602b2f9bcdda00398b2c54db4c1be85b75ce39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 21 May 2016 05:09:16 +0600 Subject: [PATCH 263/501] [nrk] Unquote subtitles' URLs --- youtube_dl/extractor/nrk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 7532f40c1..486e086bb 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -55,7 +55,9 @@ class NRKBaseIE(InfoExtractor): for subtitle in ('webVtt', 'timedText'): subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) if subtitle_url: - subtitles.setdefault('no', []).append({'url': subtitle_url}) + subtitles.setdefault('no', []).append({ + 'url': compat_urllib_parse_unquote(subtitle_url) + }) entries.append({ 'id': asset.get('carrierId') or 
entry_id, 'title': entry_title, From 16da9bbc29b76b6e6e1a6134a17e9f25d91296c8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:15:28 +0800 Subject: [PATCH 264/501] [common] Add _m3u8_meta_format() template For extractors who handle m3u8 manifests by themselves. (eg., AnvatoIE) Part of #9522 --- youtube_dl/extractor/common.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9f22ee930..17e866f91 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1058,12 +1058,8 @@ class InfoExtractor(object): }) return formats - def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, - entry_protocol='m3u8', preference=None, - m3u8_id=None, note=None, errnote=None, - fatal=True, live=False): - - formats = [{ + def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None): + return { 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), 'url': m3u8_url, 'ext': ext, @@ -1071,7 +1067,14 @@ class InfoExtractor(object): 'preference': preference - 1 if preference else -1, 'resolution': 'multiple', 'format_note': 'Quality selection URL', - }] + } + + def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, + entry_protocol='m3u8', preference=None, + m3u8_id=None, note=None, errnote=None, + fatal=True, live=False): + + formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] format_url = lambda u: ( u From 7b2fcbfd4ea34e6d29484f5987a36665117aefaa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:16:28 +0800 Subject: [PATCH 265/501] [common] Skip TYPE=CLOSED-CAPTIONS lines in m3u8 manifests According to [1], valid values for TYPE are AUDIO, VIDEO, SUBTITLES and CLOSED-CAPTIONS. Such a value is found in Anvato master playlists, though I don't use _extract_m3u8_formats() in the end. Part of #9522. 
[1] https://tools.ietf.org/html/draft-pantos-http-live-streaming-19#section-4.3.4.1 --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 17e866f91..4bfa610c1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1141,7 +1141,7 @@ class InfoExtractor(object): format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None + last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None # Despite specification does not mention NAME attribute for # EXT-X-STREAM-INF it still sometimes may be present stream_name = last_info.get('NAME') or last_media_name From 9f54e692d2de2d52f147f2d714d0312dbe21a5ed Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:18:29 +0800 Subject: [PATCH 266/501] [anvato] Add new extractor Used in CBSLocal (#9522) --- youtube_dl/extractor/anvato.py | 224 +++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 youtube_dl/extractor/anvato.py diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py new file mode 100644 index 000000000..cb29cf111 --- /dev/null +++ b/youtube_dl/extractor/anvato.py @@ -0,0 +1,224 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import hashlib +import json +import random +import time + +from .common import InfoExtractor +from ..aes import aes_encrypt +from ..compat import compat_str +from ..utils import ( + bytes_to_intlist, + determine_ext, + intlist_to_bytes, + int_or_none, + strip_jsonp, +) + + +def md5_text(s): + if not isinstance(s, compat_str): + s = compat_str(s) + return hashlib.md5(s.encode('utf-8')).hexdigest() + + +class AnvatoIE(InfoExtractor): + # Copied from anvplayer.min.js + 
_ANVACK_TABLE = { + 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', + 'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA', + 'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP', + 'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv', + 'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7', + 'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR', + 'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg', + 'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto', + 'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY', + 'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh', + 'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK', + 'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D', + 'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad', + 'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp', + 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih', + 'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR', + 'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 
'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW', + 'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su', + 'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q', + 'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5', + 'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3', + 'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI', + 'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s', + 'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz', + 'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg', + 'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x', + 'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH', + 'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX', + 'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc', + 'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK', + 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7', + 'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C', + 'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e', + 'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 
'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1', + 'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re', + 'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51', + 'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho', + 'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9', + 'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH', + 'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F', + 'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo', + 'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR', + 'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa', + 'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk', + 'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ', + 'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ', + 'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m', + 'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b', + 'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3', + 'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK', + 
'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', + 'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', + 'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F', + 'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx', + 'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ', + 'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH', + 'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm', + 'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt', + 'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl', + 'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b', + 'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV', + 'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg', + 'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk', + 'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT', + 'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa', + 'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv', + 'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k', + 
'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI', + 'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr', + 'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw', + 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K', + 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH', + 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK', + 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu', + 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', + 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', + 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK', + 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n', + 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD', + 'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk', + 'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn', + 'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W', + 'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ', + 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ' + } + + _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' + + def __init__(self, *args, 
**kwargs): + super(AnvatoIE, self).__init__(*args, **kwargs) + self.__server_time = None + + def _server_time(self, access_key, video_id): + if self.__server_time is not None: + return self.__server_time + + self.__server_time = int(self._download_json( + self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id, + note='Fetching server time')['server_time']) + + return self.__server_time + + def _api_prefix(self, access_key): + return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage') + + def _get_video_json(self, access_key, video_id): + # See et() in anvplayer.min.js, which is an alias of getVideoJSON() + video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key) + server_time = self._server_time(access_key, video_id) + input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time)) + + auth_secret = intlist_to_bytes(aes_encrypt( + bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) + + video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii') + anvrid = md5_text(time.time() * 1000 * random.random())[:30] + payload = { + 'api': { + 'anvrid': anvrid, + 'anvstk': md5_text('%s|%s|%d|%s' % ( + access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])), + 'anvts': server_time, + }, + } + + return self._download_json( + video_data_url, video_id, transform_source=strip_jsonp, + data=json.dumps(payload).encode('utf-8')) + + def _extract_anvato_videos(self, webpage, video_id): + anvplayer_data = self._parse_json(self._html_search_regex( + r'<script[^>]+data-anvp=\'([^\']+)\'', webpage, + 'Anvato player data'), video_id) + + video_id = anvplayer_data['video'] + access_key = anvplayer_data['accessKey'] + + video_data = self._get_video_json(access_key, video_id) + + formats = [] + for published_url in video_data['published_urls']: + video_url = published_url['embed_url'] + ext = determine_ext(video_url) 
+ + if ext == 'smil': + formats.extend(self._extract_smil_formats(video_url, video_id)) + continue + + tbr = int_or_none(published_url.get('kbps')) + a_format = { + 'url': video_url, + 'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(), + 'tbr': tbr if tbr != 0 else None, + } + + if ext == 'm3u8': + # Not using _extract_m3u8_formats here as individual media + # playlists are also included in published_urls. + if tbr is None: + formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls')) + continue + else: + a_format.update({ + 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), + 'ext': 'mp4', + }) + elif ext == 'mp3': + a_format['vcodec'] = 'none' + else: + a_format.update({ + 'width': int_or_none(published_url.get('width')), + 'height': int_or_none(published_url.get('height')), + }) + formats.append(a_format) + + self._sort_formats(formats) + + subtitles = {} + for caption in video_data.get('captions', []): + a_caption = { + 'url': caption['url'], + 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None + } + subtitles.setdefault(caption['language'], []).append(a_caption) + + return { + 'id': video_id, + 'formats': formats, + 'title': video_data.get('def_title'), + 'description': video_data.get('def_description'), + 'categories': video_data.get('categories'), + 'thumbnail': video_data.get('thumbnail'), + 'subtitles': subtitles, + } From 612b5f403e33d5c164b5c0bbad9f01ef6d38d050 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:38:01 +0800 Subject: [PATCH 267/501] [jwplatform] Improved m3u8 and rtmp support Changes made for SendtoNewsIE. 
Part of #9522 --- youtube_dl/extractor/jwplatform.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 8a5e562db..0aa6fc750 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -5,33 +5,47 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, float_or_none, int_or_none, ) class JWPlatformBaseIE(InfoExtractor): - def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True): + def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None): video_data = jwplayer_data['playlist'][0] formats = [] for source in video_data['sources']: source_url = self._proto_relative_url(source['file']) source_type = source.get('type') or '' - if source_type in ('application/vnd.apple.mpegurl', 'hls'): + if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', fatal=False)) + source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) elif source_type.startswith('audio'): formats.append({ 'url': source_url, 'vcodec': 'none', }) else: - formats.append({ + a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), 'height': int_or_none(source.get('height')), - }) + } + if source_url.startswith('rtmp'): + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as + # of jwplayer.flash.swf + rtmp_url, prefix, play_path = re.split( + r'((?:mp4|mp3|flv):)', source_url, 1) + a_format.update({ + 'url': rtmp_url, + 'ext': 'flv', + 'play_path': prefix + play_path, + }) + if rtmp_params: + a_format.update(rtmp_params) + formats.append(a_format) self._sort_formats(formats) subtitles = {} From 5ce3d5bd1b0933a26a4224643cf8d3ad14330e17 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Sat, 21 May 2016 13:39:42 +0800 Subject: [PATCH 268/501] [sendtonews] Add new extractor Used in CBSLocal. Part of #9522 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/sendtonews.py | 86 ++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 youtube_dl/extractor/sendtonews.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5b96a086d..8352b3c3a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -670,6 +670,7 @@ from .screencastomatic import ScreencastOMaticIE from .screenjunkies import ScreenJunkiesIE from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .senateisvp import SenateISVPIE +from .sendtonews import SendtoNewsIE from .servingsys import ServingSysIE from .sexu import SexuIE from .shahid import ShahidIE diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py new file mode 100644 index 000000000..1c636f672 --- /dev/null +++ b/youtube_dl/extractor/sendtonews.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .jwplatform import JWPlatformBaseIE +from ..compat import compat_parse_qs +from ..utils import ( + ExtractorError, + parse_duration, +) + + +class SendtoNewsIE(JWPlatformBaseIE): + _VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)' + + _TEST = { + # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/ + 'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes', + 'info_dict': { + 'id': 'GxfCe0Zo7D-175909-5588', + 'ext': 'mp4', + 'title': 'Recap: CLE 15, CIN 6', + 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', + 'duration': 49, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + _URL_TEMPLATE = 
'//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s' + + @classmethod + def _extract_url(cls, webpage): + mobj = re.search(r'''(?x)<script[^>]+src=([\'"]) + (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\? + .*\bSC=(?P<SC>[0-9a-zA-Z-]+).* + \1>''', webpage) + if mobj: + sk, mk, pk = mobj.group('SC').split('-') + return cls._URL_TEMPLATE % (sk, mk, pk) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + params = compat_parse_qs(mobj.group('query')) + + if 'SK' not in params or 'MK' not in params or 'PK' not in params: + raise ExtractorError('Invalid URL', expected=True) + + video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]]) + + webpage = self._download_webpage(url, video_id) + + jwplayer_data_str = self._search_regex( + r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data') + js_vars = { + 'w': 1024, + 'h': 768, + 'modeVar': 'html5', + } + for name, val in js_vars.items(): + js_val = '%d' % val if isinstance(val, int) else '"%s"' % val + jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val) + + info_dict = self._parse_jwplayer_data( + self._parse_json(jwplayer_data_str, video_id), + video_id, require_title=False, rtmp_params={'no_resume': True}) + + title = self._html_search_regex( + r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title') + description = self._html_search_regex( + r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage, + 'description', fatal=False) + duration = parse_duration(self._html_search_regex( + r'<div[^>]+class="embedDetails">([0-9:]+)', webpage, + 'duration', fatal=False)) + + info_dict.update({ + 'title': title, + 'description': description, + 'duration': duration, + }) + + return info_dict From 661d46b28f6de2772fc642c36b505a3c7b9a3b10 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:40:45 +0800 Subject: [PATCH 269/501] [cbslocal] Add new extractor (closes #9522) --- 
youtube_dl/extractor/cbslocal.py | 84 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 85 insertions(+) create mode 100644 youtube_dl/extractor/cbslocal.py diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py new file mode 100644 index 000000000..74adb38a6 --- /dev/null +++ b/youtube_dl/extractor/cbslocal.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import calendar +import datetime + +from .anvato import AnvatoIE +from .sendtonews import SendtoNewsIE +from ..compat import compat_urlparse + + +class CBSLocalIE(AnvatoIE): + _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)' + + _TESTS = [{ + # Anvato backend + 'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis', + 'md5': 'f0ee3081e3843f575fccef901199b212', + 'info_dict': { + 'id': '3401037', + 'ext': 'mp4', + 'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'', + 'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. 
Randy Paige reports.', + 'thumbnail': 're:^https?://.*', + 'timestamp': 1463440500, + 'upload_date': '20160516', + 'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\KCBSTV', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\AOL', + 'Syndication\\Yahoo', + 'Syndication\\Tribune', + 'Syndication\\Curb.tv', + 'Content\\News' + ], + }, + }, { + # SendtoNews embed + 'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/', + 'info_dict': { + 'id': 'GxfCe0Zo7D-175909-5588', + 'ext': 'mp4', + 'title': 'Recap: CLE 15, CIN 6', + 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', + 'upload_date': '20160516', + 'timestamp': 1463433840, + 'duration': 49, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + sendtonews_url = SendtoNewsIE._extract_url(webpage) + if sendtonews_url: + info_dict = { + '_type': 'url_transparent', + 'url': compat_urlparse.urljoin(url, sendtonews_url), + } + else: + info_dict = self._extract_anvato_videos(webpage, display_id) + + time_str = self._html_search_regex( + r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) + timestamp = None + if time_str: + timestamp = calendar.timegm(datetime.datetime.strptime( + time_str, '%b %d, %Y %I:%M %p').timetuple()) + + info_dict.update({ + 'display_id': display_id, + 'timestamp': timestamp, + }) + + return info_dict diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8352b3c3a..c93cd2765 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -111,6 +111,7 @@ from .cbc import ( CBCPlayerIE, ) from .cbs import CBSIE +from .cbslocal import CBSLocalIE from .cbsinteractive import CBSInteractiveIE from .cbsnews import ( CBSNewsIE, From 
115c65793af4c56c8f1986d2640105fc7e760c13 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:50:38 +0800 Subject: [PATCH 270/501] [jwplatform] Don't fail with RTMP URLs without mp4:, mp3: or flv: --- youtube_dl/extractor/jwplatform.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 0aa6fc750..fa6f335e1 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -34,15 +34,18 @@ class JWPlatformBaseIE(InfoExtractor): 'height': int_or_none(source.get('height')), } if source_url.startswith('rtmp'): + a_format['ext'] = 'flv', + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf - rtmp_url, prefix, play_path = re.split( + rtmp_url_parts = re.split( r'((?:mp4|mp3|flv):)', source_url, 1) - a_format.update({ - 'url': rtmp_url, - 'ext': 'flv', - 'play_path': prefix + play_path, - }) + if len(rtmp_url_parts) == 3: + rtmp_url, prefix, play_path = rtmp_url_parts + a_format.update({ + 'url': rtmp_url, + 'play_path': prefix + play_path, + }) if rtmp_params: a_format.update(rtmp_params) formats.append(a_format) From 4c718d3c50b8d80bf07e44c73a5bdcd98544388f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 21 May 2016 17:37:35 +0200 Subject: [PATCH 271/501] [rtve] Recognize 'filmoteca' URLs --- youtube_dl/extractor/rtve.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index edd0d108e..f11e3588b 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -64,7 +64,7 @@ def _decrypt_url(png): class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' - _VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = 
r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -87,6 +87,9 @@ class RTVEALaCartaIE(InfoExtractor): }, { 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 'only_matching': True, + }, { + 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', + 'only_matching': True, }] def _real_initialize(self): From c8cc3745fbb34d39f4dfb0c3facb6fa9278af93c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:18:59 +0200 Subject: [PATCH 272/501] release 2016.05.21 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++++++ youtube_dl/version.py | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7024fc729..00cc634e3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.16 +[debug] youtube-dl version 2016.05.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 29db13883..cd6bfa51c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -16,6 +16,8 @@ - **9gag** - **abc.net.au** - **Abc7News** + - **abcnews** + - **abcnews:video** - **AcademicEarth:Course** - **acast** - **acast:channel** @@ -104,6 +106,7 @@ - **CBCPlayer** - **CBS** - **CBSInteractive** + - **CBSLocal** - **CBSNews**: CBS News - **CBSNewsLiveVideo**: CBS News Live Videos - **CBSSports** @@ -213,6 +216,7 @@ - **Flickr** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FootyRoom** + - **Formula1** - **FOX** - **Foxgay** - **FoxNews**: Fox News and Fox Business Video @@ -316,6 +320,7 @@ - **la7.tv** - **Laola1Tv** - **Le**: 乐视网 + - **Learnr** - **Lecture2Go** - **Lemonde** - **LePlaylist** @@ -331,6 +336,7 @@ - **livestream** - **livestream:original** - **LnkGo** + - **LocalNews8** - **LoveHomePorn** - **lrt.lt** - **lynda**: lynda.com videos @@ -556,6 +562,7 @@ - **ScreenJunkies** - **ScreenwaveMedia** - **SenateISVP** + - **SendtoNews** - **ServingSys** - **Sexu** - **Shahid** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 
5a0fdd6ce..4bdb5f352 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.16' +__version__ = '2016.05.21' From 7e642e4fd68f9418ecdcb852aa34a4e49c41e58b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:24:53 +0200 Subject: [PATCH 273/501] release: check for pandoc Abort releaseing if pandoc is missing. (pandoc was not included in my essential app database, and thus missing on my new machine.) --- devscripts/release.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/devscripts/release.sh b/devscripts/release.sh index 8dea55dbb..8b37152a7 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -33,6 +33,7 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th useless_files=$(find youtube_dl -type f -not -name '*.py') if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi +if ! type pandoc 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From 598869afb139707c7064a6c8397bbcf09b2b43f5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:27:00 +0200 Subject: [PATCH 274/501] release 2016.05.21.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 00cc634e3..7f8650553 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.21 +[debug] youtube-dl version 2016.05.21.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4bdb5f352..0a2e43d05 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.21' +__version__ = '2016.05.21.1' From d2fee3c99e9d1c8eba5bd55aa3a9dd5702b23b34 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:46:42 +0200 Subject: [PATCH 275/501] release.sh: also check for python3 rsa module --- devscripts/release.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 8b37152a7..7dd391b38 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -33,7 
+33,8 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th useless_files=$(find youtube_dl -type f -not -name '*.py') if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi -if ! type pandoc 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi +if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi +if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From e03b35b8f998692aa853c6dbd498655fc831f9e7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:47:39 +0200 Subject: [PATCH 276/501] release 2016.05.21.2 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7f8650553..2d80d45b6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.21.1 +[debug] youtube-dl version 2016.05.21.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0a2e43d05..522a56669 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.21.1' +__version__ = '2016.05.21.2' From 0db9a05f88cbbe6709da3875b798634dc536536b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 02:15:56 +0600 Subject: [PATCH 277/501] [periscope:user] Adapt to layout changes (Closes #9563) --- youtube_dl/extractor/periscope.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 0a4bc761d..b2008decc 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + parse_iso8601, + unescapeHTML, +) class PeriscopeIE(InfoExtractor): @@ -92,6 +95,7 @@ class PeriscopeUserIE(InfoExtractor): 'info_dict': { 'id': 'LularoeHusbandMike', 'title': 
'LULAROE HUSBAND MIKE', + 'description': 'md5:6cf4ec8047768098da58e446e82c82f0', }, # Periscope only shows videos in the last 24 hours, so it's possible to # get 0 videos @@ -103,16 +107,19 @@ class PeriscopeUserIE(InfoExtractor): webpage = self._download_webpage(url, user_id) - broadcast_data = self._parse_json(self._html_search_meta( - 'broadcast-data', webpage, default='{}'), user_id) - username = broadcast_data.get('user', {}).get('display_name') - user_broadcasts = self._parse_json( - self._html_search_meta('user-broadcasts', webpage, default='{}'), + data_store = self._parse_json( + unescapeHTML(self._search_regex( + r'data-store=(["\'])(?P<data>.+?)\1', + webpage, 'data store', default='{}', group='data')), user_id) + user = data_store.get('User', {}).get('user', {}) + title = user.get('display_name') or user.get('username') + description = user.get('description') + entries = [ self.url_result( 'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id'])) - for broadcast in user_broadcasts.get('broadcasts', [])] + for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])] - return self.playlist_result(entries, user_id, username) + return self.playlist_result(entries, user_id, title, description) From 92d221ad4858a62143ce5645c56261b26023308e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 02:39:15 +0600 Subject: [PATCH 278/501] [periscope] Update uploader_id (Closes #9565) --- youtube_dl/extractor/periscope.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index b2008decc..c23b314e7 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -45,8 +45,11 @@ class PeriscopeIE(InfoExtractor): broadcast = broadcast_data['broadcast'] status = broadcast['status'] - uploader = broadcast.get('user_display_name') or broadcast_data.get('user', 
{}).get('display_name') - uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id') + user = broadcast_data.get('user', {}) + + uploader = broadcast.get('user_display_name') or user.get('display_name') + uploader_id = (broadcast.get('username') or user.get('username') or + broadcast.get('user_id') or user.get('id')) title = '%s - %s' % (uploader, status) if uploader else status state = broadcast.get('state').lower() From c8831015f41879e0d8788c228acf52579e6cf12b Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Sat, 21 May 2016 18:51:34 +0200 Subject: [PATCH 279/501] [ComedyCentral] Add support for tosh.cc.com and cc.com/video-clips --- youtube_dl/extractor/comedycentral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 0c59102e0..830073834 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow) |https?://(:www\.)? 
- (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ + (?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/ ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| (?P<clip> - (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) + (?:(?:guests/[^/]+|videos|video-clips|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) )| From 0150a00f333371b366ff10871458e0b071f20ee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 02:58:41 +0600 Subject: [PATCH 280/501] [cc] Add test for tosh.cc (Closes #9566) --- youtube_dl/extractor/comedycentral.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 830073834..2b6aaa3aa 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -47,7 +47,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): (?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/ ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| (?P<clip> - (?:(?:guests/[^/]+|videos|video-clips|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) + (?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) )| @@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): }, { 'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel', 'only_matching': True, + }, { + 'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans', + 'only_matching': True, }] _available_formats 
= ['3500', '2200', '1700', '1200', '750', '400'] From 898f4b49ccc828f86a075d656aa9a1e1428e538c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 06:47:22 +0600 Subject: [PATCH 281/501] [theplatform] Add _extract_urls --- youtube_dl/extractor/theplatform.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index a25417f94..02dbef913 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -151,6 +151,22 @@ class ThePlatformIE(ThePlatformBaseIE): 'only_matching': True, }] + @classmethod + def _extract_urls(cls, webpage): + m = re.search( + r'''(?x) + <meta\s+ + property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+ + content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2 + ''', webpage) + if m: + return [m.group('url')] + + matches = re.findall( + r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) + if matches: + return list(zip(*matches))[1] + @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): flags = '10' if include_qs else '00' From 4d8819d2492e10f10bd09490f8f203d2f5e2cac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 06:52:39 +0600 Subject: [PATCH 282/501] [extractor/generic] Add support for theplatform embeds (Closes #8636, closes #9476) --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c368f08e1..ad6a40730 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -62,6 +62,7 @@ from .digiteka import DigitekaIE from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE +from .theplatform import ThePlatformIE class GenericIE(InfoExtractor): @@ 
-1499,6 +1500,11 @@ class GenericIE(InfoExtractor): if bc_urls: return _playlist_from_matches(bc_urls, ie='BrightcoveNew') + # Look for ThePlatform embeds + tp_urls = ThePlatformIE._extract_urls(webpage) + if tp_urls: + return _playlist_from_matches(tp_urls, ie='ThePlatform') + # Look for embedded rtl.nl player matches = re.findall( r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', From c6b9cf05e1dbd5e2534607fd3319ac73791d1c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 08:28:41 +0600 Subject: [PATCH 283/501] [utils] Do not fail on unknown date formats in unified_strdate --- youtube_dl/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5301d0740..d65f5e833 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1055,7 +1055,10 @@ def unified_strdate(date_str, day_first=True): if upload_date is None: timetuple = email.utils.parsedate_tz(date_str) if timetuple: - upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') + try: + upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') + except ValueError: + pass if upload_date is not None: return compat_str(upload_date) From 21a19aa94d7a650d90ab258bd277a8648378c135 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 08:59:28 +0600 Subject: [PATCH 284/501] [README.md] Clarify location for youtube-dl.exe --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2febab2c..96cefb548 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl -Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in 
their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). +Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory (`%USERPROFILE%`, for example `C:\Users\<user name>\` or `C:\Documents and Settings\<user name>\`) or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\system32` (e.g. **do not** put in `C:\Windows\System32`). OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). From 4f3a25c2b413977bf0ea6f7bd16d3d20259470bb Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 09:00:08 +0600 Subject: [PATCH 285/501] [README.md] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 96cefb548..759d2bb01 100644 --- a/README.md +++ b/README.md @@ -433,7 +433,7 @@ You can use `--ignore-config` if you want to disable the configuration file for ### Authentication with `.netrc` file -You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only: +You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. 
You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only: ``` touch $HOME/.netrc chmod a-rwx,u+rw $HOME/.netrc From 9b06b0fb9297efe47a8de71142e926dda5031b65 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 09:26:06 +0600 Subject: [PATCH 286/501] [README.md] Clarify updating on Windows --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 759d2bb01..649e78186 100644 --- a/README.md +++ b/README.md @@ -675,6 +675,8 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). +Note that on Windows before running the update command in command prompt you should either `cd` to the directory where `youtube-dl.exe` is located or use the full path to `youtube-dl.exe` (e.g. `C:\Program Files (x86)\youtube-dl\youtube-dl.exe -U`). + If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. 
From e5871c672b32d30fe4a943ad1375a7000829f03c Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 09:36:07 +0600 Subject: [PATCH 287/501] [README.md] Clarify location for youtube-dl.exe even more %USERPROFILE% not in %PATH% by default. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 649e78186..185df5e76 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl -Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory (`%USERPROFILE%`, for example `C:\Users\<user name>\` or `C:\Documents and Settings\<user name>\`) or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\system32` (e.g. **do not** put in `C:\Windows\System32`). +Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). 
From e9297256d405651428d5d52f0bb6b32ca66ea15a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 10:06:45 +0600 Subject: [PATCH 288/501] [update] Fix youtube-dl.exe updating from arbitrary directory (Closes #2718) --- youtube_dl/update.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 676ebe1c4..ebce9666a 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -83,11 +83,8 @@ def update_self(to_screen, verbose, opener): print_notes(to_screen, versions_info['versions']) - filename = sys.argv[0] - # Py2EXE: Filename could be different - if hasattr(sys, 'frozen') and not os.path.isfile(filename): - if os.path.isfile(filename + '.exe'): - filename += '.exe' + # sys.executable is set to the full pathname of the exe-file for py2exe + filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0] if not os.access(filename, os.W_OK): to_screen('ERROR: no write permissions on %s' % filename) @@ -95,7 +92,7 @@ def update_self(to_screen, verbose, opener): # Py2EXE if hasattr(sys, 'frozen'): - exe = os.path.abspath(filename) + exe = filename directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): to_screen('ERROR: no write permissions on %s' % directory) From c776b99691e5fdec75cc7d5c268c260f23bd2ac7 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 10:14:02 +0600 Subject: [PATCH 289/501] [README.md] Remove Windows updating trickery Windows updating fixed in e9297256d405651428d5d52f0bb6b32ca66ea15a. 
--- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 185df5e76..00f42e056 100644 --- a/README.md +++ b/README.md @@ -675,8 +675,6 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). -Note that on Windows before running the update command in command prompt you should either `cd` to the directory where `youtube-dl.exe` is located or use the full path to `youtube-dl.exe` (e.g. `C:\Program Files (x86)\youtube-dl\youtube-dl.exe -U`). - If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. 
From 70346165fe9348b54e8d71fb40654d135af945f8 Mon Sep 17 00:00:00 2001 From: Thor77 <xXThor77Xx@gmail.com> Date: Sun, 22 May 2016 08:15:39 +0200 Subject: [PATCH 290/501] [bandcamp] raise ExtractorError when track not streamable (#9465) * [bandcamp] raise ExtractorError when track not streamable * [bandcamp] update md5 for second test * don't rely on json-data, but just check for 'file' * don't rely on presence of 'file' --- youtube_dl/extractor/bandcamp.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index c1ef8051d..991ab0676 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor): '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', - 'md5': '2b68e5851514c20efdff2afc5603b8b4', + 'md5': '73d0b3171568232574e45652f8720b5c', 'info_dict': { 'id': '2650410135', 'ext': 'mp3', @@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor): if m_trackinfo: json_code = m_trackinfo.group(1) data = json.loads(json_code)[0] + track_id = compat_str(data['id']) + + if not data.get('file'): + raise ExtractorError('Not streamable', video_id=track_id, expected=True) formats = [] for format_id, format_url in data['file'].items(): @@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor): self._sort_formats(formats) return { - 'id': compat_str(data['id']), + 'id': track_id, 'title': data['title'], 'formats': formats, 'duration': float_or_none(data.get('duration')), From a4a7c44bd337cdda534ad879c516d5b33e25a893 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 15:04:51 +0600 Subject: [PATCH 291/501] [README.md] Document solution for extremely slow start on Windows --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 00f42e056..ef0e265c8 100644 --- a/README.md +++ 
b/README.md @@ -693,6 +693,10 @@ hash -r Again, from then on you'll be able to update with `sudo youtube-dl -U`. +### youtube-dl is extremely slow to start on Windows + +Add a file exclusion for `youtube-dl.exe` in Windows Defender settings. + ### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos. From 4a12077855026a0ca9cf31868c13d2d029f7a723 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 22 May 2016 22:22:27 +0800 Subject: [PATCH 292/501] [genric] Eliminate duplicated video URLs (closes #6562) --- youtube_dl/extractor/generic.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ad6a40730..bb96e7231 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1194,6 +1194,16 @@ class GenericIE(InfoExtractor): 'uploader': 'Lake8737', } }, + # Duplicated embedded video URLs + { + 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443', + 'info_dict': { + 'id': '149298443_480_16c25b74_2', + 'ext': 'mp4', + 'title': 'vs. 
Blue Orange Spring Game', + 'uploader': 'www.hudl.com', + }, + }, ] def report_following_redirect(self, new_url): @@ -2111,7 +2121,7 @@ class GenericIE(InfoExtractor): raise UnsupportedError(url) entries = [] - for video_url in found: + for video_url in orderedSet(found): video_url = unescapeHTML(video_url) video_url = video_url.replace('\\/', '/') video_url = compat_urlparse.urljoin(url, video_url) From c5f51551006c9d9ad7263cb3d3d90d1c91c8c648 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 22 Apr 2016 09:36:14 +0100 Subject: [PATCH 293/501] [wat] extract all formats --- youtube_dl/extractor/wat.py | 129 +++++++++++++++++------------------- 1 file changed, 59 insertions(+), 70 deletions(-) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 5227bb5ad..de7d6b559 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -2,25 +2,26 @@ from __future__ import unicode_literals import re -import hashlib from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, unified_strdate, + HEADRequest, + float_or_none, ) class WatIE(InfoExtractor): - _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)' + _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)' IE_NAME = 'wat.tv' _TESTS = [ { 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html', - 'md5': 'ce70e9223945ed26a8056d413ca55dc9', + 'md5': '83d882d9de5c9d97f0bb2c6273cde56a', 'info_dict': { 'id': '11713067', - 'display_id': 'soupe-figues-l-orange-aux-epices', 'ext': 'mp4', 'title': 'Soupe de figues à l\'orange et aux épices', 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.', @@ -33,7 +34,6 @@ class WatIE(InfoExtractor): 'md5': 'fbc84e4378165278e743956d9c1bf16b', 'info_dict': { 'id': '11713075', - 'display_id': 'gregory-lemarchal-voix-ange', 
'ext': 'mp4', 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)', 'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3', @@ -44,96 +44,85 @@ class WatIE(InfoExtractor): }, ] - def download_video_info(self, real_id): + def _real_extract(self, url): + video_id = self._match_id(url) + video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36)) + # 'contentv4' is used in the website, but it also returns the related # videos, we don't need them - info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id) - return info['media'] - - def _real_extract(self, url): - def real_id_for_chapter(chapter): - return chapter['tc_start'].split('-')[0] - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - real_id = mobj.group('real_id') - if not real_id: - short_id = mobj.group('short_id') - webpage = self._download_webpage(url, display_id or short_id) - real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id') - - video_info = self.download_video_info(real_id) + video_info = self._download_json( + 'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media'] error_desc = video_info.get('error_desc') if error_desc: raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) - geo_list = video_info.get('geoList') - country = geo_list[0] if geo_list else '' - chapters = video_info['chapters'] first_chapter = chapters[0] - files = video_info['files'] - first_file = files[0] - if real_id_for_chapter(first_chapter) != real_id: + def video_id_for_chapter(chapter): + return chapter['tc_start'].split('-')[0] + + if video_id_for_chapter(first_chapter) != video_id: self.to_screen('Multipart video detected') - chapter_urls = [] - for chapter in chapters: - chapter_id = real_id_for_chapter(chapter) - # Yes, when we this chapter is processed by WatIE, - # it will download the info again - chapter_info = 
self.download_video_info(chapter_id) - chapter_urls.append(chapter_info['url']) - entries = [self.url_result(chapter_url) for chapter_url in chapter_urls] - return self.playlist_result(entries, real_id, video_info['title']) - - upload_date = None - if 'date_diffusion' in first_chapter: - upload_date = unified_strdate(first_chapter['date_diffusion']) + entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters] + return self.playlist_result(entries, video_id, video_info['title']) # Otherwise we can continue and extract just one part, we have to use - # the short id for getting the video url + # the video id for getting the video url - formats = [{ - 'url': 'http://wat.tv/get/android5/%s.mp4' % real_id, - 'format_id': 'Mobile', - }] + date_diffusion = first_chapter.get('date_diffusion') + upload_date = unified_strdate(date_diffusion) if date_diffusion else None - fmts = [('SD', 'web')] - if first_file.get('hasHD'): - fmts.append(('HD', 'webhd')) + def extract_url(path_template, url_type): + req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id) + head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type) + red_url = head.geturl() + if req_url == red_url: + raise ExtractorError( + '%s said: Sorry, this video is not available from your country.' 
% self.IE_NAME, + expected=True) + return red_url - def compute_token(param): - timestamp = '%08x' % int(self._download_webpage( - 'http://www.wat.tv/servertime', real_id, - 'Downloading server time').split('|')[0]) - magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564' - return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp) + m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8') + http_url = extract_url('android5/%s.mp4', 'http') - for fmt in fmts: - webid = '/%s/%s' % (fmt[1], real_id) - video_url = self._download_webpage( - 'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country), - real_id, - 'Downloading %s video URL' % fmt[0], - 'Failed to download %s video URL' % fmt[0], - False) - if not video_url: + formats = [] + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + formats.extend(m3u8_formats) + formats.extend(self._extract_f4m_formats( + m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + for m3u8_format in m3u8_formats: + mobj = re.search( + r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url']) + if not mobj: continue - formats.append({ - 'url': video_url, - 'ext': 'mp4', - 'format_id': fmt[0], + abr, vbr = mobj.groups() + abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000) + m3u8_format.update({ + 'vbr': vbr, + 'abr': abr, }) + if not vbr or not abr: + continue + f = m3u8_format.copy() + f.update({ + 'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url), + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + formats.append(f) + self._sort_formats(formats) return { - 'id': real_id, - 'display_id': display_id, + 'id': video_id, 'title': first_chapter['title'], 'thumbnail': first_chapter['preview'], 'description': first_chapter['description'], 'view_count': 
video_info['views'], 'upload_date': upload_date, - 'duration': first_file['duration'], + 'duration': video_info['files'][0]['duration'], 'formats': formats, } From db3b8b2103099a8859402f2167d7ad1a8fa66829 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sun, 22 May 2016 16:54:41 +0100 Subject: [PATCH 294/501] [tf1] add support for more related web sites --- youtube_dl/extractor/tf1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 3f54b2744..aff5121b9 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" - _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html' + _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)' _TESTS = [{ 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'info_dict': { @@ -48,6 +48,6 @@ class TF1IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1', + r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1', webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From eb7941e3e6e92bac26f5d21525fc8ac89c934abe Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 May 2016 01:34:08 +0800 Subject: [PATCH 295/501] [compat] Fix for XML with <!DOCTYPE> in Python 2.7 and 3.2 Such XML documents cause DeprecationWarning if python is run with `-W error` --- test/test_compat.py | 6 ++++++ youtube_dl/compat.py | 17 ++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/test/test_compat.py 
b/test/test_compat.py index 539b30540..f5317ac3e 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) + def test_compat_etree_fromstring_doctype(self): + xml = '''<?xml version="1.0"?> +<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd"> +<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>''' + compat_etree_fromstring(xml) + def test_struct_unpack(self): self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 1392361a1..06e5f3ff6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -245,13 +245,20 @@ try: except ImportError: # Python 2.6 from xml.parsers.expat import ExpatError as compat_xml_parse_error + +etree = xml.etree.ElementTree + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + if sys.version_info[0] >= 3: - compat_etree_fromstring = xml.etree.ElementTree.fromstring + def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) else: # python 2.x tries to encode unicode strings with ascii (see the # XMLParser._fixtext method) - etree = xml.etree.ElementTree - try: _etree_iter = etree.Element.iter except AttributeError: # Python <=2.6 @@ -265,7 +272,7 @@ else: # 2.7 source def _XML(text, parser=None): if not parser: - parser = etree.XMLParser(target=etree.TreeBuilder()) + parser = etree.XMLParser(target=_TreeBuilder()) parser.feed(text) return parser.close() @@ -277,7 +284,7 @@ else: return el def compat_etree_fromstring(text): - doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) for el in _etree_iter(doc): 
if el.text is not None and isinstance(el.text, bytes): el.text = el.text.decode('utf-8') From 7a46542f97c99e47ad86707bf21628630c8d871e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 May 2016 01:38:00 +0800 Subject: [PATCH 296/501] [livestream] Video IDs should always be strings (#2234) --- youtube_dl/extractor/livestream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index eada7c299..0edc06c43 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor): } def _extract_stream_info(self, stream_info): - broadcast_id = stream_info['broadcast_id'] + broadcast_id = compat_str(stream_info['broadcast_id']) is_live = stream_info.get('is_live') formats = [] From 78d3b3e2137f6be75b64e9bbfdec88cb420a91d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 May 2016 01:39:09 +0800 Subject: [PATCH 297/501] [generic] Improve Livestream detection (closes #2234) --- youtube_dl/extractor/generic.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bb96e7231..303e112d2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -784,6 +784,19 @@ class GenericIE(InfoExtractor): 'title': 'Rosetta #CometLanding webcast HL 10', } }, + # Another Livestream embed, without 'new.' 
in URL + { + 'url': 'https://www.freespeech.org/', + 'info_dict': { + 'id': '123537347', + 'ext': 'mp4', + 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # Live stream + 'skip_download': True, + }, + }, # LazyYT { 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', @@ -1878,7 +1891,7 @@ class GenericIE(InfoExtractor): return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') mobj = re.search( - r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"', + r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'Livestream') From 102810ef0402834bd5d43e70a5e397f2a581a5dc Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sun, 22 May 2016 20:36:23 +0100 Subject: [PATCH 298/501] [voxmedia] fix volume embed extraction --- youtube_dl/extractor/voxmedia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index 0c6b1f030..9d73600aa 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -117,7 +117,7 @@ class VoxMediaIE(InfoExtractor): volume_webpage = self._download_webpage( 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) + r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) for provider_video_type in ('ooyala', 'youtube'): provider_video_id = video_data.get('%s_id' % provider_video_type) if provider_video_id: From e54373204ab6c5be36823695a571680d9a641ba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 May 2016 03:44:04 +0600 Subject: [PATCH 299/501] 
[lifenews] Fix metadata extraction --- youtube_dl/extractor/lifenews.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index ba2f80a75..4b1fb9772 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -7,10 +7,10 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( determine_ext, - int_or_none, - remove_end, - unified_strdate, ExtractorError, + int_or_none, + parse_iso8601, + remove_end, ) @@ -28,7 +28,9 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Мужчина нашел дома архив оборонного завода', 'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26', + 'timestamp': 1344154740, 'upload_date': '20120805', + 'view_count': int, } }, { # single video embedded via iframe @@ -39,7 +41,9 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ', 'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. 
', + 'timestamp': 1427961840, 'upload_date': '20150402', + 'view_count': int, } }, { # two videos embedded via iframe @@ -48,7 +52,8 @@ class LifeNewsIE(InfoExtractor): 'id': '153461', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', - 'upload_date': '20150505', + 'timestamp': 1430825520, + 'view_count': int, }, 'playlist': [{ 'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795', @@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', + 'timestamp': 1430825520, 'upload_date': '20150505', }, }, { @@ -66,6 +72,7 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', + 'timestamp': 1430825520, 'upload_date': '20150505', }, }], @@ -100,21 +107,17 @@ class LifeNewsIE(InfoExtractor): description = self._og_search_description(webpage) view_count = self._html_search_regex( - r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False) - comment_count = self._html_search_regex( - r'=\'commentCount\'[^>]*>\s*(\d+)\s*<', - webpage, 'comment count', fatal=False) + r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>', + webpage, 'view count', fatal=False, group='value') - upload_date = self._html_search_regex( - r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False) - if upload_date is not None: - upload_date = 
unified_strdate(upload_date) + timestamp = parse_iso8601(self._search_regex( + r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1', + webpage, 'upload date', fatal=False, group='value')) common_info = { 'description': description, 'view_count': int_or_none(view_count), - 'comment_count': int_or_none(comment_count), - 'upload_date': upload_date, + 'timestamp': timestamp, } def make_entry(video_id, video_url, index=None): From 5181759c0d488f9fc30175f6aff4b8d4a236352d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 May 2016 04:00:08 +0600 Subject: [PATCH 300/501] [life] Update _VALID_URL --- youtube_dl/extractor/lifenews.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 4b1fb9772..d5d528a36 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -15,13 +15,13 @@ from ..utils import ( class LifeNewsIE(InfoExtractor): - IE_NAME = 'lifenews' - IE_DESC = 'LIFE | NEWS' - _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)' + IE_NAME = 'life' + IE_DESC = 'Life.ru' + _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)' _TESTS = [{ # single video embedded via video/source - 'url': 'http://lifenews.ru/news/98736', + 'url': 'https://life.ru/t/новости/98736', 'md5': '77c95eaefaca216e32a76a343ad89d23', 'info_dict': { 'id': '98736', @@ -34,7 +34,7 @@ class LifeNewsIE(InfoExtractor): } }, { # single video embedded via iframe - 'url': 'http://lifenews.ru/news/152125', + 'url': 'https://life.ru/t/новости/152125', 'md5': '77d19a6f0886cd76bdbf44b4d971a273', 'info_dict': { 'id': '152125', @@ -47,7 +47,7 @@ class LifeNewsIE(InfoExtractor): } }, { # two videos embedded via iframe - 'url': 'http://lifenews.ru/news/153461', + 'url': 'https://life.ru/t/новости/153461', 'info_dict': { 'id': '153461', 'title': 'В Москве спасли потерявшегося медвежонка, который 
спрятался на дереве', @@ -77,18 +77,20 @@ class LifeNewsIE(InfoExtractor): }, }], }, { - 'url': 'http://lifenews.ru/video/13035', + 'url': 'https://life.ru/t/новости/213035', + 'only_matching': True, + }, { + 'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461', + 'only_matching': True, + }, { + 'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - section = mobj.group('section') + video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://lifenews.ru/%s/%s' % (section, video_id), - video_id, 'Downloading page') + webpage = self._download_webpage(url, video_id) video_urls = re.findall( r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage) @@ -102,7 +104,7 @@ class LifeNewsIE(InfoExtractor): title = remove_end( self._og_search_title(webpage), - ' - Первый по срочным новостям — LIFE | NEWS') + ' - Life.ru') description = self._og_search_description(webpage) From 5db9df622fb45ba6fbb57ef4a2ad5f2da0236a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 May 2016 04:22:09 +0600 Subject: [PATCH 301/501] [life:embed] Use native hls --- youtube_dl/extractor/lifenews.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index d5d528a36..c2b4490c4 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -188,7 +188,8 @@ class LifeEmbedIE(InfoExtractor): ext = determine_ext(video_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id='m3u8')) + video_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8')) else: formats.append({ 'url': video_url, From 4b464a6a78749dfdc7c71fa932146403f18f6cb5 Mon Sep 17 00:00:00 2001 From: remitamine 
<remitamine@gmail.com> Date: Mon, 23 May 2016 00:47:22 +0100 Subject: [PATCH 302/501] [washingtonpost] improve format extraction and add support for video pages extraction --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/washingtonpost.py | 148 +++++++++++++++++-------- 2 files changed, 103 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c93cd2765..d0346714c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -941,7 +941,10 @@ from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE from .walla import WallaIE -from .washingtonpost import WashingtonPostIE +from .washingtonpost import ( + WashingtonPostIE, + WashingtonPostArticleIE, +) from .wat import WatIE from .watchindianporn import WatchIndianPornIE from .wdr import ( diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index ec8b99998..71349d487 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ b/youtube_dl/extractor/washingtonpost.py @@ -11,7 +11,100 @@ from ..utils import ( class WashingtonPostIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])' + IE_NAME = 'washingtonpost' + _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TEST = { + 'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d', + 'md5': '6f537e1334b714eb15f9563bd4b9cdfa', + 'info_dict': { + 'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d', + 'ext': 'mp4', + 'title': 'Egypt finds belongings, debris from plane crash', + 'description': 'md5:a17ceee432f215a5371388c1f680bd86', + 'upload_date': '20160520', + 'uploader': 'Reuters', + 'timestamp': 1463778452, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = 
self._download_json( + 'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id, + video_id, transform_source=strip_jsonp)[0]['contentConfig'] + title = video_data['title'] + + urls = [] + formats = [] + for s in video_data.get('streams', []): + s_url = s.get('url') + if not s_url or s_url in urls: + continue + urls.append(s_url) + video_type = s.get('type') + if video_type == 'smil': + continue + elif video_type in ('ts', 'hls'): + m3u8_formats = self._extract_m3u8_formats( + s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + for m3u8_format in m3u8_formats: + width = m3u8_format.get('width') + if not width: + continue + vbr = self._search_regex( + r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None) + if vbr: + m3u8_format.update({ + 'vbr': int_or_none(vbr), + }) + formats.extend(m3u8_formats) + else: + width = int_or_none(s.get('width')) + vbr = int_or_none(s.get('bitrate')) + has_width = width != 0 + formats.append({ + 'format_id': ( + '%s-%d-%d' % (video_type, width, vbr) + if width + else video_type), + 'vbr': vbr if has_width else None, + 'width': width, + 'height': int_or_none(s.get('height')), + 'acodec': s.get('audioCodec'), + 'vcodec': s.get('videoCodec') if has_width else 'none', + 'filesize': int_or_none(s.get('fileSize')), + 'url': s_url, + 'ext': 'mp4', + 'protocol': { + 'mp4': 'http', + 'ts': 'm3u8_native', + 'hls': 'm3u8_native', + }.get(s.get('type')), + }) + source_media_url = video_data.get('sourceMediaURL') + if source_media_url: + formats.append({ + 'format_id': 'source_media', + 'url': source_media_url, + }) + self._sort_formats( + formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id')) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('blurb'), + 'uploader': video_data.get('credits', {}).get('source'), + 'formats': formats, + 'duration': int_or_none(video_data.get('videoDuration'), 100), + 'timestamp': int_or_none( + 
video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000), + } + + +class WashingtonPostArticleIE(InfoExtractor): + IE_NAME = 'washingtonpost:article' + _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/', 'info_dict': { @@ -63,6 +156,10 @@ class WashingtonPostIE(InfoExtractor): }] }] + @classmethod + def suitable(cls, url): + return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url) + def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) @@ -74,54 +171,7 @@ class WashingtonPostIE(InfoExtractor): <div\s+class="posttv-video-embed[^>]*?data-uuid=| data-video-uuid= )"([^"]+)"''', webpage) - entries = [] - for i, uuid in enumerate(uuids, start=1): - vinfo_all = self._download_json( - 'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid, - page_id, - transform_source=strip_jsonp, - note='Downloading information of video %d/%d' % (i, len(uuids)) - ) - vinfo = vinfo_all[0]['contentConfig'] - uploader = vinfo.get('credits', {}).get('source') - timestamp = int_or_none( - vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000) - - formats = [{ - 'format_id': ( - '%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate')) - if s.get('width') - else s.get('type')), - 'vbr': s.get('bitrate') if s.get('width') != 0 else None, - 'width': s.get('width'), - 'height': s.get('height'), - 'acodec': s.get('audioCodec'), - 'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none', - 'filesize': s.get('fileSize'), - 'url': s.get('url'), - 'ext': 'mp4', - 'preference': -100 if s.get('type') == 'smil' else None, - 'protocol': { - 'MP4': 'http', - 'F4F': 'f4m', - }.get(s.get('type')), - } for s in vinfo.get('streams', [])] - source_media_url = vinfo.get('sourceMediaURL') - if source_media_url: - formats.append({ - 
'format_id': 'source_media', - 'url': source_media_url, - }) - self._sort_formats(formats) - entries.append({ - 'id': uuid, - 'title': vinfo['title'], - 'description': vinfo.get('blurb'), - 'uploader': uploader, - 'formats': formats, - 'duration': int_or_none(vinfo.get('videoDuration'), 100), - 'timestamp': timestamp, - }) + entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids] return { '_type': 'playlist', From 0c50eeb9870ec7d940c35c9cec52bfd35d009420 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 02:27:31 +0100 Subject: [PATCH 303/501] [reuters] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/reuters.py | 69 ++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 youtube_dl/extractor/reuters.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d0346714c..d8b3170ba 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -630,6 +630,7 @@ from .rds import RDSIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .restudy import RestudyIE +from .reuters import ReutersIE from .reverbnation import ReverbNationIE from .revision3 import Revision3IE from .rice import RICEIE diff --git a/youtube_dl/extractor/reuters.py b/youtube_dl/extractor/reuters.py new file mode 100644 index 000000000..961d504eb --- /dev/null +++ b/youtube_dl/extractor/reuters.py @@ -0,0 +1,69 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + int_or_none, + unescapeHTML, +) + + +class ReutersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', + 'md5': '8015113643a0b12838f160b0b81cc2ee', + 'info_dict': { + 
'id': '368575562', + 'ext': 'mp4', + 'title': 'San Francisco police chief resigns', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id) + video_data = js_to_json(self._search_regex( + r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', + webpage, 'video data')) + + def get_json_value(key, fatal=False): + return self._search_regex('"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal) + + title = unescapeHTML(get_json_value('title', fatal=True)) + mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() + + mas_data = self._download_json( + 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid), + video_id, transform_source=js_to_json) + formats = [] + for f in mas_data: + f_url = f.get('url') + if not f_url: + continue + method = f.get('method') + if method == 'hls': + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + container = f.get('container') + ext = '3gp' if method == 'mobile' else container + formats.append({ + 'format_id': ext, + 'url': f_url, + 'ext': ext, + 'container': container if method != 'mobile' else None, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': get_json_value('thumb'), + 'duration': int_or_none(get_json_value('seconds')), + 'formats': formats, + } From b1e9ebd08087c7e591b55451551d51120b7eec9d Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 02:30:12 +0100 Subject: [PATCH 304/501] [washingtonpost] remove unnecessary code --- youtube_dl/extractor/washingtonpost.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index 71349d487..c2c5bae05 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ 
b/youtube_dl/extractor/washingtonpost.py @@ -74,12 +74,6 @@ class WashingtonPostIE(InfoExtractor): 'vcodec': s.get('videoCodec') if has_width else 'none', 'filesize': int_or_none(s.get('fileSize')), 'url': s_url, - 'ext': 'mp4', - 'protocol': { - 'mp4': 'http', - 'ts': 'm3u8_native', - 'hls': 'm3u8_native', - }.get(s.get('type')), }) source_media_url = video_data.get('sourceMediaURL') if source_media_url: From 42a7439717610530b0f7c630ef0eecf1b0638475 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 09:30:26 +0100 Subject: [PATCH 305/501] [cbs] allow to pass content id to the extractor(closes #9589) --- youtube_dl/extractor/cbs.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 051d783a2..ac2c7dced 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .theplatform import ThePlatformIE from ..utils import ( xpath_text, @@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE): class CBSIE(CBSBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)' + _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))' _TESTS = [{ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', @@ -66,11 +68,12 @@ class CBSIE(CBSBaseIE): TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - content_id = self._search_regex( - [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], - webpage, 'content id') + content_id, display_id = 
re.match(self._VALID_URL, url).groups() + if not content_id: + webpage = self._download_webpage(url, display_id) + content_id = self._search_regex( + [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], + webpage, 'content id') items_data = self._download_xml( 'http://can.cbs.com/thunder/player/videoPlayerService.php', content_id, query={'partner': 'cbs', 'contentId': content_id}) From 05b651e3a58081492eb35d896c80dd1bdb87081c Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 13:04:50 +0100 Subject: [PATCH 306/501] [washingtonpost] reduce requests for m3u8 manifests --- youtube_dl/extractor/washingtonpost.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index c2c5bae05..839cad986 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ b/youtube_dl/extractor/washingtonpost.py @@ -44,7 +44,7 @@ class WashingtonPostIE(InfoExtractor): video_type = s.get('type') if video_type == 'smil': continue - elif video_type in ('ts', 'hls'): + elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url): m3u8_formats = self._extract_m3u8_formats( s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) for m3u8_format in m3u8_formats: @@ -74,6 +74,8 @@ class WashingtonPostIE(InfoExtractor): 'vcodec': s.get('videoCodec') if has_width else 'none', 'filesize': int_or_none(s.get('fileSize')), 'url': s_url, + 'ext': 'mp4', + 'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None, }) source_media_url = video_data.get('sourceMediaURL') if source_media_url: From e8593f346a4b1236d2a023eb3070610bf180459c Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 23:58:16 +0100 Subject: [PATCH 307/501] [ooyala] extract subtitles --- youtube_dl/extractor/ooyala.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 
deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 95e982897..4c119071d 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -22,13 +22,7 @@ class OoyalaBaseIE(InfoExtractor): metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] pcode = metadata.get('asset_pcode') or embed_code - video_info = { - 'id': embed_code, - 'title': metadata['title'], - 'description': metadata.get('description'), - 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), - 'duration': float_or_none(metadata.get('duration'), 1000), - } + title = metadata['title'] urls = [] formats = [] @@ -78,8 +72,24 @@ class OoyalaBaseIE(InfoExtractor): self.IE_NAME, cur_auth_data['message']), expected=True) self._sort_formats(formats) - video_info['formats'] = formats - return video_info + subtitles = {} + for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items(): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles[lang] = [{ + 'url': sub_url, + }] + + return { + 'id': embed_code, + 'title': title, + 'description': metadata.get('description'), + 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), + 'duration': float_or_none(metadata.get('duration'), 1000), + 'subtitles': subtitles, + 'formats': formats, + } class OoyalaIE(OoyalaBaseIE): From a4760d204fe4cd7592bdfc91cbf550eb985374ac Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 00:22:29 +0100 Subject: [PATCH 308/501] [ooyala] use api v2 to reduce requests for format extraction --- youtube_dl/extractor/ooyala.py | 88 +++++++++++++++++----------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 4c119071d..09bc291f0 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -15,7 +15,7 @@ from ..compat import 
compat_urllib_parse_urlencode class OoyalaBaseIE(InfoExtractor): _PLAYER_BASE = 'http://player.ooyala.com/' _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' - _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?' + _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' def _extract(self, content_tree_url, video_id, domain='example.org'): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] @@ -24,52 +24,50 @@ class OoyalaBaseIE(InfoExtractor): pcode = metadata.get('asset_pcode') or embed_code title = metadata['title'] + auth_data = self._download_json( + self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + + compat_urllib_parse_urlencode({ + 'domain': domain, + 'supportedFormats': 'mp4,rtmp,m3u8,hds', + }), video_id) + + cur_auth_data = auth_data['authorization_data'][embed_code] + urls = [] formats = [] - for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'): - auth_data = self._download_json( - self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + - compat_urllib_parse_urlencode({ - 'domain': domain, - 'supportedFormats': supported_format - }), - video_id, 'Downloading %s JSON' % supported_format) - - cur_auth_data = auth_data['authorization_data'][embed_code] - - if cur_auth_data['authorized']: - for stream in cur_auth_data['streams']: - url = base64.b64decode( - stream['url']['data'].encode('ascii')).decode('utf-8') - if url in urls: - continue - urls.append(url) - delivery_type = stream['delivery_type'] - if delivery_type == 'hls' or '.m3u8' in url: - formats.extend(self._extract_m3u8_formats( - url, embed_code, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds' or '.f4m' in url: - formats.extend(self._extract_f4m_formats( - url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif '.smil' in url: - formats.extend(self._extract_smil_formats( - url, embed_code, fatal=False)) - else: - 
formats.append({ - 'url': url, - 'ext': stream.get('delivery_type'), - 'vcodec': stream.get('video_codec'), - 'format_id': delivery_type, - 'width': int_or_none(stream.get('width')), - 'height': int_or_none(stream.get('height')), - 'abr': int_or_none(stream.get('audio_bitrate')), - 'vbr': int_or_none(stream.get('video_bitrate')), - 'fps': float_or_none(stream.get('framerate')), - }) - else: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, cur_auth_data['message']), expected=True) + if cur_auth_data['authorized']: + for stream in cur_auth_data['streams']: + url = base64.b64decode( + stream['url']['data'].encode('ascii')).decode('utf-8') + if url in urls: + continue + urls.append(url) + delivery_type = stream['delivery_type'] + if delivery_type == 'hls' or '.m3u8' in url: + formats.extend(self._extract_m3u8_formats( + url, embed_code, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif delivery_type == 'hds' or '.f4m' in url: + formats.extend(self._extract_f4m_formats( + url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif '.smil' in url: + formats.extend(self._extract_smil_formats( + url, embed_code, fatal=False)) + else: + formats.append({ + 'url': url, + 'ext': stream.get('delivery_type'), + 'vcodec': stream.get('video_codec'), + 'format_id': delivery_type, + 'width': int_or_none(stream.get('width')), + 'height': int_or_none(stream.get('height')), + 'abr': int_or_none(stream.get('audio_bitrate')), + 'vbr': int_or_none(stream.get('video_bitrate')), + 'fps': float_or_none(stream.get('framerate')), + }) + else: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, cur_auth_data['message']), expected=True) self._sort_formats(formats) subtitles = {} From 25bcd3550ee67bb521173d7a43dbc91178a11cfc Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi <kagami@genshiken.org> Date: Tue, 24 May 2016 12:13:05 +0300 Subject: [PATCH 309/501] [vlive] Address site update Changes: * Fix video params extraction * Don't make status request since status 
info now available on the page * Remove unneeded code * Fix test --- youtube_dl/extractor/vlive.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index a672ea9c5..147f52d45 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -1,8 +1,7 @@ # coding: utf-8 -from __future__ import division, unicode_literals +from __future__ import unicode_literals import re -import time from .common import InfoExtractor from ..utils import ( @@ -23,7 +22,7 @@ class VLiveIE(InfoExtractor): 'info_dict': { 'id': '1326', 'ext': 'mp4', - 'title': "[V] Girl's Day's Broadcast", + 'title': "[V LIVE] Girl's Day's Broadcast", 'creator': "Girl's Day", 'view_count': int, }, @@ -35,24 +34,11 @@ class VLiveIE(InfoExtractor): webpage = self._download_webpage( 'http://www.vlive.tv/video/%s' % video_id, video_id) - # UTC+x - UTC+9 (KST) - tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone - tz_offset = -tz // 60 - 9 * 60 - self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset) - - status_params = self._download_json( - 'http://www.vlive.tv/video/status?videoSeq=%s' % video_id, - video_id, 'Downloading JSON status', - headers={'Referer': url.encode('utf-8')}) - status = status_params.get('status') - air_start = status_params.get('onAirStartAt', '') - is_live = status_params.get('isLive') - video_params = self._search_regex( - r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)', + r'\bvlive\.video\.init\(([^)]+)\)', webpage, 'video params') - live_params, long_video_id, key = re.split( - r'"\s*,\s*"', video_params)[1:4] + status, _, _, live_params, long_video_id, key = re.split( + r'"\s*,\s*"', video_params)[2:8] if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': live_params = self._parse_json('"%s"' % live_params, video_id) @@ -61,8 +47,6 @@ class VLiveIE(InfoExtractor): elif status == 'VOD_ON_AIR' or status == 
'BIG_EVENT_INTRO': if long_video_id and key: return self._replay(video_id, webpage, long_video_id, key) - elif is_live: - status = 'LIVE_END' else: status = 'COMING_SOON' @@ -70,7 +54,7 @@ class VLiveIE(InfoExtractor): raise ExtractorError('Uploading for replay. Please wait...', expected=True) elif status == 'COMING_SOON': - raise ExtractorError('Coming soon! %s' % air_start, expected=True) + raise ExtractorError('Coming soon!', expected=True) elif status == 'CANCELED': raise ExtractorError('We are sorry, ' 'but the live broadcast has been canceled.', From 277c7465f58e0ac50de0dd9ebc2083f6142e9a94 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 11:24:29 +0100 Subject: [PATCH 310/501] [ooyala] check manifest ext with determine_ext and update tests for related extractors --- youtube_dl/extractor/byutv.py | 5 ++--- youtube_dl/extractor/espn.py | 12 ++++-------- youtube_dl/extractor/formula1.py | 3 ++- youtube_dl/extractor/groupon.py | 7 +++---- youtube_dl/extractor/howcast.py | 7 ++----- youtube_dl/extractor/ooyala.py | 24 +++++++++++++----------- youtube_dl/extractor/teachingchannel.py | 6 ++---- youtube_dl/extractor/veoh.py | 1 + youtube_dl/extractor/vice.py | 3 +++ youtube_dl/extractor/voxmedia.py | 12 ++++++++---- 10 files changed, 40 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index dda98059e..54eb57b46 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)' _TEST = { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', + 'md5': '05850eb8c749e2ee05ad5a1c34668493', 'info_dict': { 'id': 'studio-c-season-5-episode-5', 'ext': 'mp4', @@ -19,9 +20,7 @@ class BYUtvIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, - 'params': { - 
'skip_download': True, - } + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index db4b263bc..e3575aed1 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -8,28 +8,24 @@ class ESPNIE(InfoExtractor): _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', + 'md5': '60e5d097a523e767d06479335d1bdc58', 'info_dict': { 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'ext': 'mp4', 'title': '30 for 30 Shorts: Judging Jewell', 'description': None, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['OoyalaExternal'], }, { # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season 'url': 'http://espn.go.com/video/clip?id=2743663', + 'md5': 'f4ac89b59afc7e2d7dbb049523df6768', 'info_dict': { 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', 'ext': 'mp4', 'title': 'Must-See Moments: Best of the MLS season', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['OoyalaExternal'], }, { 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'only_matching': True, diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py index 726393fcc..322c41e5a 100644 --- a/youtube_dl/extractor/formula1.py +++ b/youtube_dl/extractor/formula1.py @@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor): 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', 'ext': 'flv', 'title': 'Race highlights - Spain 2016', - } + }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index 1dd0a81cc..7bbb669c7 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor): 'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip 
Flotex flooring; certified instructors', }, 'playlist': [{ + 'md5': '42428ce8a00585f9bc36e49226eae7a1', 'info_dict': { 'id': 'fk6OhWpXgIQ', 'ext': 'mp4', @@ -24,10 +25,8 @@ class GrouponIE(InfoExtractor): 'uploader_id': 'groupon', 'uploader': 'Groupon', }, - }], - 'params': { - 'skip_download': True, - } + 'add_ie': ['Youtube'], + }] } _PROVIDERS = { diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index e8f51e545..92caeb8f9 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)' _TEST = { 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', - 'md5': '8b743df908c42f60cf6496586c7f12c3', + 'md5': '7d45932269a288149483144f01b99789', 'info_dict': { 'id': '390161', 'ext': 'mp4', @@ -18,10 +18,7 @@ class HowcastIE(InfoExtractor): 'upload_date': '20100609', 'duration': 56.823, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 09bc291f0..2038a6ba5 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -8,6 +8,7 @@ from ..utils import ( float_or_none, ExtractorError, unsmuggle_url, + determine_ext, ) from ..compat import compat_urllib_parse_urlencode @@ -37,26 +38,27 @@ class OoyalaBaseIE(InfoExtractor): formats = [] if cur_auth_data['authorized']: for stream in cur_auth_data['streams']: - url = base64.b64decode( + s_url = base64.b64decode( stream['url']['data'].encode('ascii')).decode('utf-8') - if url in urls: + if s_url in urls: continue - urls.append(url) + urls.append(s_url) + ext = determine_ext(s_url, None) delivery_type = stream['delivery_type'] - if delivery_type == 'hls' or '.m3u8' in url: + if delivery_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - url, 
embed_code, 'mp4', 'm3u8_native', + s_url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds' or '.f4m' in url: + elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( - url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif '.smil' in url: + s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif ext == 'smil': formats.extend(self._extract_smil_formats( - url, embed_code, fatal=False)) + s_url, embed_code, fatal=False)) else: formats.append({ - 'url': url, - 'ext': stream.get('delivery_type'), + 'url': s_url, + 'ext': ext or stream.get('delivery_type'), 'vcodec': stream.get('video_codec'), 'format_id': delivery_type, 'width': int_or_none(stream.get('width')), diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index e0477382c..e279280e9 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor): _TEST = { 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', + 'md5': '3d6361864d7cac20b57c8784da17166f', 'info_dict': { 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', 'ext': 'mp4', @@ -18,10 +19,7 @@ class TeachingChannelIE(InfoExtractor): 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 'duration': 422.255, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index 23ce0a0d1..0f5d68738 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -37,6 +37,7 @@ class VeohIE(InfoExtractor): 'uploader': 'afp-news', 'duration': 123, }, + 'skip': 'This video has been deleted.', }, { 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 95daf4dfd..e2b2ce098 100644 --- 
a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -11,12 +11,14 @@ class ViceIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', + 'md5': 'e9d77741f9e42ba583e683cd170660f7', 'info_dict': { 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', 'ext': 'flv', 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', 'duration': 725.983, }, + 'add_ie': ['Ooyala'], }, { 'url': 'http://www.vice.com/video/how-to-hack-a-car', 'md5': '6fb2989a3fed069fb8eab3401fc2d3c9', @@ -29,6 +31,7 @@ class ViceIE(InfoExtractor): 'uploader': 'Motherboard', 'upload_date': '20140529', }, + 'add_ie': ['Youtube'], }, { 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'only_matching': True, diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index 9d73600aa..b1b32ad44 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Google\'s new material design direction', 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', - } + }, + 'add_ie': ['Ooyala'], }, { # data-ooyala-id 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet', @@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Nexus 6: hands-on with Google\'s phablet', 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', - } + }, + 'add_ie': ['Ooyala'], }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', @@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'The new frontier of LGBTQ civil rights, explained', 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', - } + }, + 'add_ie': ['Ooyala'], }, { # youtube embed 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance', @@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor): 'upload_date': '20160324', 'uploader_id': 
'voxdotcom', 'uploader': 'Vox', - } + }, + 'add_ie': ['Youtube'], }, { # SBN.VideoLinkset.entryGroup multiple ooyala embeds 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', From 444417edb55a5bf471697a3b2353fdbfb6f7e26d Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 15:58:27 +0100 Subject: [PATCH 311/501] [radiocanada] Add new extractor(#4020) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/radiocanada.py | 130 ++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 youtube_dl/extractor/radiocanada.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d8b3170ba..f9fed18f6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -617,6 +617,10 @@ from .qqmusic import ( QQMusicPlaylistIE, ) from .r7 import R7IE +from .radiocanada import ( + RadioCanadaIE, + RadioCanadaAudioVideoIE, +) from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py new file mode 100644 index 000000000..4f05bbddc --- /dev/null +++ b/youtube_dl/extractor/radiocanada.py @@ -0,0 +1,130 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + find_xpath_attr, + determine_ext, + int_or_none, + unified_strdate, + xpath_element, + ExtractorError, +) + + +class RadioCanadaIE(InfoExtractor): + IE_NAME = 'radiocanada' + _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' + _TEST = { + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', + 'info_dict': { + 'id': '7184272', + 'ext': 'flv', + 'title': 'Le parcours du tireur capté sur vidéo', 
+ 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', + 'upload_date': '20141023', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + app_code, video_id = re.match(self._VALID_URL, url).groups() + + formats = [] + # TODO: extract m3u8 and f4m formats + # m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements + # f4m formats can be extracted using flashhd device_type but they produce unplayable file + for device_type in ('flash',): + v_data = self._download_xml( + 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', + video_id, note='Downloading %s XML' % device_type, query={ + 'appCode': app_code, + 'idMedia': video_id, + 'connectionType': 'broadband', + 'multibitrate': 'true', + 'deviceType': device_type, + # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction + 'paysJ391wsHjbOJwvCs26toz': 'CA', + 'bypasslock': 'NZt5K62gRqfc', + }) + v_url = xpath_text(v_data, 'url') + if not v_url: + continue + if v_url == 'null': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, xpath_text(v_data, 'message')), expected=True) + ext = determine_ext(v_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + v_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False)) + else: + ext = determine_ext(v_url) + bitrates = xpath_element(v_data, 'bitrates') + for url_e in bitrates.findall('url'): + tbr = int_or_none(url_e.get('bitrate')) + if not tbr: + continue + formats.append({ + 'format_id': 'rtmp-%d' % tbr, + 'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url), + 'ext': 'flv', + 'protocol': 'rtmp', + 'width': int_or_none(url_e.get('width')), + 'height': int_or_none(url_e.get('height')), + 'tbr': tbr, + }) + self._sort_formats(formats) + + metadata 
= self._download_xml( + 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', + video_id, note='Downloading metadata XML', query={ + 'appCode': app_code, + 'idMedia': video_id, + }) + + def get_meta(name): + el = find_xpath_attr(metadata, './/Meta', 'name', name) + return el.text if el is not None else None + + return { + 'id': video_id, + 'title': get_meta('Title'), + 'description': get_meta('Description') or get_meta('ShortDescription'), + 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), + 'duration': int_or_none(get_meta('length')), + 'series': get_meta('Emission'), + 'season_number': int_or_none('SrcSaison'), + 'episode_number': int_or_none('SrcEpisode'), + 'upload_date': unified_strdate(get_meta('Date')), + 'formats': formats, + } + + +class RadioCanadaAudioVideoIE(InfoExtractor): + 'radiocanada:audiovideo' + _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', + 'info_dict': { + 'id': '7527184', + 'ext': 'flv', + 'title': 'Barack Obama au Vietnam', + 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', + 'upload_date': '20160523', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) From a4690b3244a42a833146c406e622c96045b23df5 Mon Sep 17 00:00:00 2001 From: mexican porn commits <xyz71412@laoeq.com> Date: Mon, 23 May 2016 16:32:39 -0500 Subject: [PATCH 312/501] [xhamster] url regex fix for videos with empty title. 
--- youtube_dl/extractor/xhamster.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index b3547174d..314e5020d 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -12,7 +12,7 @@ from ..utils import ( class XHamsterIE(InfoExtractor): - _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' + _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?' _TESTS = [ { 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', @@ -38,6 +38,18 @@ class XHamsterIE(InfoExtractor): 'age_limit': 18, } }, + { + 'url': 'http://xhamster.com/movies/5667973/.html', + 'info_dict': { + 'id': '5667973', + 'ext': 'mp4', + 'title': '....', + 'upload_date': '20160208', + 'uploader': 'parejafree', + 'duration': 72.0, + 'age_limit': 18, + } + }, { 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', 'only_matching': True, @@ -170,7 +182,7 @@ class XHamsterEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id, + r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id, webpage, 'xhamster url', default=None) if not video_url: From 6b43132ce9ec7477d69d8ad9d5b868060679de95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 May 2016 21:38:27 +0600 Subject: [PATCH 313/501] [xhamster] Update tests --- youtube_dl/extractor/xhamster.py | 79 +++++++++++++++++--------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 314e5020d..bd8e1af2e 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -13,48 
+13,51 @@ from ..utils import ( class XHamsterIE(InfoExtractor): _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?' - _TESTS = [ - { - 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', - 'info_dict': { - 'id': '1509445', - 'ext': 'mp4', - 'title': 'FemaleAgent Shy beauty takes the bait', - 'upload_date': '20121014', - 'uploader': 'Ruseful2011', - 'duration': 893.52, - 'age_limit': 18, - } + _TESTS = [{ + 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', + 'md5': '8281348b8d3c53d39fffb377d24eac4e', + 'info_dict': { + 'id': '1509445', + 'ext': 'mp4', + 'title': 'FemaleAgent Shy beauty takes the bait', + 'upload_date': '20121014', + 'uploader': 'Ruseful2011', + 'duration': 893.52, + 'age_limit': 18, }, - { - 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', - 'info_dict': { - 'id': '2221348', - 'ext': 'mp4', - 'title': 'Britney Spears Sexy Booty', - 'upload_date': '20130914', - 'uploader': 'jojo747400', - 'duration': 200.48, - 'age_limit': 18, - } + }, { + 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', + 'info_dict': { + 'id': '2221348', + 'ext': 'mp4', + 'title': 'Britney Spears Sexy Booty', + 'upload_date': '20130914', + 'uploader': 'jojo747400', + 'duration': 200.48, + 'age_limit': 18, }, - { - 'url': 'http://xhamster.com/movies/5667973/.html', - 'info_dict': { - 'id': '5667973', - 'ext': 'mp4', - 'title': '....', - 'upload_date': '20160208', - 'uploader': 'parejafree', - 'duration': 72.0, - 'age_limit': 18, - } + 'params': { + 'skip_download': True, }, - { - 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', - 'only_matching': True, + }, { + # empty seo + 'url': 'http://xhamster.com/movies/5667973/.html', + 'info_dict': { + 'id': '5667973', + 'ext': 'mp4', + 'title': '....', + 'upload_date': '20160208', + 'uploader': 'parejafree', + 'duration': 
72.0, + 'age_limit': 18, }, - ] + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', + 'only_matching': True, + }] def _real_extract(self, url): def extract_video_url(webpage, name): From 0d6ee9750801045e45157f38d98ef2be0c6da4f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 May 2016 21:42:47 +0600 Subject: [PATCH 314/501] Credit @TRox1972 for tosh.cc (#9566) and localnews8 (#9539) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 5ca71ace7..3272fc6ea 100644 --- a/AUTHORS +++ b/AUTHORS @@ -172,3 +172,4 @@ blahgeek Kevin Deldycke inondle Tomáš Čech +Déstin Reed From 688c634b7d95a20c6081b202427a9e5fd7f36422 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 16:42:22 +0100 Subject: [PATCH 315/501] skip some tests to reduce test time --- youtube_dl/extractor/byutv.py | 3 +++ youtube_dl/extractor/espn.py | 6 ++++++ youtube_dl/extractor/groupon.py | 5 ++++- youtube_dl/extractor/howcast.py | 3 +++ youtube_dl/extractor/teachingchannel.py | 3 +++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 54eb57b46..3aec601f8 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -20,6 +20,9 @@ class BYUtvIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], } diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index e3575aed1..66c08bec4 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -15,6 +15,9 @@ class ESPNIE(InfoExtractor): 'title': '30 for 30 Shorts: Judging Jewell', 'description': None, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['OoyalaExternal'], }, { # intl video, from 
http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season @@ -25,6 +28,9 @@ class ESPNIE(InfoExtractor): 'ext': 'mp4', 'title': 'Must-See Moments: Best of the MLS season', }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['OoyalaExternal'], }, { 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index 7bbb669c7..a6da90931 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -26,7 +26,10 @@ class GrouponIE(InfoExtractor): 'uploader': 'Groupon', }, 'add_ie': ['Youtube'], - }] + }], + 'params': { + 'skip_download': True, + }, } _PROVIDERS = { diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index 92caeb8f9..7e36b85ad 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -18,6 +18,9 @@ class HowcastIE(InfoExtractor): 'upload_date': '20100609', 'duration': 56.823, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], } diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index e279280e9..d14d93e3a 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -19,6 +19,9 @@ class TeachingChannelIE(InfoExtractor): 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 'duration': 422.255, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], } From 1de32771e1d3f89ef2738883b304ce52a5ecf303 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 20:10:12 +0100 Subject: [PATCH 316/501] [eyedotv] Add new extractor(closes #9582) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/eyedotv.py | 64 ++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/eyedotv.py diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index f9fed18f6..05561149a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -231,6 +231,7 @@ from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE +from .eyedotv import EyedoTVIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE diff --git a/youtube_dl/extractor/eyedotv.py b/youtube_dl/extractor/eyedotv.py new file mode 100644 index 000000000..2f3035147 --- /dev/null +++ b/youtube_dl/extractor/eyedotv.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + parse_duration, + ExtractorError, +) + + +class EyedoTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301', + 'md5': 'ba14f17995cdfc20c36ba40e21bf73f7', + 'info_dict': { + 'id': '16301', + 'ext': 'mp4', + 'title': 'Journée du conseil scientifique de l\'Afnic 2015', + 'description': 'md5:4abe07293b2f73efc6e1c37028d58c98', + 'uploader': 'Afnic Live', + 'uploader_id': '8023', + } + } + _ROOT_URL = 'http://live.eyedo.net:1935/' + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id) + + def _add_ns(path): + return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api') + + title = xpath_text(video_data, _add_ns('Titre'), 'title', True) + state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True) + if state_live_code == 'avenir': + raise ExtractorError( + '%s said: We\'re sorry, but this video is not yet available.' 
% self.IE_NAME, + expected=True) + + is_live = state_live_code == 'live' + m3u8_url = None + # http://eyedo.tv/Content/Html5/Scripts/html5view.js + if is_live: + if xpath_text(video_data, 'Cdn') == 'true': + m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id + else: + m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id + else: + m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id) + + return { + 'id': video_id, + 'title': title, + 'formats': self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'), + 'description': xpath_text(video_data, _add_ns('Description')), + 'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))), + 'uploader': xpath_text(video_data, _add_ns('Createur')), + 'uploader_id': xpath_text(video_data, _add_ns('CreateurId')), + 'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')), + 'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')), + } From 4ee0b8afdb384ad3e2d65b6b0159a801ee73d26d Mon Sep 17 00:00:00 2001 From: wankerer <git@wanker.33mail.com> Date: Tue, 24 May 2016 10:18:36 -0700 Subject: [PATCH 317/501] [eporner] fix for the new URL layout Recently eporner slightly changed the URL layout, the ID that used to be digits only are now digits and letters, so youtube-dl falls back to the generic extractor that doesn't work. Fix the matching regex to allow letters in ID. 
[v2: added a test case] --- youtube_dl/extractor/eporner.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index e006921ec..581276694 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -11,8 +11,8 @@ from ..utils import ( class EpornerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)' + _TESTS = [{ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'md5': '39d486f046212d8e1b911c52ab4691f8', 'info_dict': { @@ -23,8 +23,22 @@ class EpornerIE(InfoExtractor): 'duration': 1838, 'view_count': int, 'age_limit': 18, - } - } + }, + }, + # New (May 2016) URL layout + { + 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', + 'md5': '3469eeaa93b6967a34cdbdbb9d064b33', + 'info_dict': { + 'id': '3YRUtzMcWn0', + 'display_id': 'Star-Wars-XXX-Parody', + 'ext': 'mp4', + 'title': 'Star Wars XXX Parody', + 'duration': 361.0, + 'view_count': int, + 'age_limit': 18, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 6f748df43ff3476e4dbd29c7464837ea63d78b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 May 2016 20:51:17 +0600 Subject: [PATCH 318/501] [eporner] Make test only_matching --- youtube_dl/extractor/eporner.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 581276694..ac5d0fe24 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -24,20 +24,10 @@ class EpornerIE(InfoExtractor): 'view_count': int, 'age_limit': 18, }, - }, - # New (May 2016) URL layout - { + }, { + # New (May 2016) URL layout 'url': 
'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', - 'md5': '3469eeaa93b6967a34cdbdbb9d064b33', - 'info_dict': { - 'id': '3YRUtzMcWn0', - 'display_id': 'Star-Wars-XXX-Parody', - 'ext': 'mp4', - 'title': 'Star Wars XXX Parody', - 'duration': 361.0, - 'view_count': int, - 'age_limit': 18, - }, + 'only_matching': True, }] def _real_extract(self, url): From 0a5685b26fae0940f14cb063a6e4fc6986f9c124 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 21:41:47 +0800 Subject: [PATCH 319/501] [common] Support non-bootstraped streams in f4m manifests Related: #9531 --- youtube_dl/extractor/common.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4bfa610c1..7eb7464ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -987,7 +987,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True): + fatal=True, assume_f4mv2=False): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1001,11 +1001,11 @@ class InfoExtractor(object): return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal) + transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2) def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True): + fatal=True, assume_f4mv2=False): # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -1029,8 +1029,13 @@ class 
InfoExtractor(object): 'base URL', default=None) if base_url: base_url = base_url.strip() + + bootstrap_info = xpath_text( + manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], + 'bootstrap info', default=None) + for i, media_el in enumerate(media_nodes): - if manifest_version == '2.0': + if manifest_version == '2.0' or assume_f4mv2: media_url = media_el.attrib.get('href') or media_el.attrib.get('url') if not media_url: continue @@ -1050,7 +1055,7 @@ class InfoExtractor(object): formats.append({ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), 'url': manifest_url, - 'ext': 'flv', + 'ext': 'flv' if bootstrap_info else None, 'tbr': tbr, 'width': int_or_none(media_el.attrib.get('width')), 'height': int_or_none(media_el.attrib.get('height')), From 85b0fe7d6442d4ddb056fb5a5d15e51e8a625ae7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 21:43:35 +0800 Subject: [PATCH 320/501] [playwire] Use _extract_f4m_formats Related: #9531 --- youtube_dl/extractor/playwire.py | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 6d138ef25..7580e4a85 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -4,9 +4,8 @@ import re from .common import InfoExtractor from ..utils import ( - xpath_text, + dict_get, float_or_none, - int_or_none, ) @@ -23,6 +22,7 @@ class PlaywireIE(InfoExtractor): 'duration': 145.94, }, }, { + # Multiple resolutions while bitrates missing 'url': 'http://cdn.playwire.com/11625/embed/85228.html', 'only_matching': True, }, { @@ -48,25 +48,10 @@ class PlaywireIE(InfoExtractor): thumbnail = content.get('poster') src = content['media']['f4m'] - f4m = self._download_xml(src, video_id) - base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True) - formats = [] - 
for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'): - media_url = media.get('url') - if not media_url: - continue - tbr = int_or_none(media.get('bitrate')) - width = int_or_none(media.get('width')) - height = int_or_none(media.get('height')) - f = { - 'url': '%s/%s' % (base_url, media.attrib['url']), - 'tbr': tbr, - 'width': width, - 'height': height, - } - if not (tbr or width or height): - f['quality'] = 1 if '-hd.' in media_url else 0 - formats.append(f) + formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True) + for a_format in formats: + if not dict_get(a_format, ['tbr', 'width', 'height']): + a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 self._sort_formats(formats) return { From 240b60453e1237473dfd8deff40c9dc54661668c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 21:55:43 +0800 Subject: [PATCH 321/501] [common] Support m3u8 in f4m manifests Related: #9531 --- youtube_dl/extractor/common.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7eb7464ec..b5bea5904 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -987,7 +987,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False): + fatal=True, assume_f4mv2=False, m3u8_id=None): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1001,11 +1001,12 @@ class InfoExtractor(object): return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2) + transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2, + m3u8_id=m3u8_id) def 
_parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False): + fatal=True, assume_f4mv2=False, m3u8_id=None): # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -1046,11 +1047,17 @@ class InfoExtractor(object): # since bitrates in parent manifest (this one) and media_url manifest # may differ leading to inability to resolve the format by requested # bitrate in f4m downloader - if determine_ext(manifest_url) == 'f4m': + ext = determine_ext(manifest_url) + if ext == 'f4m': formats.extend(self._extract_f4m_formats( manifest_url, video_id, preference=preference, f4m_id=f4m_id, transform_source=transform_source, fatal=fatal)) continue + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', preference=preference, + m3u8_id=m3u8_id, fatal=False)) + continue tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), From 761052db922a525d6ccaf250f9914841c9d3d66f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 21:57:06 +0800 Subject: [PATCH 322/501] [playwire] Add the test (closed #9531) --- youtube_dl/extractor/playwire.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 7580e4a85..2ee5c5aa3 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -21,6 +21,18 @@ class PlaywireIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.png$', 'duration': 145.94, }, + }, { + # m3u8 in f4m + 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json', + 'info_dict': { + 'id': 
'4840492', + 'ext': 'mp4', + 'title': 'ITV EL SHOW FULL', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { # Multiple resolutions while bitrates missing 'url': 'http://cdn.playwire.com/11625/embed/85228.html', @@ -48,7 +60,7 @@ class PlaywireIE(InfoExtractor): thumbnail = content.get('poster') src = content['media']['f4m'] - formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True) + formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True, m3u8_id='hls') for a_format in formats: if not dict_get(a_format, ['tbr', 'width', 'height']): a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 From 5950cb1d6d8d27f7a7272895100da9652212fad6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 22:44:00 +0800 Subject: [PATCH 323/501] [utils] Support a new form of date Found in dw.com (#9475) --- youtube_dl/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d65f5e833..316a307e0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1035,6 +1035,7 @@ def unified_strdate(date_str, day_first=True): format_expressions.extend([ '%d-%m-%Y', '%d.%m.%Y', + '%d.%m.%y', '%d/%m/%Y', '%d/%m/%y', '%d/%m/%Y %H:%M:%S', @@ -1049,6 +1050,8 @@ def unified_strdate(date_str, day_first=True): ]) for expression in format_expressions: try: + print(expression) + print(date_str) upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') except ValueError: pass @@ -1910,7 +1913,7 @@ def parse_age_limit(s): def strip_jsonp(code): return re.sub( - r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) + r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) def js_to_json(code): From ac88d2316ebef5b00cf5c94d94f01c9f7e17ce51 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 22:48:47 +0800 Subject: [PATCH 324/501] [dw] Support documentaries 
(closes #9475) --- youtube_dl/extractor/dw.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py index ae7c571bd..0f0f0b8d3 100644 --- a/youtube_dl/extractor/dw.py +++ b/youtube_dl/extractor/dw.py @@ -2,13 +2,16 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + unified_strdate, +) from ..compat import compat_urlparse class DWIE(InfoExtractor): IE_NAME = 'dw' - _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)' _TESTS = [{ # video 'url': 'http://www.dw.com/en/intelligent-light/av-19112290', @@ -31,6 +34,16 @@ class DWIE(InfoExtractor): 'description': 'md5:bc9ca6e4e063361e21c920c53af12405', 'upload_date': '20160311', } + }, { + 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798', + 'md5': '56b6214ef463bfb9a3b71aeb886f3cf1', + 'info_dict': { + 'id': '19274438', + 'ext': 'mp4', + 'title': 'Welcome to the 90s – Hip Hop', + 'description': 'Welcome to the 90s - The Golden Decade of Hip Hop', + 'upload_date': '20160521', + }, }] def _real_extract(self, url): @@ -38,6 +51,7 @@ class DWIE(InfoExtractor): webpage = self._download_webpage(url, media_id) hidden_inputs = self._hidden_inputs(webpage) title = hidden_inputs['media_title'] + media_id = hidden_inputs.get('media_id') or media_id if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': formats = self._extract_smil_formats( @@ -49,13 +63,20 @@ class DWIE(InfoExtractor): else: formats = [{'url': hidden_inputs['file_name']}] + upload_date = hidden_inputs.get('display_date') + if not upload_date: + upload_date = self._html_search_regex( + r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage, + 'upload date', default=None) + upload_date = 
unified_strdate(upload_date) + return { 'id': media_id, 'title': title, 'description': self._og_search_description(webpage), 'thumbnail': hidden_inputs.get('preview_image'), 'duration': int_or_none(hidden_inputs.get('file_duration')), - 'upload_date': hidden_inputs.get('display_date'), + 'upload_date': upload_date, 'formats': formats, } From 293c2556886c34d11919eb0af6760c52bd6a2632 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 22:54:16 +0800 Subject: [PATCH 325/501] [utils] Remove debugging codes --- youtube_dl/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 316a307e0..cfb2d1bf5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1050,8 +1050,6 @@ def unified_strdate(date_str, day_first=True): ]) for expression in format_expressions: try: - print(expression) - print(date_str) upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') except ValueError: pass From 448bb5f333c6c4c8084e479e1035ff674e4f8fd4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 27 May 2016 00:03:03 +0800 Subject: [PATCH 326/501] [common] Fix non-bootstrapped support in f4m --- youtube_dl/extractor/common.py | 19 +++++++++++++------ youtube_dl/extractor/playwire.py | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b5bea5904..e53b7ad64 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -987,7 +987,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False, m3u8_id=None): + fatal=True, m3u8_id=None): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1001,12 +1001,11 @@ class 
InfoExtractor(object): return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2, - m3u8_id=m3u8_id) + transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id) def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False, m3u8_id=None): + fatal=True, m3u8_id=None): # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -1036,8 +1035,16 @@ class InfoExtractor(object): 'bootstrap info', default=None) for i, media_el in enumerate(media_nodes): - if manifest_version == '2.0' or assume_f4mv2: - media_url = media_el.attrib.get('href') or media_el.attrib.get('url') + # If <bootstrapInfo> is present, the specified f4m is a + # stream-level manifest, and only set-level manifests may refer to + # external resources. 
See section 11.4 and section 4 of F4M spec + if bootstrap_info is None: + media_url = None + # @href is introduced in 2.0, see section 11.6 of F4M spec + if manifest_version == '2.0': + media_url = media_el.attrib.get('href') + if media_url is None: + media_url = media_el.attrib.get('url') if not media_url: continue manifest_url = ( diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 2ee5c5aa3..0bc743118 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -60,7 +60,7 @@ class PlaywireIE(InfoExtractor): thumbnail = content.get('poster') src = content['media']['f4m'] - formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True, m3u8_id='hls') + formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls') for a_format in formats: if not dict_get(a_format, ['tbr', 'width', 'height']): a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 From 6f8cb2421948fd128b3004fde7eebaa2463f5f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 May 2016 22:21:55 +0600 Subject: [PATCH 327/501] [tvp] Expand _VALID_URL and improve naming (Closes #9602) --- youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/tvp.py | 47 ++++++++++++++++-------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 05561149a..ddf62139e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -833,7 +833,10 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE -from .tvp import TvpIE, TvpSeriesIE +from .tvp import ( + TVPIE, + TVPSeriesIE, +) from .tvplay import TVPlayIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index f57d609d4..a4997cb89 100644 --- a/youtube_dl/extractor/tvp.py +++ 
b/youtube_dl/extractor/tvp.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re @@ -6,20 +6,13 @@ import re from .common import InfoExtractor -class TvpIE(InfoExtractor): - IE_NAME = 'tvp.pl' - _VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$' +class TVPIE(InfoExtractor): + IE_NAME = 'tvp' + IE_DESC = 'Telewizja Polska' + _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)' _TESTS = [{ - 'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035', - 'md5': 'cdd98303338b8a7f7abab5cd14092bf2', - 'info_dict': { - 'id': '4278035', - 'ext': 'wmv', - 'title': 'Ogniem i mieczem, odc. 2', - }, - }, { - 'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536', + 'url': 'http://vod.tvp.pl/194536/i-seria-odc-13', 'md5': '8aa518c15e5cc32dfe8db400dc921fbb', 'info_dict': { 'id': '194536', @@ -36,12 +29,22 @@ class TvpIE(InfoExtractor): }, }, { 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', - 'md5': 'c3b15ed1af288131115ff17a17c19dda', - 'info_dict': { - 'id': '17834272', - 'ext': 'mp4', - 'title': 'Na sygnale, odc. 
39', - }, + 'only_matching': True, + }, { + 'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200', + 'only_matching': True, + }, { + 'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa', + 'only_matching': True, + }, { + 'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach', + 'only_matching': True, + }, { + 'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum', + 'only_matching': True, + }, { + 'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji', + 'only_matching': True, }] def _real_extract(self, url): @@ -92,8 +95,8 @@ class TvpIE(InfoExtractor): } -class TvpSeriesIE(InfoExtractor): - IE_NAME = 'tvp.pl:Series' +class TVPSeriesIE(InfoExtractor): + IE_NAME = 'tvp:series' _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$' _TESTS = [{ @@ -127,7 +130,7 @@ class TvpSeriesIE(InfoExtractor): videos_paths = re.findall( '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist) entries = [ - self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key()) + self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key()) for v_path in videos_paths] return { From fac2af3c51c92b7f9abc4f229bc9351e8a301b29 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 27 May 2016 01:41:27 +0800 Subject: [PATCH 328/501] [common] Fix m3u8 extraction in f4m manifests --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e53b7ad64..0029c3694 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1063,7 +1063,7 @@ class InfoExtractor(object): elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( manifest_url, video_id, 'mp4', preference=preference, - m3u8_id=m3u8_id, fatal=False)) + m3u8_id=m3u8_id, fatal=fatal)) continue tbr = 
int_or_none(media_el.attrib.get('bitrate')) formats.append({ From 3874e6ea66c738910c6a1065b2d781e04a8143ae Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 16:45:14 +0200 Subject: [PATCH 329/501] [WDR] use single quotes for strings --- youtube_dl/extractor/wdr.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index ec81f1a28..05bfe7deb 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -17,7 +17,7 @@ from ..utils import ( class WDRIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5' _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' - _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + "|" + _CURRENT_MAUS_URL + _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' @@ -116,23 +116,23 @@ class WDRIE(InfoExtractor): json_data = self._search_regex(r'\(({.*})\)', js_data, 'json') metadata = self._parse_json(json_data, display_id) - metadata_tracker_data = metadata["trackerData"] - metadata_media_resource = metadata["mediaResource"] + metadata_tracker_data = metadata['trackerData'] + metadata_media_resource = metadata['mediaResource'] formats = [] # check if the metadata contains a direct URL to a file - metadata_media_alt = metadata_media_resource.get("alt") + metadata_media_alt = metadata_media_resource.get('alt') if metadata_media_alt: - for tag_name in ["videoURL", 'audioURL']: + for tag_name in ['videoURL', 'audioURL']: if tag_name in metadata_media_alt: formats.append({ 'url': metadata_media_alt[tag_name] }) # check if there are flash-streams for this video - if "dflt" in metadata_media_resource and "videoURL" in metadata_media_resource["dflt"]: - video_url = 
metadata_media_resource["dflt"]["videoURL"] + if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: + video_url = metadata_media_resource['dflt']['videoURL'] if video_url.endswith('.f4m'): full_video_url = video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' formats.extend(self._extract_f4m_formats(full_video_url, display_id, f4m_id='hds', fatal=False)) @@ -140,13 +140,13 @@ class WDRIE(InfoExtractor): formats.extend(self._extract_smil_formats(video_url, 'stream', fatal=False)) subtitles = {} - caption_url = metadata_media_resource.get("captionURL") + caption_url = metadata_media_resource.get('captionURL') if caption_url: subtitles['de'] = [{ 'url': caption_url }] - title = metadata_tracker_data.get("trackerClipTitle") + title = metadata_tracker_data.get('trackerClipTitle') is_live = url_type == 'live' if is_live: @@ -163,13 +163,13 @@ class WDRIE(InfoExtractor): self._sort_formats(formats) return { - 'id': metadata_tracker_data.get("trackerClipId", display_id), + 'id': metadata_tracker_data.get('trackerClipId', display_id), 'display_id': display_id, 'title': title, - 'alt_title': metadata_tracker_data.get("trackerClipSubcategory"), + 'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), 'formats': formats, 'upload_date': upload_date, - 'description': self._html_search_meta("Description", webpage), + 'description': self._html_search_meta('Description', webpage), 'is_live': is_live, 'subtitles': subtitles, } From 37f972954da0d0f1f0c5e97da8357c4baf687ee6 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 16:59:45 +0200 Subject: [PATCH 330/501] [WDR] use _download_json with a strip_jsonp --- youtube_dl/extractor/wdr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 05bfe7deb..73a343c69 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -9,6 +9,7 @@ from ..compat 
import ( compat_urlparse, ) from ..utils import ( + strip_jsonp, unified_strdate, ExtractorError, ) @@ -112,9 +113,8 @@ class WDRIE(InfoExtractor): raise ExtractorError('No downloadable streams found', expected=True) - js_data = self._download_webpage(js_url, 'metadata') - json_data = self._search_regex(r'\(({.*})\)', js_data, 'json') - metadata = self._parse_json(json_data, display_id) + metadata = self._download_json( + js_url, 'metadata', transform_source=strip_jsonp) metadata_tracker_data = metadata['trackerData'] metadata_media_resource = metadata['mediaResource'] From bec2c14f2cf4f06f1b99e04d59779d8d103d726a Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 17:30:38 +0200 Subject: [PATCH 331/501] [WDR] add special handling if alt-url is a m3u8 --- youtube_dl/extractor/wdr.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 73a343c69..fddcbf190 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -9,6 +9,7 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + determine_ext, strip_jsonp, unified_strdate, ExtractorError, @@ -61,7 +62,7 @@ class WDRIE(InfoExtractor): 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', 'info_dict': { 'id': 'mdb-103364', - 'ext': 'flv', + 'ext': 'mp4', 'display_id': 'index', 'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'alt_title': 'WDR Fernsehen Live', @@ -69,7 +70,10 @@ class WDRIE(InfoExtractor): 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', 'is_live': True, 'subtitles': {} - } + }, + 'params': { + 'skip_download': True, # m3u8 download + }, }, { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html', @@ -126,9 +130,16 @@ class WDRIE(InfoExtractor): if metadata_media_alt: for tag_name in ['videoURL', 'audioURL']: if tag_name in 
metadata_media_alt: - formats.append({ - 'url': metadata_media_alt[tag_name] - }) + alt_url = metadata_media_alt[tag_name] + if determine_ext(alt_url) == 'm3u8': + m3u_fmt = self._extract_m3u8_formats( + alt_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + formats.extend(m3u_fmt) + else: + formats.append({ + 'url': alt_url + }) # check if there are flash-streams for this video if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: From 33a1ff7113d9dd656b3c56cb404de85646caa559 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 19:08:12 +0200 Subject: [PATCH 332/501] [WDR] extract jsonp-url by parsing data-extension of mediaLink --- youtube_dl/extractor/wdr.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index fddcbf190..dd107ef8a 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,6 +10,7 @@ from ..compat import ( ) from ..utils import ( determine_ext, + js_to_json, strip_jsonp, unified_strdate, ExtractorError, @@ -21,8 +22,6 @@ class WDRIE(InfoExtractor): _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL - _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' - _TESTS = [ { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', @@ -102,9 +101,13 @@ class WDRIE(InfoExtractor): display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) - js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None) + # for wdr.de the data-extension is in a tag with the class "mediaLink" + # for wdrmaus its in a link to the page in a multiline "videoLink"-tag + json_metadata = self._html_search_regex( + 
r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', + webpage, 'media link', default=None, flags=re.MULTILINE) - if not js_url: + if not json_metadata: entries = [ self.url_result(page_url + href[0], 'WDR') for href in re.findall( @@ -117,8 +120,12 @@ class WDRIE(InfoExtractor): raise ExtractorError('No downloadable streams found', expected=True) + media_link_obj = self._parse_json(json_metadata, display_id, + transform_source=js_to_json) + jsonp_url = media_link_obj['mediaObj']['url'] + metadata = self._download_json( - js_url, 'metadata', transform_source=strip_jsonp) + jsonp_url, 'metadata', transform_source=strip_jsonp) metadata_tracker_data = metadata['trackerData'] metadata_media_resource = metadata['mediaResource'] From 949fc42e009aed5414caad280d0dc551ffcd9c14 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 19:58:55 +0200 Subject: [PATCH 333/501] [WDR] the other wdrmaus.de pages also changed to the new player --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/wdr.py | 89 +++++------------------------- 2 files changed, 15 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6de3438fc..023598130 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -923,7 +923,6 @@ from .wat import WatIE from .wdr import ( WDRIE, WDRMobileIE, - WDRMausIE, ) from .webofstories import ( WebOfStoriesIE, diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index dd107ef8a..1af1e996d 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -4,10 +4,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) from ..utils import ( determine_ext, js_to_json, @@ -18,7 +14,7 @@ from ..utils import ( class WDRIE(InfoExtractor): - _CURRENT_MAUS_URL = 
r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5' + _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL @@ -92,6 +88,20 @@ class WDRIE(InfoExtractor): }, 'skip': 'The id changes from week to week because of the new episode' }, + { + 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', + 'md5': 'ca365705551e4bd5217490f3b0591290', + 'info_dict': { + 'id': 'mdb-186083', + 'ext': 'flv', + 'upload_date': '20130919', + 'title': 'Sachgeschichte - Achterbahn ', + 'description': '- Die Sendung mit der Maus -', + }, + 'params': { + 'skip_download': True, # the file has different versions :( + }, + }, ] def _real_extract(self, url): @@ -222,72 +232,3 @@ class WDRMobileIE(InfoExtractor): 'User-Agent': 'mobile', }, } - - -class WDRMausIE(InfoExtractor): - _VALID_URL = 'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)((?<!index)\.php5|/(?:$|[?#]))' - IE_DESC = 'Sendung mit der Maus' - _TESTS = [{ - 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', - 'md5': '178b432d002162a14ccb3e0876741095', - 'info_dict': { - 'id': 'achterbahn', - 'ext': 'mp4', - 'thumbnail': 're:^http://.+\.jpg', - 'upload_date': '20131001', - 'title': '19.09.2013 - Achterbahn', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - param_code = self._html_search_regex( - r'<a href="\?startVideo=1&([^"]+)"', webpage, 'parameters') - - title_date = self._search_regex( - r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>', - webpage, 'air date') - title_str = self._html_search_regex( - r'<h1>(.*?)</h1>', webpage, 'title') - title = '%s - %s' % (title_date, title_str) - upload_date = unified_strdate( - self._html_search_meta('dc.date', 
webpage)) - - fields = compat_parse_qs(param_code) - video_url = fields['firstVideo'][0] - thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0]) - - formats = [{ - 'format_id': 'rtmp', - 'url': video_url, - }] - - jscode = self._download_webpage( - 'http://www.wdrmaus.de/codebase/js/extended-medien.min.js', - video_id, fatal=False, - note='Downloading URL translation table', - errnote='Could not download URL translation table') - if jscode: - for m in re.finditer( - r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}", - jscode): - if video_url.startswith(m.group('stream')): - http_url = video_url.replace( - m.group('stream'), m.group('dl')) - formats.append({ - 'format_id': 'http', - 'url': http_url, - }) - break - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - } From 3a686853e1739dfc26548cdc09fe89e693e76a9f Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 20:16:33 +0200 Subject: [PATCH 334/501] [WDR] fixed parsing of playlists --- youtube_dl/extractor/wdr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 1af1e996d..1e729cb7c 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -72,7 +72,7 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html', - 'playlist_mincount': 10, + 'playlist_mincount': 8, 'info_dict': { 'id': 'aktuelle-stunde/aktuelle-stunde-120', }, @@ -121,7 +121,7 @@ class WDRIE(InfoExtractor): entries = [ self.url_result(page_url + href[0], 'WDR') for href in re.findall( - r'<a href="(%s)"' % self._PAGE_REGEX, + r'<a href="(%s)"[^>]+data-extension=' % self._PAGE_REGEX, webpage) ] From 2615fa758422deaaf11049e71f0c183e655c0b76 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 01:46:12 +0600 Subject: [PATCH 335/501] [downloader/f4m] Simply select format when it's the only one --- youtube_dl/downloader/f4m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 314def4cb..8f88b0241 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -319,7 +319,7 @@ class F4mFD(FragmentFD): doc = compat_etree_fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) for f in self._get_unencrypted_media(doc)] - if requested_bitrate is None: + if requested_bitrate is None or len(formats) == 1: # get the best format formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] From 77b8b4e696dd5ffb1330a2de328eb9c3ecd09a15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 01:47:44 +0600 Subject: [PATCH 336/501] [extractor/common] Borrow quality metadata from parent set-level manifest for f4m --- youtube_dl/extractor/common.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0029c3694..57793537b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1035,6 +1035,10 @@ class InfoExtractor(object): 'bootstrap info', default=None) for i, media_el in enumerate(media_nodes): + tbr = int_or_none(media_el.attrib.get('bitrate')) + width = int_or_none(media_el.attrib.get('width')) + height = int_or_none(media_el.attrib.get('height')) + format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])) # If <bootstrapInfo> is present, the specified f4m is a # stream-level manifest, and only set-level manifests may refer to # external resources. 
See section 11.4 and section 4 of F4M spec @@ -1056,23 +1060,35 @@ class InfoExtractor(object): # bitrate in f4m downloader ext = determine_ext(manifest_url) if ext == 'f4m': - formats.extend(self._extract_f4m_formats( + f4m_formats = self._extract_f4m_formats( manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal)) + transform_source=transform_source, fatal=fatal) + # Sometimes stream-level manifest contains single media entry that + # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player). + # At the same time parent's media entry in set-level manifest may + # contain it. We will copy it from parent in such cases. + if len(f4m_formats) == 1: + f = f4m_formats[0] + f.update({ + 'tbr': f.get('tbr') or tbr, + 'width': f.get('width') or width, + 'height': f.get('height') or height, + 'format_id': f.get('format_id') if not tbr else format_id, + }) + formats.extend(f4m_formats) continue elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( manifest_url, video_id, 'mp4', preference=preference, m3u8_id=m3u8_id, fatal=fatal)) continue - tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ - 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), + 'format_id': format_id, 'url': manifest_url, 'ext': 'flv' if bootstrap_info else None, 'tbr': tbr, - 'width': int_or_none(media_el.attrib.get('width')), - 'height': int_or_none(media_el.attrib.get('height')), + 'width': width, + 'height': height, 'preference': preference, }) return formats From f36532404dedb08f103083fba931864927de369d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 22:19:10 +0600 Subject: [PATCH 337/501] [vk] Remove superfluous code --- youtube_dl/extractor/vk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 041d93629..79c819bc3 100644 --- 
a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -217,7 +217,6 @@ class VKIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') - info_url = url if video_id: info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id # Some videos (removed?) can only be downloaded with list id specified From 11c70deba792de58c64c82d96ffcfdf295483b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 23:34:58 +0600 Subject: [PATCH 338/501] [coub] Add extractor (Closes #9609) --- youtube_dl/extractor/coub.py | 139 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 140 insertions(+) create mode 100644 youtube_dl/extractor/coub.py diff --git a/youtube_dl/extractor/coub.py b/youtube_dl/extractor/coub.py new file mode 100644 index 000000000..c3b09b177 --- /dev/null +++ b/youtube_dl/extractor/coub.py @@ -0,0 +1,139 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + parse_iso8601, + qualities, +) + + +class CoubIE(InfoExtractor): + _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)' + + _TESTS = [{ + 'url': 'http://coub.com/view/5u5n1', + 'info_dict': { + 'id': '5u5n1', + 'ext': 'mp4', + 'title': 'The Matrix Moonwalk', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 4.6, + 'timestamp': 1428527772, + 'upload_date': '20150408', + 'uploader': 'Артём Лоскутников', + 'uploader_id': 'artyom.loskutnikov', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + 'age_limit': 0, + }, + }, { + 'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4', + 'only_matching': True, + }, { + 'url': 'coub:5u5n1', + 'only_matching': True, + }] + + def _real_extract(self, url): + 
video_id = self._match_id(url) + + coub = self._download_json( + 'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id) + + if coub.get('error'): + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, coub['error']), expected=True) + + title = coub['title'] + + file_versions = coub['file_versions'] + + QUALITIES = ('low', 'med', 'high') + + MOBILE = 'mobile' + IPHONE = 'iphone' + HTML5 = 'html5' + + SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5) + + quality_key = qualities(QUALITIES) + preference_key = qualities(SOURCE_PREFERENCE) + + formats = [] + + for kind, items in file_versions.get(HTML5, {}).items(): + if kind not in ('video', 'audio'): + continue + if not isinstance(items, dict): + continue + for quality, item in items.items(): + if not isinstance(item, dict): + continue + item_url = item.get('url') + if not item_url: + continue + formats.append({ + 'url': item_url, + 'format_id': '%s-%s-%s' % (HTML5, kind, quality), + 'filesize': int_or_none(item.get('size')), + 'vcodec': 'none' if kind == 'audio' else None, + 'quality': quality_key(quality), + 'preference': preference_key(HTML5), + }) + + iphone_url = file_versions.get(IPHONE, {}).get('url') + if iphone_url: + formats.append({ + 'url': iphone_url, + 'format_id': IPHONE, + 'preference': preference_key(IPHONE), + }) + + mobile_url = file_versions.get(MOBILE, {}).get('audio_url') + if mobile_url: + formats.append({ + 'url': mobile_url, + 'format_id': '%s-audio' % MOBILE, + 'preference': preference_key(MOBILE), + }) + + self._sort_formats(formats) + + thumbnail = coub.get('picture') + duration = float_or_none(coub.get('duration')) + timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at')) + uploader = coub.get('channel', {}).get('title') + uploader_id = coub.get('channel', {}).get('permalink') + + view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count')) + like_count = int_or_none(coub.get('likes_count')) + repost_count = 
int_or_none(coub.get('recoubs_count')) + comment_count = int_or_none(coub.get('comments_count')) + + age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin')) + if age_restricted is not None: + age_limit = 18 if age_restricted is True else 0 + else: + age_limit = None + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'like_count': like_count, + 'repost_count': repost_count, + 'comment_count': comment_count, + 'age_limit': age_limit, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ddf62139e..dd4b2b838 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -143,6 +143,7 @@ from .cnn import ( CNNBlogsIE, CNNArticleIE, ) +from .coub import CoubIE from .collegerama import CollegeRamaIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comcarcoff import ComCarCoffIE From de7d76af52c6cb462dfab967d57f5fa7cd17df50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 23:38:17 +0600 Subject: [PATCH 339/501] [coub] Add another test --- youtube_dl/extractor/coub.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/coub.py b/youtube_dl/extractor/coub.py index c3b09b177..a901b8d22 100644 --- a/youtube_dl/extractor/coub.py +++ b/youtube_dl/extractor/coub.py @@ -38,6 +38,10 @@ class CoubIE(InfoExtractor): }, { 'url': 'coub:5u5n1', 'only_matching': True, + }, { + # longer video id + 'url': 'http://coub.com/view/237d5l5h', + 'only_matching': True, }] def _real_extract(self, url): From 807cf7b07f6ac1299f5578ea1264b43fc30d8301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 28 May 2016 21:18:24 +0600 Subject: [PATCH 340/501] [udemy] Fix authentication for 
localized layout (Closes #9594) --- youtube_dl/extractor/udemy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 13e0cd237..89b869559 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -142,7 +142,9 @@ class UdemyIE(InfoExtractor): self._LOGIN_URL, None, 'Downloading login popup') def is_logged(webpage): - return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<']) + return any(re.search(p, webpage) for p in ( + r'href=["\'](?:https://www\.udemy\.com)?/user/logout/', + r'>Logout<')) # already logged in if is_logged(login_popup): From 6461f2b7ec5fa0114d4bb38ca27c0f72edff8e23 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 01:26:00 +0800 Subject: [PATCH 341/501] [bilibili] Fix extraction, improve and cleanup --- youtube_dl/extractor/bilibili.py | 129 ++++++++++++++++++++----------- 1 file changed, 86 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 8baff2041..71a54b4f4 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -1,34 +1,42 @@ # coding: utf-8 from __future__ import unicode_literals +import calendar +import datetime import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_etree_fromstring, + compat_str, + compat_parse_qs, + compat_xml_parse_error, +) from ..utils import ( - int_or_none, - unescapeHTML, ExtractorError, + int_or_none, + float_or_none, xpath_text, ) class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' 
+ _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', - 'md5': '2c301e4dab317596e837c3e7633e7d86', + 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', 'info_dict': { 'id': '1554319', 'ext': 'flv', 'title': '【金坷垃】金泡沫', - 'duration': 308313, + 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', + 'duration': 308.067, + 'timestamp': 1398012660, 'upload_date': '20140420', 'thumbnail': 're:^https?://.+\.jpg', - 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', - 'timestamp': 1397983878, 'uploader': '菊子桑', + 'uploader_id': '156160', }, }, { 'url': 'http://www.bilibili.com/video/av1041170/', @@ -36,75 +44,110 @@ class BiliBiliIE(InfoExtractor): 'id': '1041170', 'title': '【BD1080P】刀语【诸神&异域】', 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', - 'uploader': '枫叶逝去', - 'timestamp': 1396501299, }, 'playlist_count': 9, }] + # BiliBili blocks keys from time to time. The current key is extracted from + # the Android client + # TODO: find the sign algorithm used in the flash player + _APP_KEY = '86385cdc024c0f6c' + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - page_num = mobj.group('page_num') or '1' - view_data = self._download_json( - 'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num), - video_id) - if 'error' in view_data: - raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True) + webpage = self._download_webpage(url, video_id) - cid = view_data['cid'] - title = unescapeHTML(view_data['title']) + params = compat_parse_qs(self._search_regex( + [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', + r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + webpage, 'player parameters')) + cid = params['cid'][0] - doc = self._download_xml( - 'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid, - cid, - 'Downloading page %s/%s' % (page_num, 
view_data['pages']) - ) + info_xml_str = self._download_webpage( + 'http://interface.bilibili.com/v_cdn_play', + cid, query={'appkey': self._APP_KEY, 'cid': cid}, + note='Downloading video info page') - if xpath_text(doc, './result') == 'error': - raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True) + err_msg = None + durls = None + info_xml = None + try: + info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8')) + except compat_xml_parse_error: + info_json = self._parse_json(info_xml_str, video_id, fatal=False) + err_msg = (info_json or {}).get('error_text') + else: + err_msg = xpath_text(info_xml, './message') + + if info_xml is not None: + durls = info_xml.findall('./durl') + if not durls: + if err_msg: + raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True) + else: + raise ExtractorError('No videos found!') entries = [] - for durl in doc.findall('./durl'): + for durl in durls: size = xpath_text(durl, ['./filesize', './size']) formats = [{ 'url': durl.find('./url').text, 'filesize': int_or_none(size), - 'ext': 'flv', }] - backup_urls = durl.find('./backup_url') - if backup_urls is not None: - for backup_url in backup_urls.findall('./url'): - formats.append({'url': backup_url.text}) - formats.reverse() + for backup_url in durl.findall('./backup_url/url'): + formats.append({ + 'url': backup_url.text, + # backup URLs have lower priorities + 'preference': -2 if 'hd.mp4' in backup_url.text else -3, + }) + + self._sort_formats(formats) entries.append({ 'id': '%s_part%s' % (cid, xpath_text(durl, './order')), - 'title': title, 'duration': int_or_none(xpath_text(durl, './length'), 1000), 'formats': formats, }) + title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title') + description = self._html_search_meta('description', webpage) + datetime_str = self._html_search_regex( + r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False) + if datetime_str: + timestamp = 
calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple()) + + # TODO 'view_count' requires deobfuscating Javascript info = { 'id': compat_str(cid), 'title': title, - 'description': view_data.get('description'), - 'thumbnail': view_data.get('pic'), - 'uploader': view_data.get('author'), - 'timestamp': int_or_none(view_data.get('created')), - 'view_count': int_or_none(view_data.get('play')), - 'duration': int_or_none(xpath_text(doc, './timelength')), + 'description': description, + 'timestamp': timestamp, + 'thumbnail': self._html_search_meta('thumbnailUrl', webpage), + 'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000), } + uploader_mobj = re.search( + r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"', + webpage) + if uploader_mobj: + info.update({ + 'uploader': uploader_mobj.group('name'), + 'uploader_id': uploader_mobj.group('id'), + }) + + for entry in entries: + entry.update(info) + if len(entries) == 1: - entries[0].update(info) return entries[0] else: - info.update({ + return { '_type': 'multi_video', 'id': video_id, + 'title': title, + 'description': description, 'entries': entries, - }) - return info + } From 92cf872a4870482e797bcd54316a4b8dc024fcc5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 01:58:27 +0800 Subject: [PATCH 342/501] [.gitignore] Ignore mp3 files [ci skip] --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d5f216b5f..d13551274 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ updates_key.pem *.mp4 *.m4a *.m4v +*.mp3 *.part *.swp test/testdata From 2bee7b25f39471c1ac5641b714e003bcf8335d15 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 01:59:09 +0800 Subject: [PATCH 343/501] [Makefile] Cleanup m4a files [ci skip] --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 
d760e4576..3861b23d5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete From 2a329110b90102720bf5de00355473586674040f Mon Sep 17 00:00:00 2001 From: venth <artur.krysiak.warszawa@gmail.com> Date: Tue, 19 Apr 2016 11:17:46 +0200 Subject: [PATCH 344/501] ignored intellij related files --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d13551274..a802c75a1 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,9 @@ test/testdata test/local_parameters.json .tox youtube-dl.zsh + +# IntelliJ related files .idea -.idea/* +*.iml + tmp/ From f574103d7ca08a63e0dc58fdd7efde0871b9b395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 09:03:17 +0600 Subject: [PATCH 345/501] [buildserver] Fix buildserver and make python2 compatible --- devscripts/buildserver.py | 62 +++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 7c2f49f8b..dada6bfc7 100644 --- 
a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -1,17 +1,42 @@ #!/usr/bin/python3 -from http.server import HTTPServer, BaseHTTPRequestHandler -from socketserver import ThreadingMixIn import argparse import ctypes import functools +import shutil +import subprocess import sys +import tempfile import threading import traceback import os.path +sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) +from youtube_dl.compat import ( + compat_http_server, + compat_str, + compat_urlparse, +) -class BuildHTTPServer(ThreadingMixIn, HTTPServer): +# These are not used outside of buildserver.py thus not in compat.py + +try: + import winreg as compat_winreg +except ImportError: # Python 2 + import _winreg as compat_winreg + +try: + import socketserver as compat_socketserver +except ImportError: # Python 2 + import SocketServer as compat_socketserver + +try: + compat_input = raw_input +except NameError: # Python 3 + compat_input = input + + +class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer): allow_reuse_address = True @@ -216,7 +241,7 @@ def main(args=None): srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) thr = threading.Thread(target=srv.serve_forever) thr.start() - input('Press ENTER to shut down') + compat_input('Press ENTER to shut down') srv.shutdown() thr.join() @@ -231,8 +256,6 @@ def rmtree(path): os.remove(fname) os.rmdir(path) -#============================================================================== - class BuildError(Exception): def __init__(self, output, code=500): @@ -249,15 +272,16 @@ class HTTPError(BuildError): class PythonBuilder(object): def __init__(self, **kwargs): - pythonVersion = kwargs.pop('python', '2.7') + python_version = kwargs.pop('python', '3.4') try: - key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion) + key = compat_winreg.OpenKey( + compat_winreg.HKEY_LOCAL_MACHINE, 
r'SOFTWARE\Python\PythonCore\%s\InstallPath' % python_version) try: - self.pythonPath, _ = _winreg.QueryValueEx(key, '') + self.pythonPath, _ = compat_winreg.QueryValueEx(key, '') finally: - _winreg.CloseKey(key) + compat_winreg.CloseKey(key) except Exception: - raise BuildError('No such Python version: %s' % pythonVersion) + raise BuildError('No such Python version: %s' % python_version) super(PythonBuilder, self).__init__(**kwargs) @@ -305,8 +329,10 @@ class YoutubeDLBuilder(object): def build(self): try: - subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], - cwd=self.buildPath) + proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath) + proc.wait() + #subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], + # cwd=self.buildPath) except subprocess.CalledProcessError as e: raise BuildError(e.output) @@ -369,12 +395,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea pass -class BuildHTTPRequestHandler(BaseHTTPRequestHandler): +class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler): actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching. 
def do_GET(self): - path = urlparse.urlparse(self.path) - paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()]) + path = compat_urlparse.urlparse(self.path) + paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()]) action, _, path = path.path.strip('/').partition('/') if path: path = path.split('/') @@ -388,7 +414,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler): builder.close() except BuildError as e: self.send_response(e.code) - msg = unicode(e).encode('UTF-8') + msg = compat_str(e).encode('UTF-8') self.send_header('Content-Type', 'text/plain; charset=UTF-8') self.send_header('Content-Length', len(msg)) self.end_headers() @@ -400,7 +426,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler): else: self.send_response(500, 'Malformed URL') -#============================================================================== - if __name__ == '__main__': main() From 44c88923696d383bb1a74d9890e7e3126b846625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 09:06:10 +0600 Subject: [PATCH 346/501] [devscripts/prepare_manpage] Fix manpage generation on Windows --- devscripts/prepare_manpage.py | 61 +++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 776e6556e..e3f6339b5 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -1,13 +1,46 @@ from __future__ import unicode_literals import io +import optparse import os.path -import sys import re ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) README_FILE = os.path.join(ROOT_DIR, 'README.md') +PREFIX = '''%YOUTUBE-DL(1) + +# NAME + +youtube\-dl \- download videos from youtube.com or other video platforms + +# SYNOPSIS + +**youtube-dl** \[OPTIONS\] URL [URL...] 
+ +''' + + +def main(): + parser = optparse.OptionParser(usage='%prog OUTFILE.md') + options, args = parser.parse_args() + if len(args) != 1: + parser.error('Expected an output filename') + + outfile, = args + + with io.open(README_FILE, encoding='utf-8') as f: + readme = f.read() + + readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) + readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) + readme = PREFIX + readme + + readme = filter_options(readme) + + with io.open(outfile, 'w', encoding='utf-8') as outf: + outf.write(readme) + def filter_options(readme): ret = '' @@ -37,27 +70,5 @@ def filter_options(readme): return ret -with io.open(README_FILE, encoding='utf-8') as f: - readme = f.read() - -PREFIX = '''%YOUTUBE-DL(1) - -# NAME - -youtube\-dl \- download videos from youtube.com or other video platforms - -# SYNOPSIS - -**youtube-dl** \[OPTIONS\] URL [URL...] - -''' -readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) -readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) -readme = PREFIX + readme - -readme = filter_options(readme) - -if sys.version_info < (3, 0): - print(readme.encode('utf-8')) -else: - print(readme) +if __name__ == '__main__': + main() From 27f17c0eabde55cbaab613280f60c01f5ee01025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 09:11:16 +0600 Subject: [PATCH 347/501] [Makefile] Fix youtube-dl.1 target Now it accepts output filename as argument --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3861b23d5..6ee4ba4eb 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,7 @@ README.txt: README.md pandoc -f markdown -t plain README.md -o README.txt youtube-dl.1: README.md - $(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md + $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1 rm -f 
youtube-dl.1.temp.md From 165e3561e9ec8f8a1a1037e4fdebe880cdbd92fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 10:02:00 +0600 Subject: [PATCH 348/501] [devscripts/buildserver] Check Wow6432Node first when searching for python This allows building releases from 64bit OS --- devscripts/buildserver.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index dada6bfc7..2bd12da50 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -273,16 +273,25 @@ class HTTPError(BuildError): class PythonBuilder(object): def __init__(self, **kwargs): python_version = kwargs.pop('python', '3.4') - try: - key = compat_winreg.OpenKey( - compat_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % python_version) + python_path = None + for node in ('Wow6432Node\\', ''): try: - self.pythonPath, _ = compat_winreg.QueryValueEx(key, '') - finally: - compat_winreg.CloseKey(key) - except Exception: + key = compat_winreg.OpenKey( + compat_winreg.HKEY_LOCAL_MACHINE, + r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version)) + try: + python_path, _ = compat_winreg.QueryValueEx(key, '') + finally: + compat_winreg.CloseKey(key) + break + except Exception: + pass + + if not python_path: raise BuildError('No such Python version: %s' % python_version) + self.pythonPath = python_path + super(PythonBuilder, self).__init__(**kwargs) From f3fb420b827ce04dff101b64d81f8658fa2e5c73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 11:49:14 +0600 Subject: [PATCH 349/501] [devscripts/release.sh] Check for wheel --- devscripts/release.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 7dd391b38..c34567f4c 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -6,7 
+6,7 @@ # * the git config user.signingkey is properly set # You will need -# pip install coverage nose rsa +# pip install coverage nose rsa wheel # TODO # release notes @@ -35,6 +35,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi +if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From 9ed6d8c6c5b0c7a411d6b97d269a3e786875d66a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 13:54:05 +0800 Subject: [PATCH 350/501] [youku] Extract resolution --- youtube_dl/extractor/youku.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 349ce0941..dbccbe228 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -275,6 +275,8 @@ class YoukuIE(InfoExtractor): 'format_id': self.get_format_name(fm), 'ext': self.parse_ext_l(fm), 'filesize': int(seg['size']), + 'width': stream.get('width'), + 'height': stream.get('height'), }) return { From 681b923b5ca04338dfacd4154f627255d6e27d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 23:36:42 +0700 Subject: [PATCH 351/501] [devscripts/release.sh] Allow passing buildserver address as cli option --- devscripts/release.sh | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index c34567f4c..cde4d0a39 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -15,10 +15,28 @@ set -e skip_tests=true -if [ "$1" = '--run-tests' ]; then - skip_tests=false - shift 
-fi +buildserver='localhost:8142' + +while true +do +case "$1" in + --run-tests) + skip_tests=false + shift + ;; + --buildserver) + buildserver="$2" + shift 2 + ;; + --*) + echo "ERROR: unknown option $1" + exit 1 + ;; + *) + break + ;; +esac +done if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi version="$1" @@ -67,7 +85,7 @@ git push origin "$version" REV=$(git rev-parse HEAD) make youtube-dl youtube-dl.tar.gz read -p "VM running? (y/n) " -n 1 -wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe mkdir -p "build/$version" mv youtube-dl youtube-dl.exe "build/$version" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" From 56bd028a0f4b3809403d887012bb93bbc06296a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 00:21:18 +0700 Subject: [PATCH 352/501] [devscripts/buildserver] Listen on all interfaces --- devscripts/buildserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 2bd12da50..f7979c43e 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -216,7 +216,7 @@ def main(args=None): action='store_const', dest='action', const='service', help='Run as a Windows service') parser.add_argument('-b', '--bind', metavar='<host:port>', - action='store', default='localhost:8142', + action='store', default='0.0.0.0:8142', help='Bind to host:port (default %default)') options = parser.parse_args(args=args) From 917a3196f862ff785d15e595e03363c94e9d2e5b Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Mon, 30 May 2016 01:03:40 +0700 Subject: [PATCH 353/501] [README.md] Update c runtime dependency FAQ entry --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 
ef0e265c8..2b8a0fb85 100644 --- a/README.md +++ b/README.md @@ -784,9 +784,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`. -### The exe throws a *Runtime error from Visual C++* +### The exe throws an error due to missing `MSVCR100.dll` -To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). +To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555). ### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files? From 82674236520b55893f6767362a32f74a831362ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 01:18:23 +0700 Subject: [PATCH 354/501] release 2016.05.30 --- .github/ISSUE_TEMPLATE.md | 6 +++--- README.md | 15 ++++++++------- docs/supportedsites.md | 16 +++++++++++----- youtube_dl/version.py | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2d80d45b6..03dac8244 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.21.2 +[debug] youtube-dl version 2016.05.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index 2b8a0fb85..7e18112de 100644 --- a/README.md +++ b/README.md @@ -73,8 +73,8 @@ which means you can modify it, redistribute it or use it however you like. repairs broken URLs, but emits an error if this is not possible instead of searching. --ignore-config Do not read configuration files. When given - in the global configuration file /etc - /youtube-dl.conf: Do not read the user + in the global configuration file + /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube- dl/config (%APPDATA%/youtube-dl/config.txt on Windows) @@ -256,11 +256,12 @@ which means you can modify it, redistribute it or use it however you like. 
jar in --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information - permanently. By default $XDG_CACHE_HOME - /youtube-dl or ~/.cache/youtube-dl . At the - moment, only YouTube player files (for - videos with obfuscated signatures) are - cached, but that may change. + permanently. By default + $XDG_CACHE_HOME/youtube-dl or + ~/.cache/youtube-dl . At the moment, only + YouTube player files (for videos with + obfuscated signatures) are cached, but that + may change. --no-cache-dir Disable filesystem caching --rm-cache-dir Delete all filesystem cache files diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cd6bfa51c..bbc647030 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -43,8 +43,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** + - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -136,6 +136,7 @@ - **ComedyCentral** - **ComedyCentralShows**: The Daily Show / The Colbert Report - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED + - **Coub** - **Cracked** - **Crackle** - **Criterion** @@ -205,6 +206,7 @@ - **exfm**: ex.fm - **ExpoTV** - **ExtremeTube** + - **EyedoTV** - **facebook** - **faz.net** - **fc2** @@ -326,8 +328,8 @@ - **LePlaylist** - **LetvCloud**: 乐视云 - **Libsyn** + - **life**: Life.ru - **life:embed** - - **lifenews**: LIFE | NEWS - **limelight** - **limelight:channel** - **limelight:channel_list** @@ -512,6 +514,8 @@ - **R7** - **radio.de** - **radiobremen** + - **radiocanada** + - **RadioCanadaAudioVideo** - **radiofrance** - **RadioJavan** - **Rai** @@ -521,6 +525,7 @@ - **RedTube** - **RegioTV** - **Restudy** + - **Reuters** - 
**ReverbNation** - **Revision3** - **RICE** @@ -682,8 +687,8 @@ - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru - **tvland.com** - - **tvp.pl** - - **tvp.pl:Series** + - **tvp**: Telewizja Polska + - **tvp:series** - **TVPlay**: TV3Play and related services - **Tweakers** - **twitch:chapter** @@ -766,7 +771,8 @@ - **VuClip** - **vulture.com** - **Walla** - - **WashingtonPost** + - **washingtonpost** + - **washingtonpost:article** - **wat.tv** - **WatchIndianPorn**: Watch Indian Porn - **WDR** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 522a56669..0c38ec4cf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.21.2' +__version__ = '2016.05.30' From 86a52881c6211dd58503480dc62b4f2404cc0f6b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 29 May 2016 21:29:38 +0200 Subject: [PATCH 355/501] [travis] unsubscribe @phihag --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 998995845..136c339f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,6 @@ script: nosetests test --verbose notifications: email: - filippo.valsorda@gmail.com - - phihag@phihag.de - yasoob.khld@gmail.com # irc: # channels: From f657b1a5f29e9f5eac7ca41b6e98c38cb3128183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 03:03:06 +0700 Subject: [PATCH 356/501] release 2016.05.30.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 03dac8244..1099b0c92 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.30 +[debug] youtube-dl version 2016.05.30.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0c38ec4cf..5f8542f8e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.30' +__version__ = '2016.05.30.1' From abbb2938fa08733e3a08f6d1917aa7687633b971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 03:12:12 +0700 Subject: [PATCH 357/501] release 2016.05.30.2 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) 
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1099b0c92..e3de48eb5 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.30.1 +[debug] youtube-dl version 2016.05.30.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5f8542f8e..ad6fb26c6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.30.1' +__version__ = '2016.05.30.2' From 
197a5da1d01179f6a2d60e3c2017b0070e5abc8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 03:26:26 +0700 Subject: [PATCH 358/501] [yandexmusic] Improve captcha detection --- youtube_dl/extractor/yandexmusic.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 0f78466e6..b37d0eab6 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -20,18 +20,24 @@ class YandexMusicBaseIE(InfoExtractor): error = response.get('error') if error: raise ExtractorError(error, expected=True) + if response.get('type') == 'captcha' or 'captcha' in response: + YandexMusicBaseIE._raise_captcha() + + @staticmethod + def _raise_captcha(): + raise ExtractorError( + 'YandexMusic has considered youtube-dl requests automated and ' + 'asks you to solve a CAPTCHA. You can either wait for some ' + 'time until unblocked and optionally use --sleep-interval ' + 'in future or alternatively you can go to https://music.yandex.ru/ ' + 'solve CAPTCHA, then export cookies and pass cookie file to ' + 'youtube-dl with --cookies', + expected=True) def _download_webpage(self, *args, **kwargs): webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs) if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage: - raise ExtractorError( - 'YandexMusic has considered youtube-dl requests automated and ' - 'asks you to solve a CAPTCHA. 
You can either wait for some ' - 'time until unblocked and optionally use --sleep-interval ' - 'in future or alternatively you can go to https://music.yandex.ru/ ' - 'solve CAPTCHA, then export cookies and pass cookie file to ' - 'youtube-dl with --cookies', - expected=True) + self._raise_captcha() return webpage def _download_json(self, *args, **kwargs): From 8ec2b2c41c7f3952ad9097085993d1f24f6b6776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 21:48:35 +0700 Subject: [PATCH 359/501] [options] Add --limit-rate alias for rate limiting option Closes #9644 In order to follow regular --verb-noun pattern and better conformity with wget and curl --- youtube_dl/options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 38efd292d..14051b714 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -395,8 +395,8 @@ def parseOpts(overrideArguments=None): downloader = optparse.OptionGroup(parser, 'Download Options') downloader.add_option( - '-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', + '-r', '--limit-rate', '--rate-limit', + dest='ratelimit', metavar='RATE', help='Maximum download rate in bytes per second (e.g. 
50K or 4.2M)') downloader.add_option( '-R', '--retries', From e7d85c4ef7d2c74058d41ded1e2a6d6aa527dc9a Mon Sep 17 00:00:00 2001 From: Peter Rowlands <peter@pmrowla.com> Date: Tue, 31 May 2016 17:28:49 +0900 Subject: [PATCH 360/501] use /track/video/file to determine if video exists --- youtube_dl/extractor/afreecatv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 0fcbea0d1..518c61f67 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -11,6 +11,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + xpath_element, xpath_text, ) @@ -84,9 +85,10 @@ class AfreecaTVIE(InfoExtractor): path='/api/video/get_video_info.php')) video_xml = self._download_xml(info_url, video_id) - if xpath_text(video_xml, './track/flag', default='FAIL') != 'SUCCEED': + if xpath_element(video_xml, './track/video/file') is None: raise ExtractorError('Specified AfreecaTV video does not exist', expected=True) + title = xpath_text(video_xml, './track/title', 'title') uploader = xpath_text(video_xml, './track/nickname', 'uploader') uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') From 877032314fdf2d9b391325f96e3bc53a60ea067c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 18:37:34 +0800 Subject: [PATCH 361/501] [generic] Improve Kaltura detection Closes #4004 --- youtube_dl/extractor/generic.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 303e112d2..5cb188b20 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -918,6 +918,19 @@ class GenericIE(InfoExtractor): 'uploader_id': 'echojecka', }, }, + # Kaltura embed with single quotes + { + 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY', + 'info_dict': { + 'id': 
'0_izeg5utt', + 'ext': 'mp4', + 'title': '35871', + 'timestamp': 1355743100, + 'upload_date': '20121217', + 'uploader_id': 'batchUser', + }, + 'add_ie': ['Kaltura'], + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -1903,7 +1916,7 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or + mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage)) if mobj is not None: return self.url_result(smuggle_url( From 28bab13348f84ac75e4d1362ce5828429bb7993f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 19:18:01 +0800 Subject: [PATCH 362/501] [generic,viewlift] Move a test case to the specialized extractor --- youtube_dl/extractor/generic.py | 12 ------------ youtube_dl/extractor/viewlift.py | 4 ++++ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5cb188b20..e478f86a8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -881,18 +881,6 @@ class GenericIE(InfoExtractor): 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', } }, - # Kaltura embed - { - 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15', - 'info_dict': { - 'id': '1_eergr3h1', - 'ext': 'mp4', - 'upload_date': '20150226', - 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com', - 'timestamp': int, - 'title': 
'John Carlson Postgame 2/25/15', - }, - }, # Kaltura embed (different embed code) { 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014', diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py index dd4a13a4a..19500eba8 100644 --- a/youtube_dl/extractor/viewlift.py +++ b/youtube_dl/extractor/viewlift.py @@ -141,6 +141,10 @@ class ViewLiftIE(ViewLiftBaseIE): }, { 'url': 'http://www.kesari.tv/news/video/1461919076414', 'only_matching': True, + }, { + # Was once Kaltura embed + 'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15', + 'only_matching': True, }] def _real_extract(self, url): From 0ff3749bfe6d149dd7250ea8df83387d3af40e0f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 19:23:09 +0800 Subject: [PATCH 363/501] [udn] Fix m3u8 and f4m extraction as well as improve --- youtube_dl/extractor/udn.py | 62 ++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index ee35b7227..57dd73aef 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -2,10 +2,13 @@ from __future__ import unicode_literals import json +import re + from .common import InfoExtractor from ..utils import ( + determine_ext, + int_or_none, js_to_json, - ExtractorError, ) from ..compat import compat_urlparse @@ -16,13 +19,16 @@ class UDNEmbedIE(InfoExtractor): _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL _TESTS = [{ 'url': 'http://video.udn.com/embed/news/300040', - 'md5': 'de06b4c90b042c128395a88f0384817e', 'info_dict': { 'id': '300040', 'ext': 'mp4', 'title': '生物老師男變女 全校挺"做自己"', 'thumbnail': 're:^https?://.*\.jpg$', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'https://video.udn.com/embed/news/300040', 'only_matching': True, @@ -38,39 
+44,53 @@ class UDNEmbedIE(InfoExtractor): page = self._download_webpage(url, video_id) options = json.loads(js_to_json(self._html_search_regex( - r'var options\s*=\s*([^;]+);', page, 'video urls dictionary'))) + r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary'))) video_urls = options['video'] if video_urls.get('youtube'): return self.url_result(video_urls.get('youtube'), 'Youtube') - try: - del video_urls['youtube'] - except KeyError: - pass + formats = [] + for video_type, api_url in video_urls.items(): + if not api_url: + continue - formats = [{ - 'url': self._download_webpage( + video_url = self._download_webpage( compat_urlparse.urljoin(url, api_url), video_id, - 'retrieve url for %s video' % video_type), - 'format_id': video_type, - 'preference': 0 if video_type == 'mp4' else -1, - } for video_type, api_url in video_urls.items() if api_url] + note='retrieve url for %s video' % video_type) - if not formats: - raise ExtractorError('No videos found', expected=True) + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', m3u8_id='hls')) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds')) + else: + mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url) + a_format = { + 'url': video_url, + # video_type may be 'mp4', which confuses YoutubeDL + 'format_id': 'http-' + video_type, + } + if mobj: + a_format.update({ + 'height': int_or_none(mobj.group('height')), + 'tbr': int_or_none(mobj.group('tbr')), + }) + formats.append(a_format) self._sort_formats(formats) - thumbnail = None - - if options.get('gallery') and len(options['gallery']): - thumbnail = options['gallery'][0].get('original') + thumbnails = [{ + 'url': img_url, + 'id': img_type, + } for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url] return { 'id': video_id, 'formats': formats, 'title': options['title'], - 'thumbnail': thumbnail + 
'thumbnails': thumbnails, } From 811586ebcfb04878ad3347706bfee020d0e3652b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 19:23:44 +0800 Subject: [PATCH 364/501] [generic] Update the UDNEmbed test case --- youtube_dl/extractor/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e478f86a8..b4138381d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1033,14 +1033,18 @@ class GenericIE(InfoExtractor): }, # UDN embed { - 'url': 'http://www.udn.com/news/story/7314/822787', + 'url': 'https://video.udn.com/news/300346', 'md5': 'fd2060e988c326991037b9aff9df21a6', 'info_dict': { 'id': '300346', 'ext': 'mp4', 'title': '中一中男師變性 全校師生力挺', 'thumbnail': 're:^https?://.*\.jpg$', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, # Ooyala embed { From dde1ce7c061cae123264eb555f1da98956923301 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 20:04:43 +0800 Subject: [PATCH 365/501] [tf1] Fix a regular expression (closes #9656) This is a Python bug fixed in 2.7.6 [1] [1] https://github.com/rg3/youtube-dl/issues/9656#issuecomment-222968594 --- youtube_dl/extractor/tf1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index aff5121b9..6c848dc6f 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -48,6 +48,6 @@ class TF1IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1', + r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1', webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From 6a1df4fb5fb76710457b59195e8b530ba269f09f Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Jun 2016 21:23:58 +0700 Subject: [PATCH 366/501] [spankwire] Add support for new URL format (Closes #9657) --- youtube_dl/extractor/spankwire.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 692fd78e8..92a7120a3 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -96,20 +96,18 @@ class SpankwireIE(InfoExtractor): formats = [] for height, video_url in zip(heights, video_urls): path = compat_urllib_parse_urlparse(video_url).path - _, quality = path.split('/')[4].split('_')[:2] - f = { - 'url': video_url, - 'height': height, - } - tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None) - if tbr: - f.update({ - 'tbr': int(tbr), - 'format_id': '%dp' % height, - }) + m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path) + if m: + tbr = int(m.group('tbr')) + height = int(m.group('height')) else: - f['format_id'] = quality - formats.append(f) + tbr = None + formats.append({ + 'url': video_url, + 'format_id': '%dp' % height, + 'height': height, + 'tbr': tbr, + }) self._sort_formats(formats) age_limit = self._rta_search(webpage) From 6e6b9f600f2f447604f6108fb6486b73cc25def1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Jun 2016 01:10:23 +0700 Subject: [PATCH 367/501] [arte] Add support for playlists and rework tests (Closes #9632) --- youtube_dl/extractor/arte.py | 173 ++++++++++++++++++----------- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 110 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index e37fdae13..f40532929 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -61,10 +61,7 @@ class ArteTvIE(InfoExtractor): } -class ArteTVPlus7IE(InfoExtractor): - IE_NAME = 'arte.tv:+7' - 
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)' - +class ArteTVBaseIE(InfoExtractor): @classmethod def _extract_url_info(cls, url): mobj = re.match(cls._VALID_URL, url) @@ -78,60 +75,6 @@ class ArteTVPlus7IE(InfoExtractor): video_id = mobj.group('id') return video_id, lang - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - webpage = self._download_webpage(url, video_id) - return self._extract_from_webpage(webpage, video_id, lang) - - def _extract_from_webpage(self, webpage, video_id, lang): - patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']') - ids = (video_id, '') - # some pages contain multiple videos (like - # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), - # so we first try to look for json URLs that contain the video id from - # the 'vid' parameter. - patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] - json_url = self._html_search_regex( - patterns, webpage, 'json vp url', default=None) - if not json_url: - def find_iframe_url(webpage, default=NO_DEFAULT): - return self._html_search_regex( - r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1', - webpage, 'iframe url', group='url', default=default) - - iframe_url = find_iframe_url(webpage, None) - if not iframe_url: - embed_url = self._html_search_regex( - r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None) - if embed_url: - player = self._download_json( - embed_url, video_id, 'Downloading player page') - iframe_url = find_iframe_url(player['html']) - # en and es URLs produce react-based pages with different layout (e.g. 
- # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) - if not iframe_url: - program = self._search_regex( - r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', - webpage, 'program', default=None) - if program: - embed_html = self._parse_json(program, video_id) - if embed_html: - iframe_url = find_iframe_url(embed_html['embed_html']) - if iframe_url: - json_url = compat_parse_qs( - compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] - if json_url: - title = self._search_regex( - r'<h3[^>]+title=(["\'])(?P<title>.+?)\1', - webpage, 'title', default=None, group='title') - return self._extract_from_json_url(json_url, video_id, lang, title=title) - # Different kind of embed URL (e.g. - # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) - embed_url = self._search_regex( - r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', - webpage, 'embed url', group='url') - return self.url_result(embed_url) - def _extract_from_json_url(self, json_url, video_id, lang, title=None): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] @@ -235,6 +178,74 @@ class ArteTVPlus7IE(InfoExtractor): return info_dict +class ArteTVPlus7IE(ArteTVBaseIE): + IE_NAME = 'arte.tv:+7' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)' + + _TESTS = [{ + 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url) + + def _real_extract(self, url): + video_id, lang = self._extract_url_info(url) + webpage = self._download_webpage(url, video_id) + return self._extract_from_webpage(webpage, video_id, lang) + + def _extract_from_webpage(self, webpage, video_id, lang): + patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', 
r'data-url=["\']([^"]+%s[^"]+)["\']') + ids = (video_id, '') + # some pages contain multiple videos (like + # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), + # so we first try to look for json URLs that contain the video id from + # the 'vid' parameter. + patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] + json_url = self._html_search_regex( + patterns, webpage, 'json vp url', default=None) + if not json_url: + def find_iframe_url(webpage, default=NO_DEFAULT): + return self._html_search_regex( + r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1', + webpage, 'iframe url', group='url', default=default) + + iframe_url = find_iframe_url(webpage, None) + if not iframe_url: + embed_url = self._html_search_regex( + r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None) + if embed_url: + player = self._download_json( + embed_url, video_id, 'Downloading player page') + iframe_url = find_iframe_url(player['html']) + # en and es URLs produce react-based pages with different layout (e.g. + # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) + if not iframe_url: + program = self._search_regex( + r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', + webpage, 'program', default=None) + if program: + embed_html = self._parse_json(program, video_id) + if embed_html: + iframe_url = find_iframe_url(embed_html['embed_html']) + if iframe_url: + json_url = compat_parse_qs( + compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] + if json_url: + title = self._search_regex( + r'<h3[^>]+title=(["\'])(?P<title>.+?)\1', + webpage, 'title', default=None, group='title') + return self._extract_from_json_url(json_url, video_id, lang, title=title) + # Different kind of embed URL (e.g. 
+ # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) + embed_url = self._search_regex( + r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', + webpage, 'embed url', group='url') + return self.url_result(embed_url) + + # It also uses the arte_vp_url url from the webpage to extract the information class ArteTVCreativeIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:creative' @@ -267,7 +278,7 @@ class ArteTVInfoIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:info' _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', 'info_dict': { 'id': '067528-000-A', @@ -275,7 +286,7 @@ class ArteTVInfoIE(ArteTVPlus7IE): 'title': 'Service civique, un cache misère ?', 'upload_date': '20160403', }, - } + }] class ArteTVFutureIE(ArteTVPlus7IE): @@ -300,6 +311,8 @@ class ArteTVDDCIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:ddc' _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)' + _TESTS = [] + def _real_extract(self, url): video_id, lang = self._extract_url_info(url) if lang == 'folge': @@ -318,7 +331,7 @@ class ArteTVConcertIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:concert' _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', 'md5': '9ea035b7bd69696b67aa2ccaaa218161', 'info_dict': { @@ -328,14 +341,14 @@ class ArteTVConcertIE(ArteTVPlus7IE): 'upload_date': '20140128', 'description': 'md5:486eb08f991552ade77439fe6d82c305', }, - } + }] class ArteTVCinemaIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:cinema' _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' - _TEST = { + _TESTS = [{ 'url': 'http://cinema.arte.tv/de/node/38291', 'md5': '6b275511a5107c60bacbeeda368c3aa1', 'info_dict': { @@ -345,7 +358,7 @@ class ArteTVCinemaIE(ArteTVPlus7IE): 'upload_date': '20160122', 'description': 
'md5:7f749bbb77d800ef2be11d54529b96bc', }, - } + }] class ArteTVMagazineIE(ArteTVPlus7IE): @@ -390,9 +403,41 @@ class ArteTVEmbedIE(ArteTVPlus7IE): ) ''' + _TESTS = [] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') lang = mobj.group('lang') json_url = mobj.group('json_url') return self._extract_from_json_url(json_url, video_id, lang) + + +class ArteTVPlaylistIE(ArteTVBaseIE): + IE_NAME = 'arte.tv:playlist' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)' + + _TESTS = [{ + 'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV', + 'info_dict': { + 'id': 'PL-013263', + 'title': 'Areva & Uramin', + }, + 'playlist_mincount': 6, + }, { + 'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id, lang = self._extract_url_info(url) + collection = self._download_json( + 'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' + % (lang, playlist_id), playlist_id) + title = collection.get('title') + description = collection.get('shortDescription') or collection.get('teaserText') + entries = [ + self._extract_from_json_url( + video['jsonUrl'], video.get('programId') or playlist_id, lang) + for video in collection['videos'] if video.get('jsonUrl')] + return self.playlist_result(entries, playlist_id, title, description) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dd4b2b838..dc21cfed9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -56,6 +56,7 @@ from .arte import ( ArteTVDDCIE, ArteTVMagazineIE, ArteTVEmbedIE, + ArteTVPlaylistIE, ) from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE From 5e3856a2c5af0a622b74921c0d60acde53a664ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Thu, 2 Jun 2016 01:19:57 +0700 Subject: [PATCH 368/501] release 2016.06.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- README.md | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e3de48eb5..ae98e0626 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.2** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.30.2 +[debug] youtube-dl version 2016.06.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index 7e18112de..253d51bc8 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ which means you can modify it, redistribute it or use it however you like. (experimental) ## Download Options: - -r, --rate-limit LIMIT Maximum download rate in bytes per second + -r, --limit-rate RATE Maximum download rate in bytes per second (e.g. 50K or 4.2M) -R, --retries RETRIES Number of retries (default is 10), or "infinite". 
diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bbc647030..dcbc632a1 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -55,6 +55,7 @@ - **arte.tv:future** - **arte.tv:info** - **arte.tv:magazine** + - **arte.tv:playlist** - **AtresPlayer** - **ATTTechChannel** - **AudiMedia** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ad6fb26c6..fba427dde 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.30.2' +__version__ = '2016.06.02' From f4e4aa9b6b7057af400ad404efcca51669012b73 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 1 Jun 2016 21:18:57 +0100 Subject: [PATCH 369/501] [revision3:embed] Add new extractor --- youtube_dl/extractor/revision3.py | 132 ++++++++++++++---------------- 1 file changed, 63 insertions(+), 69 deletions(-) diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py index 99979ebe1..833d8a2f0 100644 --- a/youtube_dl/extractor/revision3.py +++ b/youtube_dl/extractor/revision3.py @@ -13,8 +13,64 @@ from ..utils import ( ) +class Revision3EmbedIE(InfoExtractor): + IE_NAME = 'revision3:embed' + _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)' + _TEST = { + 'url': 'http://api.seekernetwork.com/player/embed?videoId=67558', + 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', + 'info_dict': { + 'id': '67558', + 'ext': 'mp4', + 'title': 'The Pros & Cons Of Zoos', + 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', + 'uploader_id': 'dnews', + 'uploader': 'DNews', + } + } + _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('playlist_id') + playlist_type 
= mobj.group('playlist_type') or 'video_id' + video_data = self._download_json( + 'http://revision3.com/api/getPlaylist.json', playlist_id, query={ + 'api_key': self._API_KEY, + 'codecs': 'h264,vp8,theora', + playlist_type: playlist_id, + })['items'][0] + + formats = [] + for vcodec, media in video_data['media'].items(): + for quality_id, quality in media.items(): + if quality_id == 'hls': + formats.extend(self._extract_m3u8_formats( + quality['url'], playlist_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': quality['url'], + 'format_id': '%s-%s' % (vcodec, quality_id), + 'tbr': int_or_none(quality.get('bitrate')), + 'vcodec': vcodec, + }) + self._sort_formats(formats) + + return { + 'id': playlist_id, + 'title': unescapeHTML(video_data['title']), + 'description': unescapeHTML(video_data.get('summary')), + 'uploader': video_data.get('show', {}).get('name'), + 'uploader_id': video_data.get('show', {}).get('slug'), + 'duration': int_or_none(video_data.get('duration')), + 'formats': formats, + } + + class Revision3IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)' + IE_NAME = 'revision' + _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)' _TESTS = [{ 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016', 'md5': 'd94a72d85d0a829766de4deb8daaf7df', @@ -32,52 +88,14 @@ class Revision3IE(InfoExtractor): } }, { # Show - 'url': 'http://testtube.com/brainstuff', - 'info_dict': { - 'id': '251', - 'title': 'BrainStuff', - 'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.', - }, - 'playlist_mincount': 93, - }, { - 'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial', - 
'info_dict': { - 'id': '58227', - 'display_id': 'dnews/5-weird-ways-plants-can-eat-animals', - 'duration': 275, - 'ext': 'webm', - 'title': '5 Weird Ways Plants Can Eat Animals', - 'description': 'Why have some plants evolved to eat meat?', - 'upload_date': '20150120', - 'timestamp': 1421763300, - 'uploader': 'DNews', - 'uploader_id': 'dnews', - }, - }, { - 'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min', - 'info_dict': { - 'id': '71618', - 'ext': 'mp4', - 'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min', - 'title': 'The Israel-Palestine Conflict Explained in Ten Minutes', - 'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start', - 'uploader': 'Editors\' Picks', - 'uploader_id': 'tt-editors-picks', - 'timestamp': 1453309200, - 'upload_date': '20160120', - }, - 'add_ie': ['Youtube'], + 'url': 'http://revision3.com/variant', + 'only_matching': True, }, { # Tag - 'url': 'http://testtube.com/tech-news', - 'info_dict': { - 'id': '21018', - 'title': 'tech news', - }, - 'playlist_mincount': 9, + 'url': 'http://revision3.com/vr', + 'only_matching': True, }] _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' - _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62' def _real_extract(self, url): domain, display_id = re.match(self._VALID_URL, url).groups() @@ -119,33 +137,9 @@ class Revision3IE(InfoExtractor): }) return info - video_data = self._download_json( - 'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id), - video_id)['items'][0] - - formats = [] - for vcodec, media in video_data['media'].items(): - for quality_id, quality in media.items(): - if quality_id == 'hls': - formats.extend(self._extract_m3u8_formats( - quality['url'], video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'url': 
quality['url'], - 'format_id': '%s-%s' % (vcodec, quality_id), - 'tbr': int_or_none(quality.get('bitrate')), - 'vcodec': vcodec, - }) - self._sort_formats(formats) - info.update({ - 'title': unescapeHTML(video_data['title']), - 'description': unescapeHTML(video_data.get('summary')), - 'uploader': video_data.get('show', {}).get('name'), - 'uploader_id': video_data.get('show', {}).get('slug'), - 'duration': int_or_none(video_data.get('duration')), - 'formats': formats, + '_type': 'url_transparent', + 'url': 'revision3:%s' % video_id, }) return info else: From 4a684895c0227bf18896eae36e693d7046aacaf4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 1 Jun 2016 21:20:02 +0100 Subject: [PATCH 370/501] [seeker] Add new extractor(closes #9619) --- youtube_dl/extractor/extractors.py | 6 +++- youtube_dl/extractor/seeker.py | 57 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/seeker.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dc21cfed9..9dd55bd70 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -639,7 +639,10 @@ from .regiotv import RegioTVIE from .restudy import RestudyIE from .reuters import ReutersIE from .reverbnation import ReverbNationIE -from .revision3 import Revision3IE +from .revision3 import ( + Revision3EmbedIE, + Revision3IE, +) from .rice import RICEIE from .ringtv import RingTVIE from .ro220 import Ro220IE @@ -678,6 +681,7 @@ from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .screenjunkies import ScreenJunkiesIE from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE +from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE from .servingsys import ServingSysIE diff --git a/youtube_dl/extractor/seeker.py b/youtube_dl/extractor/seeker.py new file mode 100644 index 000000000..3b9c65e7e 
--- /dev/null +++ b/youtube_dl/extractor/seeker.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SeekerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html' + _TESTS = [{ + # player.loadRevision3Item + 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', + 'md5': '30c1dc4030cc715cf05b423d0947ac18', + 'info_dict': { + 'id': '76243', + 'ext': 'webm', + 'title': 'Should Trump Be Required To Release His Tax Returns?', + 'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?', + 'uploader': 'Seeker Daily', + 'uploader_id': 'seekerdaily', + } + }, { + 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', + 'playlist': [ + { + 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', + 'info_dict': { + 'id': '67558', + 'ext': 'mp4', + 'title': 'The Pros & Cons Of Zoos', + 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', + 'uploader': 'DNews', + 'uploader_id': 'dnews', + }, + } + ], + 'info_dict': { + 'id': '1834116536', + 'title': 'After Gorilla Killing, Changes Ahead for Zoos', + 'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.', + }, + }] + + def _real_extract(self, url): + display_id, article_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, display_id) + mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage) + if mobj: + playlist_type, playlist_id = mobj.groups() + return self.url_result( + 'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id) + else: + entries = 
[self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall( + r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)] + return self.playlist_result( + entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage)) From 0ea590076fd3b714b6a3345a28ac61d8be5e2afd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 17:22:40 +0800 Subject: [PATCH 371/501] [utils] Always decode Location header escape_url is broken for bytes-like objects --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cfb2d1bf5..6ab1747b3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -861,6 +861,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 if sys.version_info >= (3, 0): location = location.encode('iso-8859-1').decode('utf-8') + else: + location = location.decode('utf-8') location_escaped = escape_url(location) if location != location_escaped: del resp.headers['Location'] From 8c32e5dc32f401ae22fa7087f3f3b1c9d6563835 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 17:24:40 +0800 Subject: [PATCH 372/501] [test/test_utils] Add test for #9588 --- test/test_http.py | 55 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/test/test_http.py b/test/test_http.py index 15e0ad369..6b8493e5e 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -16,6 +16,15 @@ import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +def http_server_port(httpd): + if os.name == 'java': + # In Jython SSLSocket is not a subclass of socket.socket + sock = httpd.socket.sock + else: + sock = httpd.socket + return sock.getsockname()[1] + + class 
HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass @@ -31,6 +40,22 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): self.send_header('Content-Type', 'video/mp4') self.end_headers() self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]') + elif self.path == '/302': + if sys.version_info[0] == 3: + # XXX: Python 3 http server does not allow non-ASCII header values + self.send_response(404) + self.end_headers() + return + + new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server) + self.send_response(302) + self.send_header(b'Location', new_url.encode('utf-8')) + self.end_headers() + elif self.path == '/%E4%B8%AD%E6%96%87.html': + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(b'<html><video src="/vid.mp4" /></html>') else: assert False @@ -47,18 +72,32 @@ class FakeLogger(object): class TestHTTP(unittest.TestCase): + def setUp(self): + self.httpd = compat_http_server.HTTPServer( + ('localhost', 0), HTTPTestRequestHandler) + self.port = http_server_port(self.httpd) + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + def test_unicode_path_redirection(self): + # XXX: Python 3 http server does not allow non-ASCII header values + if sys.version_info[0] == 3: + return + + ydl = YoutubeDL({'logger': FakeLogger()}) + r = ydl.extract_info('http://localhost:%d/302' % self.port) + self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port) + + +class TestHTTPS(unittest.TestCase): def setUp(self): certfn = os.path.join(TEST_DIR, 'testcert.pem') self.httpd = compat_http_server.HTTPServer( ('localhost', 0), HTTPTestRequestHandler) self.httpd.socket = ssl.wrap_socket( self.httpd.socket, certfile=certfn, server_side=True) - if os.name == 'java': - # In Jython SSLSocket is not a subclass of socket.socket - 
sock = self.httpd.socket.sock - else: - sock = self.httpd.socket - self.port = sock.getsockname()[1] + self.port = http_server_port(self.httpd) self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread.daemon = True self.server_thread.start() @@ -94,14 +133,14 @@ class TestProxy(unittest.TestCase): def setUp(self): self.proxy = compat_http_server.HTTPServer( ('localhost', 0), _build_proxy_handler('normal')) - self.port = self.proxy.socket.getsockname()[1] + self.port = http_server_port(self.proxy) self.proxy_thread = threading.Thread(target=self.proxy.serve_forever) self.proxy_thread.daemon = True self.proxy_thread.start() self.cn_proxy = compat_http_server.HTTPServer( ('localhost', 0), _build_proxy_handler('cn')) - self.cn_port = self.cn_proxy.socket.getsockname()[1] + self.cn_port = http_server_port(self.cn_proxy) self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever) self.cn_proxy_thread.daemon = True self.cn_proxy_thread.start() From 54fb1996812fa09f0f81ac28f42647e7706212b2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 19:24:28 +0800 Subject: [PATCH 373/501] [test/test_http] Fix getsockname() on Jython --- test/test_http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_http.py b/test/test_http.py index 6b8493e5e..5076ced51 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -17,7 +17,7 @@ TEST_DIR = os.path.dirname(os.path.abspath(__file__)) def http_server_port(httpd): - if os.name == 'java': + if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): # In Jython SSLSocket is not a subclass of socket.socket sock = httpd.socket.sock else: From 9a4aec8b7ea2c0863bc03ba8f3d3e69a61e77c80 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 19:25:25 +0800 Subject: [PATCH 374/501] [utils] Use bytes-like objects as header values on Python 2 --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6ab1747b3..26f21602c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -866,6 +866,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): location_escaped = escape_url(location) if location != location_escaped: del resp.headers['Location'] + if sys.version_info < (3, 0): + location_escaped = location_escaped.encode('utf-8') resp.headers['Location'] = location_escaped return resp From b96f007eeb432cdd118fb4cfa027dfa36b0ea0f2 Mon Sep 17 00:00:00 2001 From: bzc6p <bzc6p@users.noreply.github.com> Date: Thu, 2 Jun 2016 11:39:32 +0200 Subject: [PATCH 375/501] =?UTF-8?q?Added=20sanitization=20support=20for=20?= =?UTF-8?q?Hungarian=20letters=20=C5=90=20and=20=C5=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index a697232a8..feef80465 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -157,8 +157,8 @@ class TestUtil(unittest.TestCase): self.assertTrue(sanitize_filename(':', restricted=True) != '') self.assertEqual(sanitize_filename( - 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True), - 'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy') + 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True), + 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') From c88270271e65be591f7e194b71728256644b8684 Mon Sep 17 00:00:00 2001 From: bzc6p <bzc6p@users.noreply.github.com> Date: Thu, 2 Jun 2016 11:51:48 +0200 Subject: [PATCH 376/501] =?UTF-8?q?Added=20sanitization=20support=20for=20?= =?UTF-8?q?Hungarian=20letters=20=C5=90=20and=20=C5=B0?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube_dl/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 26f21602c..89234b39d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -105,9 +105,9 @@ KNOWN_EXTENSIONS = ( 'f4f', 'f4m', 'm3u8', 'smil') # needed for sanitizing filenames in restricted mode -ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', - itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'], - 'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy'))) +ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', + itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], + 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) def preferredencoding(): From ad73083ff05eafa64e07500fd42306ac349bd76b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 2 Jun 2016 19:27:57 +0800 Subject: [PATCH 377/501] [bilibili] Add _part%d suffixes back (closes #9660) --- youtube_dl/extractor/bilibili.py | 59 ++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 71a54b4f4..910e539e4 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -46,6 +46,62 @@ class BiliBiliIE(InfoExtractor): 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', }, 'playlist_count': 9, + }, { + 'url': 'http://www.bilibili.com/video/av4808130/', + 'info_dict': { + 'id': '4808130', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + }, + 'playlist': [{ + 'md5': '55cdadedf3254caaa0d5d27cf20a8f9c', + 'info_dict': { + 'id': '4808130_part1', + 'ext': 'flv', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': 
'(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'timestamp': 1464564180, + 'upload_date': '20160529', + 'uploader': '喜欢拉面', + 'uploader_id': '151066', + }, + }, { + 'md5': '926f9f67d0c482091872fbd8eca7ea3d', + 'info_dict': { + 'id': '4808130_part2', + 'ext': 'flv', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'timestamp': 1464564180, + 'upload_date': '20160529', + 'uploader': '喜欢拉面', + 'uploader_id': '151066', + }, + }, { + 'md5': '4b7b225b968402d7c32348c646f1fd83', + 'info_dict': { + 'id': '4808130_part3', + 'ext': 'flv', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'timestamp': 1464564180, + 'upload_date': '20160529', + 'uploader': '喜欢拉面', + 'uploader_id': '151066', + }, + }, { + 'md5': '7b795e214166501e9141139eea236e91', + 'info_dict': { + 'id': '4808130_part4', + 'ext': 'flv', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'timestamp': 1464564180, + 'upload_date': '20160529', + 'uploader': '喜欢拉面', + 'uploader_id': '151066', + }, + }], }] # BiliBili blocks keys from time to time. The current key is extracted from @@ -144,6 +200,9 @@ class BiliBiliIE(InfoExtractor): if len(entries) == 1: return entries[0] else: + for idx, entry in enumerate(entries): + entry['id'] = '%s_part%d' % (video_id, (idx + 1)) + return { '_type': 'multi_video', 'id': video_id, From 9c3c447eb389726d98189d972a2d772ef729132d Mon Sep 17 00:00:00 2001 From: TRox1972 <archcr8@gmail.com> Date: Tue, 17 May 2016 16:21:52 +0200 Subject: [PATCH 378/501] [loc] Add extractor (Closes #3188) Added extractor of loc.gov, which closes #3188. 
I am not an experienced programmer, so I am sure I did a bunch of mistakes, but the extractor works (for me at least). [LibraryOfCongress] don't use video_id for _search_regex() [LibraryOfCongress] Improvements --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/libraryofcongress.py | 65 +++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 youtube_dl/extractor/libraryofcongress.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9dd55bd70..3b5143ace 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -382,6 +382,7 @@ from .leeco import ( LePlaylistIE, LetvCloudIE, ) +from .libraryofcongress import LibraryOfCongressIE from .libsyn import LibsynIE from .lifenews import ( LifeNewsIE, diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py new file mode 100644 index 000000000..0c34dbce3 --- /dev/null +++ b/youtube_dl/extractor/libraryofcongress.py @@ -0,0 +1,65 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import determine_ext + + +class LibraryOfCongressIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://loc.gov/item/90716351/', + 'info_dict': { + 'id': '90716351', + 'ext': 'mp4', + 'title': 'Pa\'s trip to Mars /' + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'https://www.loc.gov/item/97516576/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + self.report_extraction(video_id) + json_id = self._search_regex('media-player-([0-9A-Z]{32})', webpage, 'json id') + + data = self._parse_json(self._download_webpage( + 'https://media.loc.gov/services/v1/media?id=%s' % json_id, + video_id), video_id) + data = data['mediaObject'] + + media_url = 
data['derivatives'][0]['derivativeUrl'] + media_url = media_url.replace('rtmp', 'https') + + is_video = data['mediaType'].lower() == 'v' + if not determine_ext(media_url) in ('mp4', 'mp3'): + media_url += '.mp4' if is_video else '.mp3' + + if media_url.index('vod/mp4:') > -1: + media_url = media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8' + elif url.index('vod/mp3:') > -1: + media_url = media_url.replace('vod/mp3:', '') + + formats = [] + if determine_ext(media_url) == 'm3u8': + formats = self._extract_m3u8_formats(media_url, video_id, ext='mp4') + elif determine_ext(media_url) is 'mp3': + formats.append({ + 'url': media_url, + 'ext': 'mp3', + }) + + return { + 'id': video_id, + 'thumbnail': self._og_search_thumbnail(webpage), + 'title': self._og_search_title(webpage), + 'formats': formats, + } From 7f3c3dfa52769d1f44c1f1031449118c564a92bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Jun 2016 23:19:11 +0700 Subject: [PATCH 379/501] [loc] Improve (Closes #9521) --- youtube_dl/extractor/libraryofcongress.py | 87 ++++++++++++++--------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 0c34dbce3..d311f9946 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -3,63 +3,82 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import determine_ext +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, +) class LibraryOfCongressIE(InfoExtractor): + IE_NAME = 'loc' + IE_DESC = 'Library of Congress' _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://loc.gov/item/90716351/', + _TEST = { + 'url': 'http://loc.gov/item/90716351/', + 'md5': '353917ff7f0255aa6d4b80a034833de8', 'info_dict': { 'id': '90716351', 'ext': 'mp4', - 'title': 'Pa\'s trip to Mars /' + 'title': "Pa's 
trip to Mars", + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 0, + 'view_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'https://www.loc.gov/item/97516576/', - 'only_matching': True, - }] + } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - json_id = self._search_regex('media-player-([0-9A-Z]{32})', webpage, 'json id') + media_id = self._search_regex( + (r'id=(["\'])media-player-(?P<id>.+?)\1', + r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1', + r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1'), + webpage, 'media id', group='id') - data = self._parse_json(self._download_webpage( - 'https://media.loc.gov/services/v1/media?id=%s' % json_id, - video_id), video_id) - data = data['mediaObject'] + data = self._parse_json( + self._download_webpage( + 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, + video_id), + video_id)['mediaObject'] - media_url = data['derivatives'][0]['derivativeUrl'] + derivative = data['derivatives'][0] + media_url = derivative['derivativeUrl'] + + # Following algorithm was extracted from setAVSource js function + # found in webpage media_url = media_url.replace('rtmp', 'https') - is_video = data['mediaType'].lower() == 'v' - if not determine_ext(media_url) in ('mp4', 'mp3'): + is_video = data.get('mediaType', 'v').lower() == 'v' + ext = determine_ext(media_url) + if ext not in ('mp4', 'mp3'): media_url += '.mp4' if is_video else '.mp3' - if media_url.index('vod/mp4:') > -1: - media_url = media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8' - elif url.index('vod/mp3:') > -1: - media_url = media_url.replace('vod/mp3:', '') + if 'vod/mp4:' in media_url: + formats = [{ + 'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8', + 'format_id': 'hls', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + }] + elif 'vod/mp3:' in media_url: + formats = [{ + 'url': 
media_url.replace('vod/mp3:', ''), + 'vcodec': 'none', + }] - formats = [] - if determine_ext(media_url) == 'm3u8': - formats = self._extract_m3u8_formats(media_url, video_id, ext='mp4') - elif determine_ext(media_url) is 'mp3': - formats.append({ - 'url': media_url, - 'ext': 'mp3', - }) + self._sort_formats(formats) + + title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(webpage) + duration = float_or_none(data.get('duration')) + view_count = int_or_none(data.get('viewCount')) return { 'id': video_id, + 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), - 'title': self._og_search_title(webpage), + 'duration': duration, + 'view_count': view_count, 'formats': formats, } From bf4c6a38e1a98606b269d70ccc65c7ec5d47ec07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Jun 2016 23:25:24 +0700 Subject: [PATCH 380/501] release 2016.06.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ae98e0626..e593ee78a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.02 +[debug] youtube-dl version 2016.06.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index dcbc632a1..619bd0825 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -43,8 +43,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek** - **ARD:mediathek**: Saarländischer Rundfunk + - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -339,6 +339,7 @@ - **livestream** - **livestream:original** - **LnkGo** + - **loc**: Library of Congress - **LocalNews8** - **LoveHomePorn** - **lrt.lt** @@ -528,7 +529,8 @@ - **Restudy** - **Reuters** - **ReverbNation** - - **Revision3** + - **revision** + - **revision3:embed** - **RICE** - **RingTV** - **RottenTomatoes** @@ -567,6 +569,7 @@ - **ScreencastOMatic** - **ScreenJunkies** - **ScreenwaveMedia** + - **Seeker** - **SenateISVP** - **SendtoNews** - **ServingSys** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fba427dde..d24d06f4a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.02' +__version__ = '2016.06.03' From 
76e9cd7f24f6b175e4cce85082647403266ed233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Jun 2016 23:43:34 +0700 Subject: [PATCH 381/501] [loc] Add support for another URL schema and simplify --- youtube_dl/extractor/libraryofcongress.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index d311f9946..a5f22b204 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -13,8 +13,8 @@ from ..utils import ( class LibraryOfCongressIE(InfoExtractor): IE_NAME = 'loc' IE_DESC = 'Library of Congress' - _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://loc.gov/item/90716351/', 'md5': '353917ff7f0255aa6d4b80a034833de8', 'info_dict': { @@ -25,7 +25,10 @@ class LibraryOfCongressIE(InfoExtractor): 'duration': 0, 'view_count': int, }, - } + }, { + 'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -34,13 +37,12 @@ class LibraryOfCongressIE(InfoExtractor): media_id = self._search_regex( (r'id=(["\'])media-player-(?P<id>.+?)\1', r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1', - r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1'), + r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1', + r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'), webpage, 'media id', group='id') - data = self._parse_json( - self._download_webpage( - 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, - video_id), + data = self._download_json( + 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, video_id)['mediaObject'] derivative = data['derivatives'][0] @@ -77,7 +79,7 @@ class 
LibraryOfCongressIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'view_count': view_count, 'formats': formats, From c917106be4d6d98ce7504d71a32b58ddca2bc03d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Jun 2016 23:55:22 +0700 Subject: [PATCH 382/501] [loc] Extract subtites --- youtube_dl/extractor/libraryofcongress.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index a5f22b204..49351759e 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -15,6 +15,7 @@ class LibraryOfCongressIE(InfoExtractor): IE_DESC = 'Library of Congress' _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)' _TESTS = [{ + # embedded via <div class="media-player" 'url': 'http://loc.gov/item/90716351/', 'md5': '353917ff7f0255aa6d4b80a034833de8', 'info_dict': { @@ -26,8 +27,19 @@ class LibraryOfCongressIE(InfoExtractor): 'view_count': int, }, }, { + # webcast embedded via mediaObjectId 'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578', - 'only_matching': True, + 'info_dict': { + 'id': '5578', + 'ext': 'mp4', + 'title': 'Help! 
Preservation Training Needs Here, There & Everywhere', + 'duration': 3765, + 'view_count': int, + 'subtitles': 'mincount:1', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -76,6 +88,14 @@ class LibraryOfCongressIE(InfoExtractor): duration = float_or_none(data.get('duration')) view_count = int_or_none(data.get('viewCount')) + subtitles = {} + cc_url = data.get('ccUrl') + if cc_url: + subtitles.setdefault('en', []).append({ + 'url': cc_url, + 'ext': 'ttml', + }) + return { 'id': video_id, 'title': title, @@ -83,4 +103,5 @@ class LibraryOfCongressIE(InfoExtractor): 'duration': duration, 'view_count': view_count, 'formats': formats, + 'subtitles': subtitles, } From 4d8856d511aef11b5dbeb9f6523c2a117bdbb85d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Jun 2016 00:26:03 +0700 Subject: [PATCH 383/501] [loc] Extract direct download links --- youtube_dl/extractor/libraryofcongress.py | 38 ++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 49351759e..0a94366fd 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -1,12 +1,15 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( determine_ext, float_or_none, int_or_none, + parse_filesize, ) @@ -40,6 +43,20 @@ class LibraryOfCongressIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # with direct download links + 'url': 'https://www.loc.gov/item/78710669/', + 'info_dict': { + 'id': '78710669', + 'ext': 'mp4', + 'title': 'La vie et la passion de Jesus-Christ', + 'duration': 0, + 'view_count': int, + 'formats': 'mincount:4', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -60,6 +77,9 @@ class LibraryOfCongressIE(InfoExtractor): derivative = 
data['derivatives'][0] media_url = derivative['derivativeUrl'] + title = derivative.get('shortName') or data.get('shortName') or self._og_search_title( + webpage) + # Following algorithm was extracted from setAVSource js function # found in webpage media_url = media_url.replace('rtmp', 'https') @@ -75,6 +95,7 @@ class LibraryOfCongressIE(InfoExtractor): 'format_id': 'hls', 'ext': 'mp4', 'protocol': 'm3u8_native', + 'quality': 1, }] elif 'vod/mp3:' in media_url: formats = [{ @@ -82,9 +103,24 @@ class LibraryOfCongressIE(InfoExtractor): 'vcodec': 'none', }] + download_urls = set() + for m in re.finditer( + r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage): + format_id = m.group('id').lower() + if format_id == 'gif': + continue + download_url = m.group('url') + if download_url in download_urls: + continue + download_urls.add(download_url) + formats.append({ + 'url': download_url, + 'format_id': format_id, + 'filesize_approx': parse_filesize(m.group('size')), + }) + self._sort_formats(formats) - title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(webpage) duration = float_or_none(data.get('duration')) view_count = int_or_none(data.get('viewCount')) From 762d44c9567af424b2731cb643429ddd8e76d704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Jun 2016 04:57:16 +0700 Subject: [PATCH 384/501] [channel9] Add support for rss links (Closes #9673) --- youtube_dl/extractor/channel9.py | 123 ++++++++++++++++++------------- 1 file changed, 70 insertions(+), 53 deletions(-) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index c74553dcf..34d4e6156 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -20,54 +20,64 @@ class Channel9IE(InfoExtractor): ''' IE_DESC = 'Channel 9' IE_NAME = 'channel9' - _VALID_URL = 
r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?' + _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)' - _TESTS = [ - { - 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', - 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', - 'info_dict': { - 'id': 'Events/TechEd/Australia/2013/KOS002', - 'ext': 'mp4', - 'title': 'Developer Kick-Off Session: Stuff We Love', - 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', - 'duration': 4576, - 'thumbnail': 're:http://.*\.jpg', - 'session_code': 'KOS002', - 'session_day': 'Day 1', - 'session_room': 'Arena 1A', - 'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'], - }, + _TESTS = [{ + 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', + 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', + 'info_dict': { + 'id': 'Events/TechEd/Australia/2013/KOS002', + 'ext': 'mp4', + 'title': 'Developer Kick-Off Session: Stuff We Love', + 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', + 'duration': 4576, + 'thumbnail': 're:http://.*\.jpg', + 'session_code': 'KOS002', + 'session_day': 'Day 1', + 'session_room': 'Arena 1A', + 'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', + 'Mads Kristensen'], }, - { - 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', - 'info_dict': { - 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'ext': 'mp4', - 'title': 'Self-service BI with Power BI - nuclear testing', - 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', - 'duration': 1540, - 'thumbnail': 're:http://.*\.jpg', - 'authors': ['Mike Wilmot'], - }, + }, { + 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', + 'info_dict': { + 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', + 
'ext': 'mp4', + 'title': 'Self-service BI with Power BI - nuclear testing', + 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', + 'duration': 1540, + 'thumbnail': 're:http://.*\.jpg', + 'authors': ['Mike Wilmot'], }, - { - # low quality mp4 is best - 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', - 'info_dict': { - 'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', - 'ext': 'mp4', - 'title': 'Ranges for the Standard Library', - 'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d', - 'duration': 5646, - 'thumbnail': 're:http://.*\.jpg', - }, - 'params': { - 'skip_download': True, - }, - } - ] + }, { + # low quality mp4 is best + 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', + 'info_dict': { + 'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', + 'ext': 'mp4', + 'title': 'Ranges for the Standard Library', + 'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d', + 'duration': 5646, + 'thumbnail': 're:http://.*\.jpg', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', + 'info_dict': { + 'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b', + 'title': 'Channel 9', + }, + 'playlist_count': 2, + }, { + 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', + 'only_matching': True, + }, { + 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', + 'only_matching': True, + }] _RSS_URL = 'http://channel9.msdn.com/%s/RSS' @@ -254,22 +264,30 @@ class Channel9IE(InfoExtractor): return self.playlist_result(contents) - def _extract_list(self, content_path): - rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS') + def _extract_list(self, video_id, rss_url=None): + if not rss_url: + rss_url = self._RSS_URL % video_id + rss = 
self._download_xml(rss_url, video_id, 'Downloading RSS') entries = [self.url_result(session_url.text, 'Channel9') for session_url in rss.findall('./channel/item/link')] title_text = rss.find('./channel/title').text - return self.playlist_result(entries, content_path, title_text) + return self.playlist_result(entries, video_id, title_text) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) content_path = mobj.group('contentpath') + rss = mobj.group('rss') - webpage = self._download_webpage(url, content_path, 'Downloading web page') + if rss: + return self._extract_list(content_path, url) - page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage) - if page_type_m is not None: - page_type = page_type_m.group('pagetype') + webpage = self._download_webpage( + url, content_path, 'Downloading web page') + + page_type = self._search_regex( + r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2', + webpage, 'page type', default=None, group='pagetype') + if page_type: if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content return self._extract_entry_item(webpage, content_path) elif page_type == 'Session': # Event session page, may contain downloadable content @@ -278,6 +296,5 @@ class Channel9IE(InfoExtractor): return self._extract_list(content_path) else: raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True) - else: # Assuming list return self._extract_list(content_path) From cad88f96dc8eaa845a458f0b80e92c1ba36c5491 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 4 Jun 2016 11:42:52 +0200 Subject: [PATCH 385/501] disable uploading to yt-dl.org for now --- devscripts/release.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index cde4d0a39..1a7b1e054 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -97,8 +97,10 @@ 
RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done -scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ -ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" + +echo 'TODO: upload on GitHub' +exit 1 + ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" /bin/echo -e "\n### Now switching to gh-pages..." From 7def35712a7047578643f18eaf6dda79fd8c9291 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Sat, 21 May 2016 17:48:17 +0200 Subject: [PATCH 386/501] [vidio] Add extractor (Closes #7195) [Vidio] fix fallback value and wrap duration in int_or_none [Vidio] don't use video_id for _html_search_regex() --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vidio.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/vidio.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3b5143ace..ed4e39574 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -910,6 +910,7 @@ from .videomore import ( ) from .videopremium import VideoPremiumIE from .videott import VideoTtIE +from .vidio import VidioIE from .vidme import ( VidmeIE, VidmeUserIE, diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py new file mode 100644 index 000000000..d17c663fd --- /dev/null +++ b/youtube_dl/extractor/vidio.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor + +from ..utils import int_or_none + + +class VidioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d{6})-(?P<display_id>[^/?]+)' + _TEST = { + 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', + 'info_dict': { + 'id': '165683', + 
'title': 'DJ_AMBRED - Booyah (Live 2015)', + 'ext': 'mp4', + 'thumbnail': 'https://cdn0-a.production.vidio.static6.com/uploads/video/image/165683/dj_ambred-booyah-live-2015-bfb2ba.jpg', + 'description': 'md5:27dc15f819b6a78a626490881adbadf8', + 'duration': 149, + }, + 'params': { + # m3u8 download + 'skip_download': True + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + video_data = self._parse_json(self._html_search_regex( + r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, 'video data'), display_id) + + formats = self._extract_m3u8_formats( + video_data['sources'][0]['file'], + display_id, ext='mp4') + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'thumbnail': video_data.get('image'), + 'description': self._og_search_description(webpage), + 'duration': int_or_none(video_data.get('clip_duration')), + } From 0fc832e1b2c8f48298e135d42818a16bfba4d3ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Jun 2016 16:47:43 +0700 Subject: [PATCH 387/501] [vidio] Improve (Closes #9562) --- youtube_dl/extractor/vidio.py | 65 ++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index d17c663fd..6898042de 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -2,28 +2,30 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .common import InfoExtractor from ..utils import int_or_none class VidioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d{6})-(?P<display_id>[^/?]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)' + _TESTS = [{ 'url': 
'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', + 'md5': 'cd2801394afc164e9775db6a140b91fe', 'info_dict': { 'id': '165683', - 'title': 'DJ_AMBRED - Booyah (Live 2015)', + 'display_id': 'dj_ambred-booyah-live-2015', 'ext': 'mp4', - 'thumbnail': 'https://cdn0-a.production.vidio.static6.com/uploads/video/image/165683/dj_ambred-booyah-live-2015-bfb2ba.jpg', + 'title': 'DJ_AMBRED - Booyah (Live 2015)', 'description': 'md5:27dc15f819b6a78a626490881adbadf8', - 'duration': 149, + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 149, + 'like_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True - } - } + }, { + 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -31,18 +33,41 @@ class VidioIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_data = self._parse_json(self._html_search_regex( - r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, 'video data'), display_id) + title = self._og_search_title(webpage) - formats = self._extract_m3u8_formats( - video_data['sources'][0]['file'], - display_id, ext='mp4') + m3u8_url, duration, thumbnail = [None] * 3 + + clips = self._parse_json( + self._html_search_regex( + r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1', + webpage, 'video data', default='[]', group='data'), + display_id, fatal=False) + if clips: + clip = clips[0] + m3u8_url = clip.get('sources', [{}])[0].get('file') + duration = clip.get('clip_duration') + thumbnail = clip.get('image') + + m3u8_url = m3u8_url or self._search_regex( + r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>.+?)\1', webpage, 'hls url') + formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native') + + duration = int_or_none(duration or self._search_regex( + r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration')) + thumbnail = thumbnail 
or self._og_search_thumbnail(webpage) + + like_count = int_or_none(self._search_regex( + (r'<span[^>]+data-comment-vote-count=["\'](\d+)', + r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'), + webpage, 'like count', fatal=False)) return { 'id': video_id, - 'title': self._og_search_title(webpage), - 'formats': formats, - 'thumbnail': video_data.get('image'), + 'display_id': display_id, + 'title': title, 'description': self._og_search_description(webpage), - 'duration': int_or_none(video_data.get('clip_duration')), + 'thumbnail': thumbnail, + 'duration': duration, + 'like_count': like_count, + 'formats': formats, } From 1ae6c83bceb6dbc7093fe35ddafcde08dd0151a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 00:43:55 +0700 Subject: [PATCH 388/501] [compat] Add compat_input --- youtube_dl/compat.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 06e5f3ff6..fabac9fd2 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -482,6 +482,11 @@ if sys.version_info < (3, 0) and sys.platform == 'win32': else: compat_getpass = getpass.getpass +try: + compat_input = raw_input +except NameError: # Python 3 + compat_input = input + # Python < 2.6.5 require kwargs to be bytes try: def _testfunc(x): From e92b552a102f509066a605b26d6df38eb73764b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 00:44:51 +0700 Subject: [PATCH 389/501] [devscripts/buildserver] Use compat_input from compat --- devscripts/buildserver.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index f7979c43e..fc99c3213 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -13,6 +13,7 @@ import os.path sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) from youtube_dl.compat import ( + 
compat_input, compat_http_server, compat_str, compat_urlparse, @@ -30,11 +31,6 @@ try: except ImportError: # Python 2 import SocketServer as compat_socketserver -try: - compat_input = raw_input -except NameError: # Python 3 - compat_input = input - class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer): allow_reuse_address = True From db56f281d9c5d57cb2c44a2ea356a9a0a12b3b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 00:47:26 +0700 Subject: [PATCH 390/501] [devscripts/create-github-release] Add script for releasing on GitHub Yet only Basic authentication is supported either via .netrc or by manual input --- devscripts/create-github-release.py | 112 ++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 devscripts/create-github-release.py diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py new file mode 100644 index 000000000..f74d39490 --- /dev/null +++ b/devscripts/create-github-release.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import base64 +import json +import mimetypes +import netrc +import optparse +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.compat import ( + compat_basestring, + compat_input, + compat_getpass, + compat_print, + compat_urllib_request, +) +from youtube_dl.utils import ( + make_HTTPS_handler, + sanitized_Request, +) + + +class GitHubReleaser(object): + _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases' + _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s' + _NETRC_MACHINE = 'github.com' + + def __init__(self, debuglevel=0): + self._init_github_account() + https_handler = make_HTTPS_handler({}, debuglevel=debuglevel) + self._opener = compat_urllib_request.build_opener(https_handler) + + def 
_init_github_account(self): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + self._username = info[0] + self._password = info[2] + compat_print('Using GitHub credentials found in .netrc...') + return + else: + compat_print('No GitHub credentials found in .netrc') + except (IOError, netrc.NetrcParseError): + compat_print('Unable to parse .netrc') + self._username = compat_input( + 'Type your GitHub username or email address and press [Return]: ') + self._password = compat_getpass( + 'Type your GitHub password and press [Return]: ') + + def _call(self, req): + if isinstance(req, compat_basestring): + req = sanitized_Request(req) + # Authorizing manually since GitHub does not response with 401 with + # WWW-Authenticate header set (see + # https://developer.github.com/v3/#basic-authentication) + b64 = base64.b64encode( + ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii') + req.add_header('Authorization', 'Basic %s' % b64) + response = self._opener.open(req).read().decode('utf-8') + return json.loads(response) + + def list_releases(self): + return self._call(self._API_URL) + + def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False): + data = { + 'tag_name': tag_name, + 'target_commitish': 'master', + 'name': name, + 'body': body, + 'draft': draft, + 'prerelease': prerelease, + } + req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8')) + return self._call(req) + + def create_asset(self, release_id, asset): + asset_name = os.path.basename(asset) + url = self._UPLOADS_URL % (release_id, asset_name) + # Our files are small enough to be loaded directly into memory. 
+ data = open(asset, 'rb').read() + req = sanitized_Request(url, data) + mime_type, _ = mimetypes.guess_type(asset_name) + req.add_header('Content-Type', mime_type or 'application/octet-stream') + return self._call(req) + + +def main(): + parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH') + options, args = parser.parse_args() + if len(args) != 2: + parser.error('Expected a version and a build directory') + + version, build_path = args + + releaser = GitHubReleaser(debuglevel=0) + + new_release = releaser.create_release( + version, name='youtube-dl %s' % version, draft=True, prerelease=True) + release_id = new_release['id'] + + for asset in os.listdir(build_path): + compat_print('Uploading %s...' % asset) + releaser.create_asset(release_id, os.path.join(build_path, asset)) + + +if __name__ == '__main__': + main() From 39b32571df802ef869db1067454aa654f3f66235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 00:48:33 +0700 Subject: [PATCH 391/501] [devscripts/release.sh] Release to GitHub --- devscripts/release.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 1a7b1e054..87e8eda50 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -95,17 +95,16 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" (cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS) (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) -/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." +/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..." for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done -echo 'TODO: upload on GitHub' -exit 1 +ROOT=$(pwd) +python devscripts/create-github-release.py $version "$ROOT/build/$version" ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" /bin/echo -e "\n### Now switching to gh-pages..." 
git clone --branch gh-pages --single-branch . build/gh-pages -ROOT=$(pwd) ( set -e ORIGIN_URL=$(git config --get remote.origin.url) From 2c347352677f023678ffd488a51b19f54b97fa36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 01:44:13 +0700 Subject: [PATCH 392/501] [youtube] Add itags 256 and 258 --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f3f102c30..6c9f77d95 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -344,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'}, '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, + '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, + '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, # Dash webm '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, From 1e236d7e2350e055bbe230b12490e4369aaa0956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 03:16:05 +0700 Subject: [PATCH 393/501] [downloader/hls] Do not rely on EXT-X-PLAYLIST-TYPE:EVENT --- youtube_dl/downloader/hls.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 62136ee54..049fb78ce 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,11 +23,17 @@ class 
HlsFD(FragmentFD): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + # Live streams heuristic does not always work (e.g. geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] - r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of - # event media playlists [4] + + # This heuristic also is not correct since segments may not be appended as well. + # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely + # be appended to the end of the playlist. + # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # event media playlists [4] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 From 631d4c87ee84183917fcdf5db59e1cd1bb48d9a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 03:19:44 +0700 Subject: [PATCH 394/501] [twitch:vod] Use native hls --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index f7b98e190..d898f14c3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -260,7 +260,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'nauth': access_token['token'], 'nauthsig': access_token['sig'], })), - item_id, 'mp4') + item_id, 'mp4', entry_protocol='m3u8_native') self._prefer_source(formats) info['formats'] = formats From 51c4d85ce788497584bd056d571ed9b7b24c9651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 03:21:43 +0700 Subject: [PATCH 395/501] [downloader/hls] PEP 8 --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 049fb78ce..8e4a7189a 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -32,7 +32,7 @@ class HlsFD(FragmentFD): # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely # be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of - # event media playlists [4] + # # event media playlists [4] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 From 633b444fd29aa9d8b3ba722285ae2475ae66595f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 03:31:10 +0700 Subject: [PATCH 396/501] [downloader/hls] Correct comment on twitch vods --- youtube_dl/downloader/hls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8e4a7189a..54f2108e9 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -29,8 +29,8 @@ class HlsFD(FragmentFD): # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] # This heuristic also is not correct since segments may not be appended as well. - # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely - # be appended to the end of the playlist. + # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite + # no segments will definitely be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # # event media playlists [4] From 71b9cb3107e156c7f17ec4cdf1d09421cb4dd4b1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 4 Jun 2016 22:55:15 +0200 Subject: [PATCH 397/501] extend FAQ (#9696) --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 253d51bc8..91078eee8 100644 --- a/README.md +++ b/README.md @@ -842,6 +842,12 @@ It is *not* possible to detect whether a URL is supported or not. That's because If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program. 
+# Why do I need to go through that much red tape when filing bugs? + +Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download, and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl. + +youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current. + # DEVELOPER INSTRUCTIONS Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. 
From bc270284b56b8ce7623b22b5c5cbf8d0d390c09e Mon Sep 17 00:00:00 2001 From: Ryan Schmidt <github@ryandesign.com> Date: Sat, 4 Jun 2016 21:30:22 -0500 Subject: [PATCH 398/501] Update README.md to mention MacPorts --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 91078eee8..f60e7ce33 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,14 @@ If you do not have curl, you can alternatively use a recent wget: Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). -OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). +OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/): brew install youtube-dl +Or with [MacPorts](https://www.macports.org/): + + sudo port install youtube-dl + You can also use pip: sudo pip install youtube-dl From 8f1aaa97a1e3eb60749f8046f2f0b1a0749d007c Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 5 Jun 2016 11:19:44 +0700 Subject: [PATCH 399/501] [README.md] Update pypi instructions --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f60e7ce33..e7240f41a 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,13 @@ If you do not have curl, you can alternatively use a recent wget: Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). -OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/): +You can also use pip: + + sudo pip install --upgrade youtube-dl + +This command will update youtube-dl if you have already installed it. 
See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. + +OS X users can install youtube-dl with [Homebrew](http://brew.sh/): brew install youtube-dl @@ -35,10 +41,6 @@ Or with [MacPorts](https://www.macports.org/): sudo port install youtube-dl -You can also use pip: - - sudo pip install youtube-dl - Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). # DESCRIPTION From 47f12ad3e39ebe714abec7e7588e8e411e2841b5 Mon Sep 17 00:00:00 2001 From: Tobias Salzmann <eun@su.am> Date: Sun, 5 Jun 2016 11:04:55 +0200 Subject: [PATCH 400/501] curl: follow redirect --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e7240f41a..205c485d0 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms To install it right away for all UNIX users (Linux, OS X, etc.), type: - sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl + sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl If you do not have curl, you can alternatively use a recent wget: From 7b0d1c28597bd38567e5b4e853f669a5a601c6e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 21:01:20 +0700 Subject: [PATCH 401/501] [__init__] Use write_string instead of compat_string (Closes #9689) --- youtube_dl/__init__.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5df965191..4905674ad 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -18,7 +18,6 @@ from .options import ( from .compat import ( compat_expanduser, compat_getpass, - 
compat_print, compat_shlex_split, workaround_optparse_bug9161, ) @@ -76,7 +75,7 @@ def _real_main(argv=None): # Dump user agent if opts.dump_user_agent: - compat_print(std_headers['User-Agent']) + write_string(std_headers['User-Agent'] + '\n', out=sys.stdout) sys.exit(0) # Batch file verification @@ -101,10 +100,10 @@ def _real_main(argv=None): if opts.list_extractors: for ie in list_extractors(opts.age_limit): - compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) + write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout) matchedUrls = [url for url in all_urls if ie.suitable(url)] for mu in matchedUrls: - compat_print(' ' + mu) + write_string(' ' + mu + '\n', out=sys.stdout) sys.exit(0) if opts.list_extractor_descriptions: for ie in list_extractors(opts.age_limit): @@ -117,7 +116,7 @@ def _real_main(argv=None): _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') _COUNTS = ('', '5', '10', 'all') desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) - compat_print(desc) + write_string(desc + '\n', out=sys.stdout) sys.exit(0) # Conflicting, missing and erroneous options From 244fe977fec880f1bce55683437a711e12075b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 6 Jun 2016 02:52:58 +0700 Subject: [PATCH 402/501] [options] Add --load-info-json alias for symmetry with --write-info-json --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 14051b714..99ce4131f 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -668,7 +668,7 @@ def parseOpts(overrideArguments=None): action='store_true', dest='writeannotations', default=False, help='Write video annotations to a .annotations.xml file') filesystem.add_option( - 
'--load-info', + '--load-info-json', '--load-info', dest='load_info_filename', metavar='FILE', help='JSON file containing the video information (created with the "--write-info-json" option)') filesystem.add_option( From db59b37d0bb2bbb4894f28b6b65d1d7f5496444d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 6 Jun 2016 03:02:11 +0700 Subject: [PATCH 403/501] [devscripts/create-github-release] Make full published releases by default --- devscripts/create-github-release.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index f74d39490..3b8021e74 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -97,10 +97,9 @@ def main(): version, build_path = args - releaser = GitHubReleaser(debuglevel=0) + releaser = GitHubReleaser() - new_release = releaser.create_release( - version, name='youtube-dl %s' % version, draft=True, prerelease=True) + new_release = releaser.create_release(version, name='youtube-dl %s' % version) release_id = new_release['id'] for asset in os.listdir(build_path): From e67f6880257068c395d38e24a5e13f69902e1e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 5 Jun 2016 23:16:08 +0200 Subject: [PATCH 404/501] [compat] Add 'compat_input' to __all__ --- youtube_dl/compat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index fabac9fd2..e3cab4dd0 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -628,6 +628,7 @@ __all__ = [ 'compat_html_entities', 'compat_http_client', 'compat_http_server', + 'compat_input', 'compat_itertools_count', 'compat_kwargs', 'compat_ord', From 345dec937fcc2b9ae106e91f4c01568c8c7e41f8 Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi <kagami@genshiken.org> Date: Tue, 7 Jun 2016 14:39:21 +0300 Subject: [PATCH 
405/501] [vlive] Acknowledge vlive+ streams statuses Same as common statuses just with "PRODUCT_" prefix: PRODUCT_LIVE_END, PRODUCT_COMING_SOON, etc. --- youtube_dl/extractor/vlive.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 147f52d45..8d671cca7 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -9,6 +9,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + remove_start, ) from ..compat import compat_urllib_parse_urlencode @@ -39,6 +40,7 @@ class VLiveIE(InfoExtractor): webpage, 'video params') status, _, _, live_params, long_video_id, key = re.split( r'"\s*,\s*"', video_params)[2:8] + status = remove_start(status, 'PRODUCT_') if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': live_params = self._parse_json('"%s"' % live_params, video_id) From 74193838f71addcb08a9f56a7fad8c2e7df298ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Jun 2016 22:12:20 +0700 Subject: [PATCH 406/501] [canal+] Improve extraction (Closes #9718) --- youtube_dl/extractor/canalplus.py | 33 ++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 25b2d4efe..8d0f91158 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse from ..utils import ( ExtractorError, HEADRequest, unified_strdate, - url_basename, qualities, int_or_none, ) @@ -16,13 +16,25 @@ from ..utils import ( class CanalplusIE(InfoExtractor): IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv' - _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' + _VALID_URL = r'''(?x) + 
https?:// + (?: + (?: + (?:(?:www|m)\.)?canalplus\.fr| + (?:www\.)?piwiplus\.fr| + (?:www\.)?d8\.tv| + (?:www\.)?itele\.fr + )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?| + player\.canalplus\.fr/#/(?P<id>\d+) + ) + + ''' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json' _SITE_ID_MAP = { - 'canalplus.fr': 'cplus', - 'piwiplus.fr': 'teletoon', - 'd8.tv': 'd8', - 'itele.fr': 'itele', + 'canalplus': 'cplus', + 'piwiplus': 'teletoon', + 'd8': 'd8', + 'itele': 'itele', } _TESTS = [{ @@ -65,16 +77,19 @@ class CanalplusIE(InfoExtractor): 'description': 'md5:8216206ec53426ea6321321f3b3c16db', 'upload_date': '20150211', }, + }, { + 'url': 'http://m.canalplus.fr/?vid=1398231', + 'only_matching': True, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.groupdict().get('id') + video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid') - site_id = self._SITE_ID_MAP[mobj.group('site') or 'canal'] + site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]] # Beware, some subclasses do not define an id group - display_id = url_basename(mobj.group('path')) + display_id = mobj.group('display_id') or video_id if video_id is None: webpage = self._download_webpage(url, display_id) From 3d9b3605a35eb48bd20e569ed9ce9d706e457ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Jun 2016 22:26:18 +0700 Subject: [PATCH 407/501] [canal+] Update tests --- youtube_dl/extractor/canalplus.py | 48 +++++++++++++++++-------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 8d0f91158..605c5e957 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -38,14 +38,14 @@ class CanalplusIE(InfoExtractor): } _TESTS = [{ - 'url': 
'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', - 'md5': '12164a6f14ff6df8bd628e8ba9b10b78', + 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814', + 'md5': '41f438a4904f7664b91b4ed0dec969dc', 'info_dict': { - 'id': '1263092', + 'id': '1192814', 'ext': 'mp4', - 'title': 'Le Zapping - 13/05/15', - 'description': 'md5:09738c0d06be4b5d06a0940edb0da73f', - 'upload_date': '20150513', + 'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014", + 'description': "Toute l'année 2014 dans un Zapping exceptionnel !", + 'upload_date': '20150105', }, }, { 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', @@ -58,24 +58,28 @@ class CanalplusIE(InfoExtractor): }, 'skip': 'Only works from France', }, { - 'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html', + 'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231', 'info_dict': { - 'id': '966289', - 'ext': 'flv', - 'title': 'Campagne intime - Documentaire exceptionnel', - 'description': 'md5:d2643b799fb190846ae09c61e59a859f', - 'upload_date': '20131108', - }, - 'skip': 'videos get deleted after a while', - }, { - 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', - 'md5': '38b8f7934def74f0d6f3ba6c036a5f82', - 'info_dict': { - 'id': '1213714', + 'id': '1390231', 'ext': 'mp4', - 'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45', - 'description': 'md5:8216206ec53426ea6321321f3b3c16db', - 'upload_date': '20150211', + 'title': "Vacances pas chères : prix discount ou grosses dépenses ? 
- En quête d'actualité", + 'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6', + 'upload_date': '20160512', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224', + 'info_dict': { + 'id': '1398334', + 'ext': 'mp4', + 'title': "L'invité de Bruce Toussaint du 07/06/2016 - ", + 'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324', + 'upload_date': '20160607', + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://m.canalplus.fr/?vid=1398231', From 57b6e9652e27aa46395dab6238e54d63746f9a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Jun 2016 22:32:08 +0700 Subject: [PATCH 408/501] [canal+] Add support for d17.tv --- youtube_dl/extractor/canalplus.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 605c5e957..61463f249 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor): (?:(?:www|m)\.)?canalplus\.fr| (?:www\.)?piwiplus\.fr| (?:www\.)?d8\.tv| + (?:www\.)?d17\.tv| (?:www\.)?itele\.fr )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?| player\.canalplus\.fr/#/(?P<id>\d+) @@ -34,6 +35,7 @@ class CanalplusIE(InfoExtractor): 'canalplus': 'cplus', 'piwiplus': 'teletoon', 'd8': 'd8', + 'd17': 'd17', 'itele': 'itele', } @@ -84,6 +86,9 @@ class CanalplusIE(InfoExtractor): }, { 'url': 'http://m.canalplus.fr/?vid=1398231', 'only_matching': True, + }, { + 'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061', + 'only_matching': True, }] def _real_extract(self, url): From a6571f1073eab6c9a4cc9800a0bff31cf12fe09f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 00:19:33 +0800 Subject: [PATCH 409/501] [common] Fix 
<bootstrapInfo> detection in F4M manifests Regression since 0a5685b26fae0940f14cb063a6e4fc6986f9c124 --- youtube_dl/extractor/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 57793537b..bfd432160 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -45,6 +45,7 @@ from ..utils import ( unescapeHTML, unified_strdate, url_basename, + xpath_element, xpath_text, xpath_with_ns, determine_protocol, @@ -1030,7 +1031,7 @@ class InfoExtractor(object): if base_url: base_url = base_url.strip() - bootstrap_info = xpath_text( + bootstrap_info = xpath_element( manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], 'bootstrap info', default=None) @@ -1085,7 +1086,7 @@ class InfoExtractor(object): formats.append({ 'format_id': format_id, 'url': manifest_url, - 'ext': 'flv' if bootstrap_info else None, + 'ext': 'flv' if bootstrap_info is not None else None, 'tbr': tbr, 'width': width, 'height': height, From a4a8201c02d06bff384ecb66a257dbec0652ff52 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 00:25:51 +0800 Subject: [PATCH 410/501] [wdr] Update _TESTS --- youtube_dl/extractor/wdr.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 1e729cb7c..6174eb19f 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -21,7 +21,7 @@ class WDRIE(InfoExtractor): _TESTS = [ { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', - 'md5': 'e58c39c3e30077141d258bf588700a7b', + # HDS download, MD5 is unstable 'info_dict': { 'id': 'mdb-1058683', 'ext': 'flv', @@ -35,7 +35,6 @@ class WDRIE(InfoExtractor): 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml' }]}, }, - 'skip': 'Page Not Found', 
}, { 'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html', @@ -51,7 +50,6 @@ class WDRIE(InfoExtractor): 'is_live': False, 'subtitles': {} }, - 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', @@ -90,7 +88,7 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', - 'md5': 'ca365705551e4bd5217490f3b0591290', + # HDS download, MD5 is unstable 'info_dict': { 'id': 'mdb-186083', 'ext': 'flv', @@ -98,9 +96,6 @@ class WDRIE(InfoExtractor): 'title': 'Sachgeschichte - Achterbahn ', 'description': '- Die Sendung mit der Maus -', }, - 'params': { - 'skip_download': True, # the file has different versions :( - }, }, ] From a26a9d62396641364690974de9c859cf26f9acf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Jun 2016 23:53:08 +0700 Subject: [PATCH 411/501] [livestream:event] Ensure video id is string (Closes #9721) --- youtube_dl/extractor/livestream.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 0edc06c43..bc7894bf1 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -203,9 +203,10 @@ class LivestreamIE(InfoExtractor): if not videos_info: break for v in videos_info: + v_id = compat_str(v['id']) entries.append(self.url_result( - 'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v['id']), - 'Livestream', v['id'], v['caption'])) + 'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id), + 'Livestream', v_id, v.get('caption'))) last_video = videos_info[-1]['id'] return self.playlist_result(entries, event_id, event_data['full_name']) From 33d9f3707ccccfe8d73c1b398f198792e80a259f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: 
Wed, 8 Jun 2016 02:22:04 +0700 Subject: [PATCH 412/501] [thesixtyone] Relax _VALID_URL (Closes #9714) --- youtube_dl/extractor/thesixtyone.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/thesixtyone.py b/youtube_dl/extractor/thesixtyone.py index d8b1fd281..d63aef5de 100644 --- a/youtube_dl/extractor/thesixtyone.py +++ b/youtube_dl/extractor/thesixtyone.py @@ -12,7 +12,7 @@ class TheSixtyOneIE(InfoExtractor): s| song/comments/list| song - )/(?P<id>[A-Za-z0-9]+)/?$''' + )/(?:[^/]+/)?(?P<id>[A-Za-z0-9]+)/?$''' _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}' _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream' _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop' @@ -45,6 +45,10 @@ class TheSixtyOneIE(InfoExtractor): 'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/', 'only_matching': True, }, + { + 'url': 'http://www.thesixtyone.com/maryatmidnight/song/StrawberriesandCream/yvWtLp0c4GQ/', + 'only_matching': True, + }, ] _DECODE_MAP = { From 7264e385912951167c27b40df5fd22010d594b12 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 14:29:53 +0800 Subject: [PATCH 413/501] [bilibili] Fix for videos without upload time (closes #9710) --- youtube_dl/extractor/bilibili.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 910e539e4..b17047b39 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -102,6 +102,22 @@ class BiliBiliIE(InfoExtractor): 'uploader_id': '151066', }, }], + }, { + # Missing upload time + 'url': 'http://www.bilibili.com/video/av1867637/', + 'info_dict': { + 'id': '2880301', + 'ext': 'flv', + 'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', + 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', + 'uploader': '黑夜为猫', + 'uploader_id': 
'610729', + }, + 'params': { + # Just to test metadata extraction + 'skip_download': True, + }, + 'expected_warnings': ['upload time'], }] # BiliBili blocks keys from time to time. The current key is extracted from @@ -172,6 +188,7 @@ class BiliBiliIE(InfoExtractor): description = self._html_search_meta('description', webpage) datetime_str = self._html_search_regex( r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False) + timestamp = None if datetime_str: timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple()) From 50ce1c331c736d8219f3bf631ff069b9aecc48e3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 14:43:52 +0800 Subject: [PATCH 414/501] [downloader/external] Add another env for proxies in ffmpeg/avconv Related sources: https://git.libav.org/?p=libav.git;a=blob;f=libavformat/http.c;h=8fe8d11e1edfdbb04a8726db2c49cfef3f572aac;hb=HEAD#l152 https://git.libav.org/?p=libav.git;a=blob;f=libavformat/tls.c;h=fab243e93e20034e88e619188c13a44a5d8ccdb9;hb=HEAD#l63 https://github.com/FFmpeg/FFmpeg/blob/f8e89d8/libavformat/http.c#L191 https://github.com/FFmpeg/FFmpeg/blob/f8e89d8/libavformat/tls.c#L92 --- youtube_dl/downloader/external.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 3a73cee1c..3ff1f9ed4 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -210,6 +210,7 @@ class FFmpegFD(ExternalFD): # args += ['-http_proxy', proxy] env = os.environ.copy() compat_setenv('HTTP_PROXY', proxy, env=env) + compat_setenv('http_proxy', proxy, env=env) protocol = info_dict.get('protocol') From 22a0a95247c30b346592b6e3d464776bceb3b934 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 20:47:39 +0800 Subject: [PATCH 415/501] [theplatform] Some NBC videos require an additional cookie Related: #9578 --- youtube_dl/extractor/theplatform.py 
| 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 02dbef913..5793ec6ef 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -14,11 +14,13 @@ from ..compat import ( compat_urllib_parse_urlparse, ) from ..utils import ( + determine_ext, ExtractorError, float_or_none, int_or_none, sanitized_Request, unsmuggle_url, + update_url_query, xpath_with_ns, mimetype2ext, find_xpath_attr, @@ -48,6 +50,12 @@ class ThePlatformBaseIE(OnceIE): if OnceIE.suitable(_format['url']): formats.extend(self._extract_once_formats(_format['url'])) else: + media_url = _format['url'] + if determine_ext(media_url) == 'm3u8': + hdnea2 = self._get_cookies(media_url).get('hdnea2') + if hdnea2: + _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value}) + formats.append(_format) subtitles = self._parse_smil_subtitles(meta, default_ns) From e6e90515db983ca447cf7a59bbc153907d4fff4a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 20:50:01 +0800 Subject: [PATCH 416/501] [nbc] Add the test case from #9578 Closes #9578 --- youtube_dl/extractor/nbc.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 46504cd5f..f27c7f139 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -67,6 +67,23 @@ class NBCIE(InfoExtractor): # This video has expired but with an escaped embedURL 'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515', 'only_matching': True, + }, + { + # HLS streams requires the 'hdnea3' cookie + 'url': 'http://www.nbc.com/Kings/video/goliath/n1806', + 'info_dict': { + 'id': 'n1806', + 'ext': 'mp4', + 'title': 'Goliath', + 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.', + 'timestamp': 
1237100400, + 'upload_date': '20090315', + 'uploader': 'NBCU-COM', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from US', } ] From fc0a45fa416ad3e3ecf5936061efbb0328afa6b5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 21:12:14 +0800 Subject: [PATCH 417/501] [twitter] Detect suspended accounts and update _TESTS --- youtube_dl/extractor/twitter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index ea673054f..129103c64 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -207,6 +207,7 @@ class TwitterIE(InfoExtractor): 'uploader_id': 'giphz', }, 'expected_warnings': ['height', 'width'], + 'skip': 'Account suspended', }, { 'url': 'https://twitter.com/starwars/status/665052190608723968', 'md5': '39b7199856dee6cd4432e72c74bc69d4', @@ -278,7 +279,11 @@ class TwitterIE(InfoExtractor): user_id = mobj.group('user_id') twid = mobj.group('id') - webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid) + webpage, urlh = self._download_webpage_handle( + self._TEMPLATE_URL % (user_id, twid), twid) + + if 'twitter.com/account/suspended' in urlh.geturl(): + raise ExtractorError('Account suspended by Twitter.', expected=True) username = remove_end(self._og_search_title(webpage), ' on Twitter') From c6308b3153acc57300f750f0061c63ffcba4d150 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 21:28:10 +0800 Subject: [PATCH 418/501] [twitter] Fix extraction for videos with HLS streams Closes #9623 --- youtube_dl/extractor/twitter.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 129103c64..76421e533 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -5,6 +5,7 @@ import re from .common import 
InfoExtractor from ..utils import ( + determine_ext, float_or_none, xpath_text, remove_end, @@ -116,13 +117,16 @@ class TwitterCardIE(TwitterBaseIE): video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') if video_url: - f = { - 'url': video_url, - } + if determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) + else: + f = { + 'url': video_url, + } - _search_dimensions_in_video_url(f, video_url) + _search_dimensions_in_video_url(f, video_url) - formats.append(f) + formats.append(f) vmap_url = config.get('vmapUrl') or config.get('vmap_url') if vmap_url: @@ -263,7 +267,6 @@ class TwitterIE(InfoExtractor): 'add_ie': ['Vine'], }, { 'url': 'https://twitter.com/captainamerica/status/719944021058060289', - # md5 constantly changes 'info_dict': { 'id': '719944021058060289', 'ext': 'mp4', @@ -272,6 +275,9 @@ class TwitterIE(InfoExtractor): 'uploader_id': 'captainamerica', 'uploader': 'Captain America', }, + 'params': { + 'skip_download': True, # requires ffmpeg + }, }] def _real_extract(self, url): From 6da8d7de69af144a96e9e50168e66f66af54129f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 21:48:12 +0800 Subject: [PATCH 419/501] [twitter] Update _TESTS --- youtube_dl/extractor/twitter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 76421e533..b73842986 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -53,7 +53,7 @@ class TwitterCardIE(TwitterBaseIE): 'id': 'dq4Oj5quskI', 'ext': 'mp4', 'title': 'Ubuntu 11.10 Overview', - 'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/', + 'description': 'Take a quick peek at what\'s new 
and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...', 'upload_date': '20111013', 'uploader': 'OMG! Ubuntu!', 'uploader_id': 'omgubuntu', @@ -244,10 +244,10 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'jay - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'jay on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': 're:^https?://.*\.jpg', - 'uploader': 'jay', + 'uploader': 'Donte The Dumbass', 'uploader_id': 'jaydingeer', }, 'params': { From 411c590a1f997f9efd71be8f434821acbf33a35f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 23:45:46 +0800 Subject: [PATCH 420/501] [youku:show] Add new extractor --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/youku.py | 52 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d107080f5..676a0400c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1013,7 +1013,10 @@ from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE from .ynet import YnetIE from .youjizz import YouJizzIE -from .youku import YoukuIE +from .youku import ( + YoukuIE, + YoukuShowIE, +) from .youporn import YouPornIE from .yourupload import YourUploadIE from .youtube import ( diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index dbccbe228..147608ebe 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -2,7 +2,9 @@ from __future__ import unicode_literals import base64 +import itertools 
import random +import re import string import time @@ -13,6 +15,7 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + get_element_by_attribute, sanitized_Request, ) @@ -285,3 +288,52 @@ class YoukuIE(InfoExtractor): 'title': title, 'entries': entries, } + + +class YoukuShowIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html' + IE_NAME = 'youku:show' + + _TEST = { + 'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html', + 'info_dict': { + 'id': 'zc7c670be07ff11e48b3f', + 'title': '花千骨 未删减版', + 'description': 'md5:578d4f2145ae3f9128d9d4d863312910', + }, + 'playlist_count': 50, + } + + _PAGE_SIZE = 40 + + def _find_videos_in_page(self, webpage): + videos = re.findall( + r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"', webpage) + return [ + self.url_result(video_url, YoukuIE.ie_key(), title) + for video_url, title in videos] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + + entries = self._find_videos_in_page(webpage) + + playlist_title = self._html_search_regex( + r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False) + detail_div = get_element_by_attribute('class', 'detail', webpage) or '' + playlist_description = self._html_search_regex( + r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>', + detail_div, 'playlist description', fatal=False) + + for idx in itertools.count(1): + episodes_page = self._download_webpage( + 'http://www.youku.com/show_episode/id_%s.html' % show_id, + show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)}, + note='Downloading episodes page %d' % idx) + new_entries = self._find_videos_in_page(episodes_page) + entries.extend(new_entries) + if len(new_entries) < self._PAGE_SIZE: + break + + return self.playlist_result(entries, show_id, playlist_title, playlist_description) From 
11380753b5aa9d8128ef28a968ab325973276fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:00:47 +0700 Subject: [PATCH 421/501] [vessel] Add support for embed urls and improve extraction --- youtube_dl/extractor/vessel.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 1a0ff3395..e027c018b 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..utils import ( @@ -12,11 +13,11 @@ from ..utils import ( class VesselIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)' _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' _LOGIN_URL = 'https://www.vessel.com/api/account/login' _NETRC_MACHINE = 'vessel' - _TEST = { + _TESTS = [{ 'url': 'https://www.vessel.com/videos/HDN7G5UMs', 'md5': '455cdf8beb71c6dd797fd2f3818d05c4', 'info_dict': { @@ -28,7 +29,16 @@ class VesselIE(InfoExtractor): 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', 'timestamp': int, }, - } + }, { + 'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1', + webpage)] @staticmethod def make_json_request(url, data): @@ -98,16 +108,19 @@ class VesselIE(InfoExtractor): formats = [] for f in video_asset.get('sources', []): - if f['name'] == 'hls-index': + location = f.get('location') + if not location: + continue + if f.get('name') == 'hls-index': 
formats.extend(self._extract_m3u8_formats( - f['location'], video_id, ext='mp4', m3u8_id='m3u8')) + location, video_id, ext='mp4', m3u8_id='m3u8')) else: formats.append({ - 'format_id': f['name'], + 'format_id': f.get('name'), 'tbr': f.get('bitrate'), 'height': f.get('height'), 'width': f.get('width'), - 'url': f['location'], + 'url': location, }) self._sort_formats(formats) From 48a5eabc487058ccaa1076b74ad9106fc6019955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:02:27 +0700 Subject: [PATCH 422/501] [extractor/generic] Add support vessel embeds (Closes #7083) --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b4138381d..90575ab0e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -63,6 +63,7 @@ from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE +from .vessel import VesselIE class GenericIE(InfoExtractor): @@ -1533,6 +1534,11 @@ class GenericIE(InfoExtractor): if tp_urls: return _playlist_from_matches(tp_urls, ie='ThePlatform') + # Look for Vessel embeds + vessel_urls = VesselIE._extract_urls(webpage) + if vessel_urls: + return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key()) + # Look for embedded rtl.nl player matches = re.findall( r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', From a479b8f687245a9cb1b5c25ed9ece28c4710981f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:09:32 +0700 Subject: [PATCH 423/501] [vessel] Use native hls by default --- youtube_dl/extractor/vessel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index e027c018b..59f2b4ba4 100644 --- 
a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -113,7 +113,8 @@ class VesselIE(InfoExtractor): continue if f.get('name') == 'hls-index': formats.extend(self._extract_m3u8_formats( - location, video_id, ext='mp4', m3u8_id='m3u8')) + location, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8')) else: formats.append({ 'format_id': f.get('name'), From 39da509f6712b6b0e9d52a9c9e990a5b5cd6c2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:12:48 +0700 Subject: [PATCH 424/501] [vessel] Extract DASH formats --- youtube_dl/extractor/vessel.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 59f2b4ba4..c53f44584 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -111,13 +111,17 @@ class VesselIE(InfoExtractor): location = f.get('location') if not location: continue - if f.get('name') == 'hls-index': + name = f.get('name') + if name == 'hls-index': formats.extend(self._extract_m3u8_formats( location, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='m3u8')) + elif name == 'dash-index': + formats.extend(self._extract_mpd_formats( + location, video_id, mpd_id='dash', fatal=False)) else: formats.append({ - 'format_id': f.get('name'), + 'format_id': name, 'tbr': f.get('bitrate'), 'height': f.get('height'), 'width': f.get('width'), From 9d51a0a9a19f07997cfb3ff1bb9fc9c1669a455c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:13:38 +0700 Subject: [PATCH 425/501] [vessel] Make hls formats non fatal --- youtube_dl/extractor/vessel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index c53f44584..2cd617b91 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -115,7 +115,7 @@ 
class VesselIE(InfoExtractor): if name == 'hls-index': formats.extend(self._extract_m3u8_formats( location, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='m3u8')) + entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False)) elif name == 'dash-index': formats.extend(self._extract_mpd_formats( location, video_id, mpd_id='dash', fatal=False)) From be6217b26142491232fb697b125015d45437832d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 05:34:19 +0700 Subject: [PATCH 426/501] [YoutubeDL] Force string conversion on non string video ids --- youtube_dl/YoutubeDL.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3917ca9dc..5036289b0 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1223,6 +1223,10 @@ class YoutubeDL(object): if 'title' not in info_dict: raise ExtractorError('Missing "title" field in extractor result') + if not isinstance(info_dict['id'], compat_str): + self.report_warning('"id" field is not a string - forcing string conversion') + info_dict['id'] = compat_str(info_dict['id']) + if 'playlist' not in info_dict: # It isn't part of a playlist info_dict['playlist'] = None From 6c33d24b46ecfb1f2ce790e21f2410149fdfb095 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 12:58:24 +0800 Subject: [PATCH 427/501] [utils] Add audio/mpeg to mimetype2ext() Used in WDR live radios (#6147) --- youtube_dl/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 89234b39d..229de4b39 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2020,6 +2020,9 @@ def mimetype2ext(mt): ext = { 'audio/mp4': 'm4a', + # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. 
Here use .mp3 as + # it's the most popular one + 'audio/mpeg': 'mp3', }.get(mt) if ext is not None: return ext From 50918c4ee01be6c1218a72bef35838216b2bf8d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 13:04:30 +0800 Subject: [PATCH 428/501] [wdr] Support radio players (closes #6147) --- youtube_dl/extractor/wdr.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 6174eb19f..059e2aa08 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,12 +10,13 @@ from ..utils import ( strip_jsonp, unified_strdate, ExtractorError, + urlhandle_detect_ext, ) class WDRIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' - _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' + _PAGE_REGEX = r'/(?:mediathek/)?(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _TESTS = [ @@ -97,6 +98,16 @@ class WDRIE(InfoExtractor): 'description': '- Die Sendung mit der Maus -', }, }, + { + 'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html', + 'info_dict': { + 'id': 'mdb-869971', + 'ext': 'mp3', + 'title': 'Funkhaus Europa Livestream', + 'description': 'md5:2309992a6716c347891c045be50992e4', + 'upload_date': '20160101', + }, + } ] def _real_extract(self, url): @@ -107,9 +118,10 @@ class WDRIE(InfoExtractor): webpage = self._download_webpage(url, display_id) # for wdr.de the data-extension is in a tag with the class "mediaLink" + # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" # for wdrmaus its in a link to the page in a multiline "videoLink"-tag json_metadata = self._html_search_regex( - 
r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', + r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', webpage, 'media link', default=None, flags=re.MULTILINE) if not json_metadata: @@ -143,15 +155,22 @@ class WDRIE(InfoExtractor): for tag_name in ['videoURL', 'audioURL']: if tag_name in metadata_media_alt: alt_url = metadata_media_alt[tag_name] - if determine_ext(alt_url) == 'm3u8': + ext = determine_ext(alt_url) + if ext == 'm3u8': m3u_fmt = self._extract_m3u8_formats( alt_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') formats.extend(m3u_fmt) else: - formats.append({ + a_format = { 'url': alt_url - }) + } + if ext == 'unknown_video': + urlh = self._request_webpage( + alt_url, display_id, note='Determining extension') + ext = urlhandle_detect_ext(urlh) + a_format['ext'] = ext + formats.append(a_format) # check if there are flash-streams for this video if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: From 6869d634c6d7482dd53034dec8a8f2f0b8e1f9b0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 13:41:12 +0800 Subject: [PATCH 429/501] [wdr] Simplify extraction --- youtube_dl/extractor/wdr.py | 64 ++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 059e2aa08..88369d3f2 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,6 +10,7 @@ from ..utils import ( strip_jsonp, unified_strdate, ExtractorError, + update_url_query, urlhandle_detect_ext, ) @@ -100,9 +101,10 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html', + # Live stream, MD5 unstable 'info_dict': { 'id': 'mdb-869971', - 'ext': 'mp3', + 'ext': 'flv', 'title': 'Funkhaus Europa Livestream', 'description': 
'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20160101', @@ -150,36 +152,38 @@ class WDRIE(InfoExtractor): formats = [] # check if the metadata contains a direct URL to a file - metadata_media_alt = metadata_media_resource.get('alt') - if metadata_media_alt: - for tag_name in ['videoURL', 'audioURL']: - if tag_name in metadata_media_alt: - alt_url = metadata_media_alt[tag_name] - ext = determine_ext(alt_url) - if ext == 'm3u8': - m3u_fmt = self._extract_m3u8_formats( - alt_url, display_id, 'mp4', 'm3u8_native', - m3u8_id='hls') - formats.extend(m3u_fmt) - else: - a_format = { - 'url': alt_url - } - if ext == 'unknown_video': - urlh = self._request_webpage( - alt_url, display_id, note='Determining extension') - ext = urlhandle_detect_ext(urlh) - a_format['ext'] = ext - formats.append(a_format) + for kind, media_resource in metadata_media_resource.items(): + if kind not in ('dflt', 'alt'): + continue - # check if there are flash-streams for this video - if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: - video_url = metadata_media_resource['dflt']['videoURL'] - if video_url.endswith('.f4m'): - full_video_url = video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' - formats.extend(self._extract_f4m_formats(full_video_url, display_id, f4m_id='hds', fatal=False)) - elif video_url.endswith('.smil'): - formats.extend(self._extract_smil_formats(video_url, 'stream', fatal=False)) + for tag_name, medium_url in media_resource.items(): + if tag_name not in ('videoURL', 'audioURL'): + continue + + ext = determine_ext(medium_url) + if ext == 'm3u8': + m3u_fmt = self._extract_m3u8_formats( + medium_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + formats.extend(m3u_fmt) + elif ext == 'f4m': + manifest_url = update_url_query( + medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) + formats.extend(self._extract_f4m_formats( + manifest_url, display_id, f4m_id='hds', fatal=False)) + elif ext == 'smil': + 
formats.extend(self._extract_smil_formats( + medium_url, 'stream', fatal=False)) + else: + a_format = { + 'url': medium_url + } + if ext == 'unknown_video': + urlh = self._request_webpage( + medium_url, display_id, note='Determining extension') + ext = urlhandle_detect_ext(urlh) + a_format['ext'] = ext + formats.append(a_format) subtitles = {} caption_url = metadata_media_resource.get('captionURL') From 1594a4932f7e94287c32b5d4d63a60b57ffee96a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 13:49:35 +0800 Subject: [PATCH 430/501] [wdr] Misc changes --- youtube_dl/extractor/wdr.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 88369d3f2..a9238cbeb 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -6,10 +6,10 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, + ExtractorError, js_to_json, strip_jsonp, unified_strdate, - ExtractorError, update_url_query, urlhandle_detect_ext, ) @@ -17,7 +17,7 @@ from ..utils import ( class WDRIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' - _PAGE_REGEX = r'/(?:mediathek/)?(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' + _PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _TESTS = [ @@ -162,10 +162,9 @@ class WDRIE(InfoExtractor): ext = determine_ext(medium_url) if ext == 'm3u8': - m3u_fmt = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( medium_url, display_id, 'mp4', 'm3u8_native', - m3u8_id='hls') - formats.extend(m3u_fmt) + m3u8_id='hls')) elif ext == 'f4m': manifest_url = update_url_query( medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) @@ -185,6 +184,8 @@ class WDRIE(InfoExtractor): a_format['ext'] = 
ext formats.append(a_format) + self._sort_formats(formats) + subtitles = {} caption_url = metadata_media_resource.get('captionURL') if caption_url: @@ -206,8 +207,6 @@ class WDRIE(InfoExtractor): if upload_date: upload_date = unified_strdate(upload_date) - self._sort_formats(formats) - return { 'id': metadata_tracker_data.get('trackerClipId', display_id), 'display_id': display_id, From e2713d32f49f1bfa830cc755a96691c39da88290 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 19:00:13 +0800 Subject: [PATCH 431/501] [openload] Fix extraction. Thanks @perron375 for the solution Closes #9706 --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 5049b870e..1b57462b5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) code = self._search_regex( - r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>', + r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>', webpage, 'JS code') decoded = self.openload_decode(code) From 21efee5f8bc8daf0cbb5fc3408a1fc5b9d5eadcb Mon Sep 17 00:00:00 2001 From: N1k145 <N1k145@users.noreply.github.com> Date: Thu, 9 Jun 2016 12:13:15 +0200 Subject: [PATCH 432/501] [openload] Relax _VALID_URL [openload] added to _TESTS, removed escape --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 1b57462b5..6415b8fdc 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -14,7 +14,7 @@ from ..utils import ( class OpenloadIE(InfoExtractor): - _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)' + _VALID_URL = 
r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -31,6 +31,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://openload.io/f/ZAn6oz-VZGE/', 'only_matching': True, + }, { + 'url': 'https://openload.co/f/_-ztPaZtMhM/', + 'only_matching': True, }, { # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout # for title and ext From bb1e44cc8ee7937422fb5635f3431feb6d5fd918 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Fri, 27 May 2016 13:37:40 +0200 Subject: [PATCH 433/501] [godtv] Add extractor [GodTV] Improvements --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/godtv.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/godtv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index aa98782a5..40dcfcde3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,6 +292,7 @@ from .globo import ( GloboArticleIE, ) from .godtube import GodTubeIE +from .godtv import GodTVIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE from .googledrive import GoogleDriveIE diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py new file mode 100644 index 000000000..50f093ace --- /dev/null +++ b/youtube_dl/extractor/godtv.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class GodTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', + 'info_dict': { + 'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3', + 'ext': 'mp4', + 'title': 'Randy Needham', + 'duration': 3615.08, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, 
url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + ooyala_id = self._search_regex(r'"content_id"\s*:\s*"([\w-]{32})"', webpage, display_id) + + return OoyalaIE._build_url_result(ooyala_id) From c0fed3bda50f77d063f3817cfbc3d8b81c18afa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 21:29:41 +0700 Subject: [PATCH 434/501] [godtv] Improve and add support for playlists (Closes #9608) --- youtube_dl/extractor/godtv.py | 36 +++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index 50f093ace..78d638cf0 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -1,13 +1,13 @@ -# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor from .ooyala import OoyalaIE +from ..utils import js_to_json class GodTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', 'info_dict': { 'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3', @@ -18,12 +18,40 @@ class GodTVIE(InfoExtractor): 'params': { 'skip_download': True, } - } + }, { + 'url': 'http://god.tv/playlist/bible-study', + 'info_dict': { + 'id': 'bible-study', + }, + 'playlist_mincount': 37, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - ooyala_id = self._search_regex(r'"content_id"\s*:\s*"([\w-]{32})"', webpage, display_id) + + settings = self._parse_json( + self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'settings', default='{}'), + display_id, transform_source=js_to_json, fatal=False) + + ooyala_id = None + + if settings: + playlist = settings.get('playlist') + if playlist and isinstance(playlist, list): + entries = [ + 
OoyalaIE._build_url_result(video['content_id']) + for video in playlist if video.get('content_id')] + if entries: + return self.playlist_result(entries, display_id) + ooyala_id = settings.get('ooyala', {}).get('content_id') + + if not ooyala_id: + ooyala_id = self._search_regex( + r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1', + webpage, 'ooyala id', group='id') return OoyalaIE._build_url_result(ooyala_id) From 416878f41f3b33cf1b10b0b30093dcd7a90bdbfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 21:33:51 +0700 Subject: [PATCH 435/501] [godtv] Add more tests --- youtube_dl/extractor/godtv.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index 78d638cf0..7deca00aa 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -24,6 +24,12 @@ class GodTVIE(InfoExtractor): 'id': 'bible-study', }, 'playlist_mincount': 37, + }, { + 'url': 'http://god.tv/node/15097', + 'only_matching': True, + }, { + 'url': 'http://god.tv/live/africa', + 'only_matching': True, }] def _real_extract(self, url): From b0aebe702c538010fd92cd0807963293f112adcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 21:34:47 +0700 Subject: [PATCH 436/501] [godtv] Relax _VALID_URL --- youtube_dl/extractor/godtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index 7deca00aa..c5d3b4e6a 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -6,7 +6,7 @@ from ..utils import js_to_json class GodTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', 'info_dict': { @@ -30,6 +30,9 @@ class 
GodTVIE(InfoExtractor): }, { 'url': 'http://god.tv/live/africa', 'only_matching': True, + }, { + 'url': 'http://god.tv/liveevents', + 'only_matching': True, }] def _real_extract(self, url): From bc7e7adf5154f15b74b2df3e2989f630667778ce Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 21:40:16 +0800 Subject: [PATCH 437/501] [wdr] Subtitles are TTML --- youtube_dl/extractor/wdr.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index a9238cbeb..6b83a2a04 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -34,7 +34,8 @@ class WDRIE(InfoExtractor): 'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318', 'is_live': False, 'subtitles': {'de': [{ - 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml' + 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml', + 'ext': 'ttml', }]}, }, }, @@ -190,7 +191,8 @@ class WDRIE(InfoExtractor): caption_url = metadata_media_resource.get('captionURL') if caption_url: subtitles['de'] = [{ - 'url': caption_url + 'url': caption_url, + 'ext': 'ttml', }] title = metadata_tracker_data.get('trackerClipTitle') From 55290788d352168844c8e64d64428a76baa63eea Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:28:09 +0800 Subject: [PATCH 438/501] [yahoo] Yahoo doesn't like region names in lower cases Fix test_Yahoo_7 --- youtube_dl/extractor/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b376f2b93..927a964a4 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -343,7 +343,7 @@ class YahooIE(InfoExtractor): webpage, 'region', fatal=False, default='US') data = compat_urllib_parse_urlencode({ 'protocol': 'http', - 'region': region, + 'region': region.upper(), }) query_url = ( 
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' From 506d0e96936f84c2b21c7ed37f4a7fca2eec86a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:29:58 +0800 Subject: [PATCH 439/501] [xuite] Skip the invalid test --- youtube_dl/extractor/xuite.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 2466410fa..0be8932ad 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -66,6 +66,7 @@ class XuiteIE(InfoExtractor): 'uploader_id': '242127761', 'categories': ['電玩動漫'], }, + 'skip': 'Video removed', }, { 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9', 'only_matching': True, From 436214baf70c1a50fbaf1fbfca4b48f33695590c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:31:06 +0800 Subject: [PATCH 440/501] [xfileshare] Skip an invalid test --- youtube_dl/extractor/xfileshare.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 769003735..ee4d04c20 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -62,7 +62,8 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4', 'thumbnail': 're:http://.*\.jpg', - } + }, + 'skip': 'Video removed', }, { 'url': 'http://vidto.me/ku5glz52nqe1.html', 'info_dict': { From e1e0a10c567e8457bf83f6b54e65963447e17a8f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:33:31 +0800 Subject: [PATCH 441/501] [weibo] Remove the extractor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Weibo weishipin (微視頻, tiny videos) service is dead and now all videos are hosted on Sina videos, which is covered by sina.py --- 
youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/weibo.py | 49 ------------------------------ 2 files changed, 50 deletions(-) delete mode 100644 youtube_dl/extractor/weibo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 40dcfcde3..0789e4a6e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -974,7 +974,6 @@ from .webofstories import ( WebOfStoriesIE, WebOfStoriesPlaylistIE, ) -from .weibo import WeiboIE from .weiqitv import WeiqiTVIE from .wimp import WimpIE from .wistia import WistiaIE diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py deleted file mode 100644 index 20bb039d3..000000000 --- a/youtube_dl/extractor/weibo.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class WeiboIE(InfoExtractor): - """ - The videos in Weibo come from different sites, this IE just finds the link - to the external video and returns it. 
- """ - _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm' - - _TEST = { - 'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm', - 'info_dict': { - 'id': '98322879', - 'ext': 'flv', - 'title': '魔声耳机最新广告“All Eyes On Us”', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Sina'], - } - - # Additional example videos from different sites - # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm - # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) - video_id = mobj.group('id') - info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id - info = self._download_json(info_url, video_id) - - videos_urls = map(lambda v: v['play_page_url'], info['result']['data']) - # Prefer sina video since they have thumbnails - videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u) - player_url = videos_urls[-1] - m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html', - player_url) - if m_sina is not None: - self.to_screen('Sina video detected') - sina_id = m_sina.group(1) - player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id - return self.url_result(player_url) From 3e74b444e7324fdda956aa816240b938eabf9c93 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:13:59 +0800 Subject: [PATCH 442/501] [vulture] Remove the extractor The first 10 URLs in google search "site:http://video.vulture.com/video" is dead. I guess Vulture does not host videos on their own anymore. 
--- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/vulture.py | 69 ------------------------------ 2 files changed, 70 deletions(-) delete mode 100644 youtube_dl/extractor/vulture.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0789e4a6e..38708294a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,7 +958,6 @@ from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE from .vuclip import VuClipIE -from .vulture import VultureIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py deleted file mode 100644 index faa167e65..000000000 --- a/youtube_dl/extractor/vulture.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import unicode_literals - -import json -import os.path -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class VultureIE(InfoExtractor): - IE_NAME = 'vulture.com' - _VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/' - _TEST = { - 'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1', - 'md5': '8d997845642a2b5152820f7257871bc8', - 'info_dict': { - 'id': '6GHRQL3RV7MSD1H4', - 'ext': 'mp4', - 'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED', - 'uploader_id': 'Sarah', - 'thumbnail': 're:^http://.*\.jpg$', - 'timestamp': 1401288564, - 'upload_date': '20140528', - 'description': 'Uplifting and witty, as predicted.', - 'duration': 1015, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - query_string = self._search_regex( - r"queryString\s*=\s*'([^']+)'", webpage, 'query string') - video_id = self._search_regex( - r'content=([^&]+)', query_string, 'video ID') - query_url = 
'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string - - query_webpage = self._download_webpage( - query_url, display_id, note='Downloading query page') - params_json = self._search_regex( - r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n', - query_webpage, - 'player params') - params = json.loads(params_json) - - upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T')) - uploader_id = params.get('user', {}).get('handle') - - media_item = params['media_item'] - title = os.path.splitext(media_item['title'])[0] - duration = int_or_none(media_item.get('duration_seconds')) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': media_item['pipeline_xid'], - 'title': title, - 'timestamp': upload_timestamp, - 'thumbnail': params.get('thumbnail_url'), - 'uploader_id': uploader_id, - 'description': params.get('description'), - 'duration': duration, - } From 5de008e8c3e4058c20956d19f69ac3347a2722e0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:31:55 +0800 Subject: [PATCH 443/501] [nbcnews] Support embed widgets Used in some Vulture videos --- youtube_dl/extractor/nbc.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f27c7f139..6b7da1149 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -266,6 +266,11 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952', 'only_matching': True, }, + { + # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html + 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682', + 'only_matching': True, + }, ] def _real_extract(self, url): @@ -289,18 +294,17 @@ class NBCNewsIE(ThePlatformIE): webpage = self._download_webpage(url, display_id) info = None bootstrap_json = self._search_regex( - 
r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$', + [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', + r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], webpage, 'bootstrap json', default=None) - if bootstrap_json: - bootstrap = self._parse_json(bootstrap_json, display_id) + bootstrap = self._parse_json( + bootstrap_json, display_id, transform_source=unescapeHTML) + if 'results' in bootstrap: info = bootstrap['results'][0]['video'] + elif 'video' in bootstrap: + info = bootstrap['video'] else: - player_instance_json = self._search_regex( - r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None) - if not player_instance_json: - player_instance_json = self._html_search_regex( - r'data-video="([^"]+)"', webpage, 'video json') - info = self._parse_json(player_instance_json, display_id) + info = bootstrap video_id = info['mpxId'] title = info['title'] From de3eb07ed64e3d50164a6db59385a94f2675b0b4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:32:59 +0800 Subject: [PATCH 444/501] [generic] Detect NBC News embeds --- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 90575ab0e..36a3d91fc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1032,6 +1032,17 @@ class GenericIE(InfoExtractor): 'timestamp': 1389118457, }, }, + # NBC News embed + { + 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html', + 'md5': '1aa589c675898ae6d37a17913cf68d66', + 'info_dict': { + 'id': '701714499682', + 'ext': 'mp4', + 'title': 'PREVIEW: On Assignment: David Letterman', + 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. 
Airs Sunday June 12 at 7/6c.', + }, + }, # UDN embed { 'url': 'https://video.udn.com/news/300346', @@ -1966,6 +1977,12 @@ class GenericIE(InfoExtractor): if nbc_sports_url: return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') + # Look for NBC News embeds + nbc_news_embed_url = re.search( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage) + if nbc_news_embed_url: + return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews') + # Look for Google Drive embeds google_drive_url = GoogleDriveIE._extract_url(webpage) if google_drive_url: From cc4444662c54c24f6f82efd3ba5e60e9556d88b8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:33:59 +0800 Subject: [PATCH 445/501] [generic] Remove Vulture embed detection Vulture.com videos are now hosted on YouTube, Vimeo, MTV, NBC News or Hulu. Here's an example of Hulu: http://www.vulture.com/2016/06/kimmel-interviews-mariah-carey-in-a-bathtub.html --- youtube_dl/extractor/generic.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 36a3d91fc..798c109c6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1857,14 +1857,6 @@ class GenericIE(InfoExtractor): url = unescapeHTML(mobj.group('url')) return self.url_result(url) - # Look for embedded vulture.com player - mobj = re.search( - r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"', - webpage) - if mobj is not None: - url = unescapeHTML(mobj.group('url')) - return self.url_result(url, ie='Vulture') - # Look for embedded mtvservices player mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) if mtvservices_url: From 9631a94fb5e5ee9b92135f938df00866535fc6c6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 15:05:24 +0800 Subject: [PATCH 446/501] [compat] Add compat_html_entities_html5 Used in test_Vporn_1.
Also Related to #9270 --- youtube_dl/compat.py | 2240 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2239 insertions(+), 1 deletion(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index e3cab4dd0..0243949a4 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -63,6 +63,2244 @@ try: except ImportError: # Python 2 import htmlentitydefs as compat_html_entities +try: # Python >= 3.3 + from compat_html_entities import html as compat_html_entities_html5 +except ImportError: + # Copied from CPython 3.5.1 html/entities.py + compat_html_entities_html5 = { + 'Aacute': '\xc1', + 'aacute': '\xe1', + 'Aacute;': '\xc1', + 'aacute;': '\xe1', + 'Abreve;': '\u0102', + 'abreve;': '\u0103', + 'ac;': '\u223e', + 'acd;': '\u223f', + 'acE;': '\u223e\u0333', + 'Acirc': '\xc2', + 'acirc': '\xe2', + 'Acirc;': '\xc2', + 'acirc;': '\xe2', + 'acute': '\xb4', + 'acute;': '\xb4', + 'Acy;': '\u0410', + 'acy;': '\u0430', + 'AElig': '\xc6', + 'aelig': '\xe6', + 'AElig;': '\xc6', + 'aelig;': '\xe6', + 'af;': '\u2061', + 'Afr;': '\U0001d504', + 'afr;': '\U0001d51e', + 'Agrave': '\xc0', + 'agrave': '\xe0', + 'Agrave;': '\xc0', + 'agrave;': '\xe0', + 'alefsym;': '\u2135', + 'aleph;': '\u2135', + 'Alpha;': '\u0391', + 'alpha;': '\u03b1', + 'Amacr;': '\u0100', + 'amacr;': '\u0101', + 'amalg;': '\u2a3f', + 'AMP': '&', + 'amp': '&', + 'AMP;': '&', + 'amp;': '&', + 'And;': '\u2a53', + 'and;': '\u2227', + 'andand;': '\u2a55', + 'andd;': '\u2a5c', + 'andslope;': '\u2a58', + 'andv;': '\u2a5a', + 'ang;': '\u2220', + 'ange;': '\u29a4', + 'angle;': '\u2220', + 'angmsd;': '\u2221', + 'angmsdaa;': '\u29a8', + 'angmsdab;': '\u29a9', + 'angmsdac;': '\u29aa', + 'angmsdad;': '\u29ab', + 'angmsdae;': '\u29ac', + 'angmsdaf;': '\u29ad', + 'angmsdag;': '\u29ae', + 'angmsdah;': '\u29af', + 'angrt;': '\u221f', + 'angrtvb;': '\u22be', + 'angrtvbd;': '\u299d', + 'angsph;': '\u2222', + 'angst;': '\xc5', + 'angzarr;': '\u237c', + 'Aogon;': '\u0104', + 'aogon;': '\u0105', + 'Aopf;': 
'\U0001d538', + 'aopf;': '\U0001d552', + 'ap;': '\u2248', + 'apacir;': '\u2a6f', + 'apE;': '\u2a70', + 'ape;': '\u224a', + 'apid;': '\u224b', + 'apos;': "'", + 'ApplyFunction;': '\u2061', + 'approx;': '\u2248', + 'approxeq;': '\u224a', + 'Aring': '\xc5', + 'aring': '\xe5', + 'Aring;': '\xc5', + 'aring;': '\xe5', + 'Ascr;': '\U0001d49c', + 'ascr;': '\U0001d4b6', + 'Assign;': '\u2254', + 'ast;': '*', + 'asymp;': '\u2248', + 'asympeq;': '\u224d', + 'Atilde': '\xc3', + 'atilde': '\xe3', + 'Atilde;': '\xc3', + 'atilde;': '\xe3', + 'Auml': '\xc4', + 'auml': '\xe4', + 'Auml;': '\xc4', + 'auml;': '\xe4', + 'awconint;': '\u2233', + 'awint;': '\u2a11', + 'backcong;': '\u224c', + 'backepsilon;': '\u03f6', + 'backprime;': '\u2035', + 'backsim;': '\u223d', + 'backsimeq;': '\u22cd', + 'Backslash;': '\u2216', + 'Barv;': '\u2ae7', + 'barvee;': '\u22bd', + 'Barwed;': '\u2306', + 'barwed;': '\u2305', + 'barwedge;': '\u2305', + 'bbrk;': '\u23b5', + 'bbrktbrk;': '\u23b6', + 'bcong;': '\u224c', + 'Bcy;': '\u0411', + 'bcy;': '\u0431', + 'bdquo;': '\u201e', + 'becaus;': '\u2235', + 'Because;': '\u2235', + 'because;': '\u2235', + 'bemptyv;': '\u29b0', + 'bepsi;': '\u03f6', + 'bernou;': '\u212c', + 'Bernoullis;': '\u212c', + 'Beta;': '\u0392', + 'beta;': '\u03b2', + 'beth;': '\u2136', + 'between;': '\u226c', + 'Bfr;': '\U0001d505', + 'bfr;': '\U0001d51f', + 'bigcap;': '\u22c2', + 'bigcirc;': '\u25ef', + 'bigcup;': '\u22c3', + 'bigodot;': '\u2a00', + 'bigoplus;': '\u2a01', + 'bigotimes;': '\u2a02', + 'bigsqcup;': '\u2a06', + 'bigstar;': '\u2605', + 'bigtriangledown;': '\u25bd', + 'bigtriangleup;': '\u25b3', + 'biguplus;': '\u2a04', + 'bigvee;': '\u22c1', + 'bigwedge;': '\u22c0', + 'bkarow;': '\u290d', + 'blacklozenge;': '\u29eb', + 'blacksquare;': '\u25aa', + 'blacktriangle;': '\u25b4', + 'blacktriangledown;': '\u25be', + 'blacktriangleleft;': '\u25c2', + 'blacktriangleright;': '\u25b8', + 'blank;': '\u2423', + 'blk12;': '\u2592', + 'blk14;': '\u2591', + 'blk34;': '\u2593', + 'block;': 
'\u2588', + 'bne;': '=\u20e5', + 'bnequiv;': '\u2261\u20e5', + 'bNot;': '\u2aed', + 'bnot;': '\u2310', + 'Bopf;': '\U0001d539', + 'bopf;': '\U0001d553', + 'bot;': '\u22a5', + 'bottom;': '\u22a5', + 'bowtie;': '\u22c8', + 'boxbox;': '\u29c9', + 'boxDL;': '\u2557', + 'boxDl;': '\u2556', + 'boxdL;': '\u2555', + 'boxdl;': '\u2510', + 'boxDR;': '\u2554', + 'boxDr;': '\u2553', + 'boxdR;': '\u2552', + 'boxdr;': '\u250c', + 'boxH;': '\u2550', + 'boxh;': '\u2500', + 'boxHD;': '\u2566', + 'boxHd;': '\u2564', + 'boxhD;': '\u2565', + 'boxhd;': '\u252c', + 'boxHU;': '\u2569', + 'boxHu;': '\u2567', + 'boxhU;': '\u2568', + 'boxhu;': '\u2534', + 'boxminus;': '\u229f', + 'boxplus;': '\u229e', + 'boxtimes;': '\u22a0', + 'boxUL;': '\u255d', + 'boxUl;': '\u255c', + 'boxuL;': '\u255b', + 'boxul;': '\u2518', + 'boxUR;': '\u255a', + 'boxUr;': '\u2559', + 'boxuR;': '\u2558', + 'boxur;': '\u2514', + 'boxV;': '\u2551', + 'boxv;': '\u2502', + 'boxVH;': '\u256c', + 'boxVh;': '\u256b', + 'boxvH;': '\u256a', + 'boxvh;': '\u253c', + 'boxVL;': '\u2563', + 'boxVl;': '\u2562', + 'boxvL;': '\u2561', + 'boxvl;': '\u2524', + 'boxVR;': '\u2560', + 'boxVr;': '\u255f', + 'boxvR;': '\u255e', + 'boxvr;': '\u251c', + 'bprime;': '\u2035', + 'Breve;': '\u02d8', + 'breve;': '\u02d8', + 'brvbar': '\xa6', + 'brvbar;': '\xa6', + 'Bscr;': '\u212c', + 'bscr;': '\U0001d4b7', + 'bsemi;': '\u204f', + 'bsim;': '\u223d', + 'bsime;': '\u22cd', + 'bsol;': '\\', + 'bsolb;': '\u29c5', + 'bsolhsub;': '\u27c8', + 'bull;': '\u2022', + 'bullet;': '\u2022', + 'bump;': '\u224e', + 'bumpE;': '\u2aae', + 'bumpe;': '\u224f', + 'Bumpeq;': '\u224e', + 'bumpeq;': '\u224f', + 'Cacute;': '\u0106', + 'cacute;': '\u0107', + 'Cap;': '\u22d2', + 'cap;': '\u2229', + 'capand;': '\u2a44', + 'capbrcup;': '\u2a49', + 'capcap;': '\u2a4b', + 'capcup;': '\u2a47', + 'capdot;': '\u2a40', + 'CapitalDifferentialD;': '\u2145', + 'caps;': '\u2229\ufe00', + 'caret;': '\u2041', + 'caron;': '\u02c7', + 'Cayleys;': '\u212d', + 'ccaps;': '\u2a4d', + 'Ccaron;': 
'\u010c', + 'ccaron;': '\u010d', + 'Ccedil': '\xc7', + 'ccedil': '\xe7', + 'Ccedil;': '\xc7', + 'ccedil;': '\xe7', + 'Ccirc;': '\u0108', + 'ccirc;': '\u0109', + 'Cconint;': '\u2230', + 'ccups;': '\u2a4c', + 'ccupssm;': '\u2a50', + 'Cdot;': '\u010a', + 'cdot;': '\u010b', + 'cedil': '\xb8', + 'cedil;': '\xb8', + 'Cedilla;': '\xb8', + 'cemptyv;': '\u29b2', + 'cent': '\xa2', + 'cent;': '\xa2', + 'CenterDot;': '\xb7', + 'centerdot;': '\xb7', + 'Cfr;': '\u212d', + 'cfr;': '\U0001d520', + 'CHcy;': '\u0427', + 'chcy;': '\u0447', + 'check;': '\u2713', + 'checkmark;': '\u2713', + 'Chi;': '\u03a7', + 'chi;': '\u03c7', + 'cir;': '\u25cb', + 'circ;': '\u02c6', + 'circeq;': '\u2257', + 'circlearrowleft;': '\u21ba', + 'circlearrowright;': '\u21bb', + 'circledast;': '\u229b', + 'circledcirc;': '\u229a', + 'circleddash;': '\u229d', + 'CircleDot;': '\u2299', + 'circledR;': '\xae', + 'circledS;': '\u24c8', + 'CircleMinus;': '\u2296', + 'CirclePlus;': '\u2295', + 'CircleTimes;': '\u2297', + 'cirE;': '\u29c3', + 'cire;': '\u2257', + 'cirfnint;': '\u2a10', + 'cirmid;': '\u2aef', + 'cirscir;': '\u29c2', + 'ClockwiseContourIntegral;': '\u2232', + 'CloseCurlyDoubleQuote;': '\u201d', + 'CloseCurlyQuote;': '\u2019', + 'clubs;': '\u2663', + 'clubsuit;': '\u2663', + 'Colon;': '\u2237', + 'colon;': ':', + 'Colone;': '\u2a74', + 'colone;': '\u2254', + 'coloneq;': '\u2254', + 'comma;': ',', + 'commat;': '@', + 'comp;': '\u2201', + 'compfn;': '\u2218', + 'complement;': '\u2201', + 'complexes;': '\u2102', + 'cong;': '\u2245', + 'congdot;': '\u2a6d', + 'Congruent;': '\u2261', + 'Conint;': '\u222f', + 'conint;': '\u222e', + 'ContourIntegral;': '\u222e', + 'Copf;': '\u2102', + 'copf;': '\U0001d554', + 'coprod;': '\u2210', + 'Coproduct;': '\u2210', + 'COPY': '\xa9', + 'copy': '\xa9', + 'COPY;': '\xa9', + 'copy;': '\xa9', + 'copysr;': '\u2117', + 'CounterClockwiseContourIntegral;': '\u2233', + 'crarr;': '\u21b5', + 'Cross;': '\u2a2f', + 'cross;': '\u2717', + 'Cscr;': '\U0001d49e', + 'cscr;': 
'\U0001d4b8', + 'csub;': '\u2acf', + 'csube;': '\u2ad1', + 'csup;': '\u2ad0', + 'csupe;': '\u2ad2', + 'ctdot;': '\u22ef', + 'cudarrl;': '\u2938', + 'cudarrr;': '\u2935', + 'cuepr;': '\u22de', + 'cuesc;': '\u22df', + 'cularr;': '\u21b6', + 'cularrp;': '\u293d', + 'Cup;': '\u22d3', + 'cup;': '\u222a', + 'cupbrcap;': '\u2a48', + 'CupCap;': '\u224d', + 'cupcap;': '\u2a46', + 'cupcup;': '\u2a4a', + 'cupdot;': '\u228d', + 'cupor;': '\u2a45', + 'cups;': '\u222a\ufe00', + 'curarr;': '\u21b7', + 'curarrm;': '\u293c', + 'curlyeqprec;': '\u22de', + 'curlyeqsucc;': '\u22df', + 'curlyvee;': '\u22ce', + 'curlywedge;': '\u22cf', + 'curren': '\xa4', + 'curren;': '\xa4', + 'curvearrowleft;': '\u21b6', + 'curvearrowright;': '\u21b7', + 'cuvee;': '\u22ce', + 'cuwed;': '\u22cf', + 'cwconint;': '\u2232', + 'cwint;': '\u2231', + 'cylcty;': '\u232d', + 'Dagger;': '\u2021', + 'dagger;': '\u2020', + 'daleth;': '\u2138', + 'Darr;': '\u21a1', + 'dArr;': '\u21d3', + 'darr;': '\u2193', + 'dash;': '\u2010', + 'Dashv;': '\u2ae4', + 'dashv;': '\u22a3', + 'dbkarow;': '\u290f', + 'dblac;': '\u02dd', + 'Dcaron;': '\u010e', + 'dcaron;': '\u010f', + 'Dcy;': '\u0414', + 'dcy;': '\u0434', + 'DD;': '\u2145', + 'dd;': '\u2146', + 'ddagger;': '\u2021', + 'ddarr;': '\u21ca', + 'DDotrahd;': '\u2911', + 'ddotseq;': '\u2a77', + 'deg': '\xb0', + 'deg;': '\xb0', + 'Del;': '\u2207', + 'Delta;': '\u0394', + 'delta;': '\u03b4', + 'demptyv;': '\u29b1', + 'dfisht;': '\u297f', + 'Dfr;': '\U0001d507', + 'dfr;': '\U0001d521', + 'dHar;': '\u2965', + 'dharl;': '\u21c3', + 'dharr;': '\u21c2', + 'DiacriticalAcute;': '\xb4', + 'DiacriticalDot;': '\u02d9', + 'DiacriticalDoubleAcute;': '\u02dd', + 'DiacriticalGrave;': '`', + 'DiacriticalTilde;': '\u02dc', + 'diam;': '\u22c4', + 'Diamond;': '\u22c4', + 'diamond;': '\u22c4', + 'diamondsuit;': '\u2666', + 'diams;': '\u2666', + 'die;': '\xa8', + 'DifferentialD;': '\u2146', + 'digamma;': '\u03dd', + 'disin;': '\u22f2', + 'div;': '\xf7', + 'divide': '\xf7', + 'divide;': '\xf7', + 
'divideontimes;': '\u22c7', + 'divonx;': '\u22c7', + 'DJcy;': '\u0402', + 'djcy;': '\u0452', + 'dlcorn;': '\u231e', + 'dlcrop;': '\u230d', + 'dollar;': '$', + 'Dopf;': '\U0001d53b', + 'dopf;': '\U0001d555', + 'Dot;': '\xa8', + 'dot;': '\u02d9', + 'DotDot;': '\u20dc', + 'doteq;': '\u2250', + 'doteqdot;': '\u2251', + 'DotEqual;': '\u2250', + 'dotminus;': '\u2238', + 'dotplus;': '\u2214', + 'dotsquare;': '\u22a1', + 'doublebarwedge;': '\u2306', + 'DoubleContourIntegral;': '\u222f', + 'DoubleDot;': '\xa8', + 'DoubleDownArrow;': '\u21d3', + 'DoubleLeftArrow;': '\u21d0', + 'DoubleLeftRightArrow;': '\u21d4', + 'DoubleLeftTee;': '\u2ae4', + 'DoubleLongLeftArrow;': '\u27f8', + 'DoubleLongLeftRightArrow;': '\u27fa', + 'DoubleLongRightArrow;': '\u27f9', + 'DoubleRightArrow;': '\u21d2', + 'DoubleRightTee;': '\u22a8', + 'DoubleUpArrow;': '\u21d1', + 'DoubleUpDownArrow;': '\u21d5', + 'DoubleVerticalBar;': '\u2225', + 'DownArrow;': '\u2193', + 'Downarrow;': '\u21d3', + 'downarrow;': '\u2193', + 'DownArrowBar;': '\u2913', + 'DownArrowUpArrow;': '\u21f5', + 'DownBreve;': '\u0311', + 'downdownarrows;': '\u21ca', + 'downharpoonleft;': '\u21c3', + 'downharpoonright;': '\u21c2', + 'DownLeftRightVector;': '\u2950', + 'DownLeftTeeVector;': '\u295e', + 'DownLeftVector;': '\u21bd', + 'DownLeftVectorBar;': '\u2956', + 'DownRightTeeVector;': '\u295f', + 'DownRightVector;': '\u21c1', + 'DownRightVectorBar;': '\u2957', + 'DownTee;': '\u22a4', + 'DownTeeArrow;': '\u21a7', + 'drbkarow;': '\u2910', + 'drcorn;': '\u231f', + 'drcrop;': '\u230c', + 'Dscr;': '\U0001d49f', + 'dscr;': '\U0001d4b9', + 'DScy;': '\u0405', + 'dscy;': '\u0455', + 'dsol;': '\u29f6', + 'Dstrok;': '\u0110', + 'dstrok;': '\u0111', + 'dtdot;': '\u22f1', + 'dtri;': '\u25bf', + 'dtrif;': '\u25be', + 'duarr;': '\u21f5', + 'duhar;': '\u296f', + 'dwangle;': '\u29a6', + 'DZcy;': '\u040f', + 'dzcy;': '\u045f', + 'dzigrarr;': '\u27ff', + 'Eacute': '\xc9', + 'eacute': '\xe9', + 'Eacute;': '\xc9', + 'eacute;': '\xe9', + 'easter;': 
'\u2a6e', + 'Ecaron;': '\u011a', + 'ecaron;': '\u011b', + 'ecir;': '\u2256', + 'Ecirc': '\xca', + 'ecirc': '\xea', + 'Ecirc;': '\xca', + 'ecirc;': '\xea', + 'ecolon;': '\u2255', + 'Ecy;': '\u042d', + 'ecy;': '\u044d', + 'eDDot;': '\u2a77', + 'Edot;': '\u0116', + 'eDot;': '\u2251', + 'edot;': '\u0117', + 'ee;': '\u2147', + 'efDot;': '\u2252', + 'Efr;': '\U0001d508', + 'efr;': '\U0001d522', + 'eg;': '\u2a9a', + 'Egrave': '\xc8', + 'egrave': '\xe8', + 'Egrave;': '\xc8', + 'egrave;': '\xe8', + 'egs;': '\u2a96', + 'egsdot;': '\u2a98', + 'el;': '\u2a99', + 'Element;': '\u2208', + 'elinters;': '\u23e7', + 'ell;': '\u2113', + 'els;': '\u2a95', + 'elsdot;': '\u2a97', + 'Emacr;': '\u0112', + 'emacr;': '\u0113', + 'empty;': '\u2205', + 'emptyset;': '\u2205', + 'EmptySmallSquare;': '\u25fb', + 'emptyv;': '\u2205', + 'EmptyVerySmallSquare;': '\u25ab', + 'emsp13;': '\u2004', + 'emsp14;': '\u2005', + 'emsp;': '\u2003', + 'ENG;': '\u014a', + 'eng;': '\u014b', + 'ensp;': '\u2002', + 'Eogon;': '\u0118', + 'eogon;': '\u0119', + 'Eopf;': '\U0001d53c', + 'eopf;': '\U0001d556', + 'epar;': '\u22d5', + 'eparsl;': '\u29e3', + 'eplus;': '\u2a71', + 'epsi;': '\u03b5', + 'Epsilon;': '\u0395', + 'epsilon;': '\u03b5', + 'epsiv;': '\u03f5', + 'eqcirc;': '\u2256', + 'eqcolon;': '\u2255', + 'eqsim;': '\u2242', + 'eqslantgtr;': '\u2a96', + 'eqslantless;': '\u2a95', + 'Equal;': '\u2a75', + 'equals;': '=', + 'EqualTilde;': '\u2242', + 'equest;': '\u225f', + 'Equilibrium;': '\u21cc', + 'equiv;': '\u2261', + 'equivDD;': '\u2a78', + 'eqvparsl;': '\u29e5', + 'erarr;': '\u2971', + 'erDot;': '\u2253', + 'Escr;': '\u2130', + 'escr;': '\u212f', + 'esdot;': '\u2250', + 'Esim;': '\u2a73', + 'esim;': '\u2242', + 'Eta;': '\u0397', + 'eta;': '\u03b7', + 'ETH': '\xd0', + 'eth': '\xf0', + 'ETH;': '\xd0', + 'eth;': '\xf0', + 'Euml': '\xcb', + 'euml': '\xeb', + 'Euml;': '\xcb', + 'euml;': '\xeb', + 'euro;': '\u20ac', + 'excl;': '!', + 'exist;': '\u2203', + 'Exists;': '\u2203', + 'expectation;': '\u2130', + 
'ExponentialE;': '\u2147', + 'exponentiale;': '\u2147', + 'fallingdotseq;': '\u2252', + 'Fcy;': '\u0424', + 'fcy;': '\u0444', + 'female;': '\u2640', + 'ffilig;': '\ufb03', + 'fflig;': '\ufb00', + 'ffllig;': '\ufb04', + 'Ffr;': '\U0001d509', + 'ffr;': '\U0001d523', + 'filig;': '\ufb01', + 'FilledSmallSquare;': '\u25fc', + 'FilledVerySmallSquare;': '\u25aa', + 'fjlig;': 'fj', + 'flat;': '\u266d', + 'fllig;': '\ufb02', + 'fltns;': '\u25b1', + 'fnof;': '\u0192', + 'Fopf;': '\U0001d53d', + 'fopf;': '\U0001d557', + 'ForAll;': '\u2200', + 'forall;': '\u2200', + 'fork;': '\u22d4', + 'forkv;': '\u2ad9', + 'Fouriertrf;': '\u2131', + 'fpartint;': '\u2a0d', + 'frac12': '\xbd', + 'frac12;': '\xbd', + 'frac13;': '\u2153', + 'frac14': '\xbc', + 'frac14;': '\xbc', + 'frac15;': '\u2155', + 'frac16;': '\u2159', + 'frac18;': '\u215b', + 'frac23;': '\u2154', + 'frac25;': '\u2156', + 'frac34': '\xbe', + 'frac34;': '\xbe', + 'frac35;': '\u2157', + 'frac38;': '\u215c', + 'frac45;': '\u2158', + 'frac56;': '\u215a', + 'frac58;': '\u215d', + 'frac78;': '\u215e', + 'frasl;': '\u2044', + 'frown;': '\u2322', + 'Fscr;': '\u2131', + 'fscr;': '\U0001d4bb', + 'gacute;': '\u01f5', + 'Gamma;': '\u0393', + 'gamma;': '\u03b3', + 'Gammad;': '\u03dc', + 'gammad;': '\u03dd', + 'gap;': '\u2a86', + 'Gbreve;': '\u011e', + 'gbreve;': '\u011f', + 'Gcedil;': '\u0122', + 'Gcirc;': '\u011c', + 'gcirc;': '\u011d', + 'Gcy;': '\u0413', + 'gcy;': '\u0433', + 'Gdot;': '\u0120', + 'gdot;': '\u0121', + 'gE;': '\u2267', + 'ge;': '\u2265', + 'gEl;': '\u2a8c', + 'gel;': '\u22db', + 'geq;': '\u2265', + 'geqq;': '\u2267', + 'geqslant;': '\u2a7e', + 'ges;': '\u2a7e', + 'gescc;': '\u2aa9', + 'gesdot;': '\u2a80', + 'gesdoto;': '\u2a82', + 'gesdotol;': '\u2a84', + 'gesl;': '\u22db\ufe00', + 'gesles;': '\u2a94', + 'Gfr;': '\U0001d50a', + 'gfr;': '\U0001d524', + 'Gg;': '\u22d9', + 'gg;': '\u226b', + 'ggg;': '\u22d9', + 'gimel;': '\u2137', + 'GJcy;': '\u0403', + 'gjcy;': '\u0453', + 'gl;': '\u2277', + 'gla;': '\u2aa5', + 'glE;': 
'\u2a92', + 'glj;': '\u2aa4', + 'gnap;': '\u2a8a', + 'gnapprox;': '\u2a8a', + 'gnE;': '\u2269', + 'gne;': '\u2a88', + 'gneq;': '\u2a88', + 'gneqq;': '\u2269', + 'gnsim;': '\u22e7', + 'Gopf;': '\U0001d53e', + 'gopf;': '\U0001d558', + 'grave;': '`', + 'GreaterEqual;': '\u2265', + 'GreaterEqualLess;': '\u22db', + 'GreaterFullEqual;': '\u2267', + 'GreaterGreater;': '\u2aa2', + 'GreaterLess;': '\u2277', + 'GreaterSlantEqual;': '\u2a7e', + 'GreaterTilde;': '\u2273', + 'Gscr;': '\U0001d4a2', + 'gscr;': '\u210a', + 'gsim;': '\u2273', + 'gsime;': '\u2a8e', + 'gsiml;': '\u2a90', + 'GT': '>', + 'gt': '>', + 'GT;': '>', + 'Gt;': '\u226b', + 'gt;': '>', + 'gtcc;': '\u2aa7', + 'gtcir;': '\u2a7a', + 'gtdot;': '\u22d7', + 'gtlPar;': '\u2995', + 'gtquest;': '\u2a7c', + 'gtrapprox;': '\u2a86', + 'gtrarr;': '\u2978', + 'gtrdot;': '\u22d7', + 'gtreqless;': '\u22db', + 'gtreqqless;': '\u2a8c', + 'gtrless;': '\u2277', + 'gtrsim;': '\u2273', + 'gvertneqq;': '\u2269\ufe00', + 'gvnE;': '\u2269\ufe00', + 'Hacek;': '\u02c7', + 'hairsp;': '\u200a', + 'half;': '\xbd', + 'hamilt;': '\u210b', + 'HARDcy;': '\u042a', + 'hardcy;': '\u044a', + 'hArr;': '\u21d4', + 'harr;': '\u2194', + 'harrcir;': '\u2948', + 'harrw;': '\u21ad', + 'Hat;': '^', + 'hbar;': '\u210f', + 'Hcirc;': '\u0124', + 'hcirc;': '\u0125', + 'hearts;': '\u2665', + 'heartsuit;': '\u2665', + 'hellip;': '\u2026', + 'hercon;': '\u22b9', + 'Hfr;': '\u210c', + 'hfr;': '\U0001d525', + 'HilbertSpace;': '\u210b', + 'hksearow;': '\u2925', + 'hkswarow;': '\u2926', + 'hoarr;': '\u21ff', + 'homtht;': '\u223b', + 'hookleftarrow;': '\u21a9', + 'hookrightarrow;': '\u21aa', + 'Hopf;': '\u210d', + 'hopf;': '\U0001d559', + 'horbar;': '\u2015', + 'HorizontalLine;': '\u2500', + 'Hscr;': '\u210b', + 'hscr;': '\U0001d4bd', + 'hslash;': '\u210f', + 'Hstrok;': '\u0126', + 'hstrok;': '\u0127', + 'HumpDownHump;': '\u224e', + 'HumpEqual;': '\u224f', + 'hybull;': '\u2043', + 'hyphen;': '\u2010', + 'Iacute': '\xcd', + 'iacute': '\xed', + 'Iacute;': '\xcd', + 
'iacute;': '\xed', + 'ic;': '\u2063', + 'Icirc': '\xce', + 'icirc': '\xee', + 'Icirc;': '\xce', + 'icirc;': '\xee', + 'Icy;': '\u0418', + 'icy;': '\u0438', + 'Idot;': '\u0130', + 'IEcy;': '\u0415', + 'iecy;': '\u0435', + 'iexcl': '\xa1', + 'iexcl;': '\xa1', + 'iff;': '\u21d4', + 'Ifr;': '\u2111', + 'ifr;': '\U0001d526', + 'Igrave': '\xcc', + 'igrave': '\xec', + 'Igrave;': '\xcc', + 'igrave;': '\xec', + 'ii;': '\u2148', + 'iiiint;': '\u2a0c', + 'iiint;': '\u222d', + 'iinfin;': '\u29dc', + 'iiota;': '\u2129', + 'IJlig;': '\u0132', + 'ijlig;': '\u0133', + 'Im;': '\u2111', + 'Imacr;': '\u012a', + 'imacr;': '\u012b', + 'image;': '\u2111', + 'ImaginaryI;': '\u2148', + 'imagline;': '\u2110', + 'imagpart;': '\u2111', + 'imath;': '\u0131', + 'imof;': '\u22b7', + 'imped;': '\u01b5', + 'Implies;': '\u21d2', + 'in;': '\u2208', + 'incare;': '\u2105', + 'infin;': '\u221e', + 'infintie;': '\u29dd', + 'inodot;': '\u0131', + 'Int;': '\u222c', + 'int;': '\u222b', + 'intcal;': '\u22ba', + 'integers;': '\u2124', + 'Integral;': '\u222b', + 'intercal;': '\u22ba', + 'Intersection;': '\u22c2', + 'intlarhk;': '\u2a17', + 'intprod;': '\u2a3c', + 'InvisibleComma;': '\u2063', + 'InvisibleTimes;': '\u2062', + 'IOcy;': '\u0401', + 'iocy;': '\u0451', + 'Iogon;': '\u012e', + 'iogon;': '\u012f', + 'Iopf;': '\U0001d540', + 'iopf;': '\U0001d55a', + 'Iota;': '\u0399', + 'iota;': '\u03b9', + 'iprod;': '\u2a3c', + 'iquest': '\xbf', + 'iquest;': '\xbf', + 'Iscr;': '\u2110', + 'iscr;': '\U0001d4be', + 'isin;': '\u2208', + 'isindot;': '\u22f5', + 'isinE;': '\u22f9', + 'isins;': '\u22f4', + 'isinsv;': '\u22f3', + 'isinv;': '\u2208', + 'it;': '\u2062', + 'Itilde;': '\u0128', + 'itilde;': '\u0129', + 'Iukcy;': '\u0406', + 'iukcy;': '\u0456', + 'Iuml': '\xcf', + 'iuml': '\xef', + 'Iuml;': '\xcf', + 'iuml;': '\xef', + 'Jcirc;': '\u0134', + 'jcirc;': '\u0135', + 'Jcy;': '\u0419', + 'jcy;': '\u0439', + 'Jfr;': '\U0001d50d', + 'jfr;': '\U0001d527', + 'jmath;': '\u0237', + 'Jopf;': '\U0001d541', + 'jopf;': 
'\U0001d55b', + 'Jscr;': '\U0001d4a5', + 'jscr;': '\U0001d4bf', + 'Jsercy;': '\u0408', + 'jsercy;': '\u0458', + 'Jukcy;': '\u0404', + 'jukcy;': '\u0454', + 'Kappa;': '\u039a', + 'kappa;': '\u03ba', + 'kappav;': '\u03f0', + 'Kcedil;': '\u0136', + 'kcedil;': '\u0137', + 'Kcy;': '\u041a', + 'kcy;': '\u043a', + 'Kfr;': '\U0001d50e', + 'kfr;': '\U0001d528', + 'kgreen;': '\u0138', + 'KHcy;': '\u0425', + 'khcy;': '\u0445', + 'KJcy;': '\u040c', + 'kjcy;': '\u045c', + 'Kopf;': '\U0001d542', + 'kopf;': '\U0001d55c', + 'Kscr;': '\U0001d4a6', + 'kscr;': '\U0001d4c0', + 'lAarr;': '\u21da', + 'Lacute;': '\u0139', + 'lacute;': '\u013a', + 'laemptyv;': '\u29b4', + 'lagran;': '\u2112', + 'Lambda;': '\u039b', + 'lambda;': '\u03bb', + 'Lang;': '\u27ea', + 'lang;': '\u27e8', + 'langd;': '\u2991', + 'langle;': '\u27e8', + 'lap;': '\u2a85', + 'Laplacetrf;': '\u2112', + 'laquo': '\xab', + 'laquo;': '\xab', + 'Larr;': '\u219e', + 'lArr;': '\u21d0', + 'larr;': '\u2190', + 'larrb;': '\u21e4', + 'larrbfs;': '\u291f', + 'larrfs;': '\u291d', + 'larrhk;': '\u21a9', + 'larrlp;': '\u21ab', + 'larrpl;': '\u2939', + 'larrsim;': '\u2973', + 'larrtl;': '\u21a2', + 'lat;': '\u2aab', + 'lAtail;': '\u291b', + 'latail;': '\u2919', + 'late;': '\u2aad', + 'lates;': '\u2aad\ufe00', + 'lBarr;': '\u290e', + 'lbarr;': '\u290c', + 'lbbrk;': '\u2772', + 'lbrace;': '{', + 'lbrack;': '[', + 'lbrke;': '\u298b', + 'lbrksld;': '\u298f', + 'lbrkslu;': '\u298d', + 'Lcaron;': '\u013d', + 'lcaron;': '\u013e', + 'Lcedil;': '\u013b', + 'lcedil;': '\u013c', + 'lceil;': '\u2308', + 'lcub;': '{', + 'Lcy;': '\u041b', + 'lcy;': '\u043b', + 'ldca;': '\u2936', + 'ldquo;': '\u201c', + 'ldquor;': '\u201e', + 'ldrdhar;': '\u2967', + 'ldrushar;': '\u294b', + 'ldsh;': '\u21b2', + 'lE;': '\u2266', + 'le;': '\u2264', + 'LeftAngleBracket;': '\u27e8', + 'LeftArrow;': '\u2190', + 'Leftarrow;': '\u21d0', + 'leftarrow;': '\u2190', + 'LeftArrowBar;': '\u21e4', + 'LeftArrowRightArrow;': '\u21c6', + 'leftarrowtail;': '\u21a2', + 'LeftCeiling;': 
'\u2308', + 'LeftDoubleBracket;': '\u27e6', + 'LeftDownTeeVector;': '\u2961', + 'LeftDownVector;': '\u21c3', + 'LeftDownVectorBar;': '\u2959', + 'LeftFloor;': '\u230a', + 'leftharpoondown;': '\u21bd', + 'leftharpoonup;': '\u21bc', + 'leftleftarrows;': '\u21c7', + 'LeftRightArrow;': '\u2194', + 'Leftrightarrow;': '\u21d4', + 'leftrightarrow;': '\u2194', + 'leftrightarrows;': '\u21c6', + 'leftrightharpoons;': '\u21cb', + 'leftrightsquigarrow;': '\u21ad', + 'LeftRightVector;': '\u294e', + 'LeftTee;': '\u22a3', + 'LeftTeeArrow;': '\u21a4', + 'LeftTeeVector;': '\u295a', + 'leftthreetimes;': '\u22cb', + 'LeftTriangle;': '\u22b2', + 'LeftTriangleBar;': '\u29cf', + 'LeftTriangleEqual;': '\u22b4', + 'LeftUpDownVector;': '\u2951', + 'LeftUpTeeVector;': '\u2960', + 'LeftUpVector;': '\u21bf', + 'LeftUpVectorBar;': '\u2958', + 'LeftVector;': '\u21bc', + 'LeftVectorBar;': '\u2952', + 'lEg;': '\u2a8b', + 'leg;': '\u22da', + 'leq;': '\u2264', + 'leqq;': '\u2266', + 'leqslant;': '\u2a7d', + 'les;': '\u2a7d', + 'lescc;': '\u2aa8', + 'lesdot;': '\u2a7f', + 'lesdoto;': '\u2a81', + 'lesdotor;': '\u2a83', + 'lesg;': '\u22da\ufe00', + 'lesges;': '\u2a93', + 'lessapprox;': '\u2a85', + 'lessdot;': '\u22d6', + 'lesseqgtr;': '\u22da', + 'lesseqqgtr;': '\u2a8b', + 'LessEqualGreater;': '\u22da', + 'LessFullEqual;': '\u2266', + 'LessGreater;': '\u2276', + 'lessgtr;': '\u2276', + 'LessLess;': '\u2aa1', + 'lesssim;': '\u2272', + 'LessSlantEqual;': '\u2a7d', + 'LessTilde;': '\u2272', + 'lfisht;': '\u297c', + 'lfloor;': '\u230a', + 'Lfr;': '\U0001d50f', + 'lfr;': '\U0001d529', + 'lg;': '\u2276', + 'lgE;': '\u2a91', + 'lHar;': '\u2962', + 'lhard;': '\u21bd', + 'lharu;': '\u21bc', + 'lharul;': '\u296a', + 'lhblk;': '\u2584', + 'LJcy;': '\u0409', + 'ljcy;': '\u0459', + 'Ll;': '\u22d8', + 'll;': '\u226a', + 'llarr;': '\u21c7', + 'llcorner;': '\u231e', + 'Lleftarrow;': '\u21da', + 'llhard;': '\u296b', + 'lltri;': '\u25fa', + 'Lmidot;': '\u013f', + 'lmidot;': '\u0140', + 'lmoust;': '\u23b0', + 
'lmoustache;': '\u23b0', + 'lnap;': '\u2a89', + 'lnapprox;': '\u2a89', + 'lnE;': '\u2268', + 'lne;': '\u2a87', + 'lneq;': '\u2a87', + 'lneqq;': '\u2268', + 'lnsim;': '\u22e6', + 'loang;': '\u27ec', + 'loarr;': '\u21fd', + 'lobrk;': '\u27e6', + 'LongLeftArrow;': '\u27f5', + 'Longleftarrow;': '\u27f8', + 'longleftarrow;': '\u27f5', + 'LongLeftRightArrow;': '\u27f7', + 'Longleftrightarrow;': '\u27fa', + 'longleftrightarrow;': '\u27f7', + 'longmapsto;': '\u27fc', + 'LongRightArrow;': '\u27f6', + 'Longrightarrow;': '\u27f9', + 'longrightarrow;': '\u27f6', + 'looparrowleft;': '\u21ab', + 'looparrowright;': '\u21ac', + 'lopar;': '\u2985', + 'Lopf;': '\U0001d543', + 'lopf;': '\U0001d55d', + 'loplus;': '\u2a2d', + 'lotimes;': '\u2a34', + 'lowast;': '\u2217', + 'lowbar;': '_', + 'LowerLeftArrow;': '\u2199', + 'LowerRightArrow;': '\u2198', + 'loz;': '\u25ca', + 'lozenge;': '\u25ca', + 'lozf;': '\u29eb', + 'lpar;': '(', + 'lparlt;': '\u2993', + 'lrarr;': '\u21c6', + 'lrcorner;': '\u231f', + 'lrhar;': '\u21cb', + 'lrhard;': '\u296d', + 'lrm;': '\u200e', + 'lrtri;': '\u22bf', + 'lsaquo;': '\u2039', + 'Lscr;': '\u2112', + 'lscr;': '\U0001d4c1', + 'Lsh;': '\u21b0', + 'lsh;': '\u21b0', + 'lsim;': '\u2272', + 'lsime;': '\u2a8d', + 'lsimg;': '\u2a8f', + 'lsqb;': '[', + 'lsquo;': '\u2018', + 'lsquor;': '\u201a', + 'Lstrok;': '\u0141', + 'lstrok;': '\u0142', + 'LT': '<', + 'lt': '<', + 'LT;': '<', + 'Lt;': '\u226a', + 'lt;': '<', + 'ltcc;': '\u2aa6', + 'ltcir;': '\u2a79', + 'ltdot;': '\u22d6', + 'lthree;': '\u22cb', + 'ltimes;': '\u22c9', + 'ltlarr;': '\u2976', + 'ltquest;': '\u2a7b', + 'ltri;': '\u25c3', + 'ltrie;': '\u22b4', + 'ltrif;': '\u25c2', + 'ltrPar;': '\u2996', + 'lurdshar;': '\u294a', + 'luruhar;': '\u2966', + 'lvertneqq;': '\u2268\ufe00', + 'lvnE;': '\u2268\ufe00', + 'macr': '\xaf', + 'macr;': '\xaf', + 'male;': '\u2642', + 'malt;': '\u2720', + 'maltese;': '\u2720', + 'Map;': '\u2905', + 'map;': '\u21a6', + 'mapsto;': '\u21a6', + 'mapstodown;': '\u21a7', + 'mapstoleft;': 
'\u21a4', + 'mapstoup;': '\u21a5', + 'marker;': '\u25ae', + 'mcomma;': '\u2a29', + 'Mcy;': '\u041c', + 'mcy;': '\u043c', + 'mdash;': '\u2014', + 'mDDot;': '\u223a', + 'measuredangle;': '\u2221', + 'MediumSpace;': '\u205f', + 'Mellintrf;': '\u2133', + 'Mfr;': '\U0001d510', + 'mfr;': '\U0001d52a', + 'mho;': '\u2127', + 'micro': '\xb5', + 'micro;': '\xb5', + 'mid;': '\u2223', + 'midast;': '*', + 'midcir;': '\u2af0', + 'middot': '\xb7', + 'middot;': '\xb7', + 'minus;': '\u2212', + 'minusb;': '\u229f', + 'minusd;': '\u2238', + 'minusdu;': '\u2a2a', + 'MinusPlus;': '\u2213', + 'mlcp;': '\u2adb', + 'mldr;': '\u2026', + 'mnplus;': '\u2213', + 'models;': '\u22a7', + 'Mopf;': '\U0001d544', + 'mopf;': '\U0001d55e', + 'mp;': '\u2213', + 'Mscr;': '\u2133', + 'mscr;': '\U0001d4c2', + 'mstpos;': '\u223e', + 'Mu;': '\u039c', + 'mu;': '\u03bc', + 'multimap;': '\u22b8', + 'mumap;': '\u22b8', + 'nabla;': '\u2207', + 'Nacute;': '\u0143', + 'nacute;': '\u0144', + 'nang;': '\u2220\u20d2', + 'nap;': '\u2249', + 'napE;': '\u2a70\u0338', + 'napid;': '\u224b\u0338', + 'napos;': '\u0149', + 'napprox;': '\u2249', + 'natur;': '\u266e', + 'natural;': '\u266e', + 'naturals;': '\u2115', + 'nbsp': '\xa0', + 'nbsp;': '\xa0', + 'nbump;': '\u224e\u0338', + 'nbumpe;': '\u224f\u0338', + 'ncap;': '\u2a43', + 'Ncaron;': '\u0147', + 'ncaron;': '\u0148', + 'Ncedil;': '\u0145', + 'ncedil;': '\u0146', + 'ncong;': '\u2247', + 'ncongdot;': '\u2a6d\u0338', + 'ncup;': '\u2a42', + 'Ncy;': '\u041d', + 'ncy;': '\u043d', + 'ndash;': '\u2013', + 'ne;': '\u2260', + 'nearhk;': '\u2924', + 'neArr;': '\u21d7', + 'nearr;': '\u2197', + 'nearrow;': '\u2197', + 'nedot;': '\u2250\u0338', + 'NegativeMediumSpace;': '\u200b', + 'NegativeThickSpace;': '\u200b', + 'NegativeThinSpace;': '\u200b', + 'NegativeVeryThinSpace;': '\u200b', + 'nequiv;': '\u2262', + 'nesear;': '\u2928', + 'nesim;': '\u2242\u0338', + 'NestedGreaterGreater;': '\u226b', + 'NestedLessLess;': '\u226a', + 'NewLine;': '\n', + 'nexist;': '\u2204', + 'nexists;': 
'\u2204', + 'Nfr;': '\U0001d511', + 'nfr;': '\U0001d52b', + 'ngE;': '\u2267\u0338', + 'nge;': '\u2271', + 'ngeq;': '\u2271', + 'ngeqq;': '\u2267\u0338', + 'ngeqslant;': '\u2a7e\u0338', + 'nges;': '\u2a7e\u0338', + 'nGg;': '\u22d9\u0338', + 'ngsim;': '\u2275', + 'nGt;': '\u226b\u20d2', + 'ngt;': '\u226f', + 'ngtr;': '\u226f', + 'nGtv;': '\u226b\u0338', + 'nhArr;': '\u21ce', + 'nharr;': '\u21ae', + 'nhpar;': '\u2af2', + 'ni;': '\u220b', + 'nis;': '\u22fc', + 'nisd;': '\u22fa', + 'niv;': '\u220b', + 'NJcy;': '\u040a', + 'njcy;': '\u045a', + 'nlArr;': '\u21cd', + 'nlarr;': '\u219a', + 'nldr;': '\u2025', + 'nlE;': '\u2266\u0338', + 'nle;': '\u2270', + 'nLeftarrow;': '\u21cd', + 'nleftarrow;': '\u219a', + 'nLeftrightarrow;': '\u21ce', + 'nleftrightarrow;': '\u21ae', + 'nleq;': '\u2270', + 'nleqq;': '\u2266\u0338', + 'nleqslant;': '\u2a7d\u0338', + 'nles;': '\u2a7d\u0338', + 'nless;': '\u226e', + 'nLl;': '\u22d8\u0338', + 'nlsim;': '\u2274', + 'nLt;': '\u226a\u20d2', + 'nlt;': '\u226e', + 'nltri;': '\u22ea', + 'nltrie;': '\u22ec', + 'nLtv;': '\u226a\u0338', + 'nmid;': '\u2224', + 'NoBreak;': '\u2060', + 'NonBreakingSpace;': '\xa0', + 'Nopf;': '\u2115', + 'nopf;': '\U0001d55f', + 'not': '\xac', + 'Not;': '\u2aec', + 'not;': '\xac', + 'NotCongruent;': '\u2262', + 'NotCupCap;': '\u226d', + 'NotDoubleVerticalBar;': '\u2226', + 'NotElement;': '\u2209', + 'NotEqual;': '\u2260', + 'NotEqualTilde;': '\u2242\u0338', + 'NotExists;': '\u2204', + 'NotGreater;': '\u226f', + 'NotGreaterEqual;': '\u2271', + 'NotGreaterFullEqual;': '\u2267\u0338', + 'NotGreaterGreater;': '\u226b\u0338', + 'NotGreaterLess;': '\u2279', + 'NotGreaterSlantEqual;': '\u2a7e\u0338', + 'NotGreaterTilde;': '\u2275', + 'NotHumpDownHump;': '\u224e\u0338', + 'NotHumpEqual;': '\u224f\u0338', + 'notin;': '\u2209', + 'notindot;': '\u22f5\u0338', + 'notinE;': '\u22f9\u0338', + 'notinva;': '\u2209', + 'notinvb;': '\u22f7', + 'notinvc;': '\u22f6', + 'NotLeftTriangle;': '\u22ea', + 'NotLeftTriangleBar;': '\u29cf\u0338', + 
'NotLeftTriangleEqual;': '\u22ec', + 'NotLess;': '\u226e', + 'NotLessEqual;': '\u2270', + 'NotLessGreater;': '\u2278', + 'NotLessLess;': '\u226a\u0338', + 'NotLessSlantEqual;': '\u2a7d\u0338', + 'NotLessTilde;': '\u2274', + 'NotNestedGreaterGreater;': '\u2aa2\u0338', + 'NotNestedLessLess;': '\u2aa1\u0338', + 'notni;': '\u220c', + 'notniva;': '\u220c', + 'notnivb;': '\u22fe', + 'notnivc;': '\u22fd', + 'NotPrecedes;': '\u2280', + 'NotPrecedesEqual;': '\u2aaf\u0338', + 'NotPrecedesSlantEqual;': '\u22e0', + 'NotReverseElement;': '\u220c', + 'NotRightTriangle;': '\u22eb', + 'NotRightTriangleBar;': '\u29d0\u0338', + 'NotRightTriangleEqual;': '\u22ed', + 'NotSquareSubset;': '\u228f\u0338', + 'NotSquareSubsetEqual;': '\u22e2', + 'NotSquareSuperset;': '\u2290\u0338', + 'NotSquareSupersetEqual;': '\u22e3', + 'NotSubset;': '\u2282\u20d2', + 'NotSubsetEqual;': '\u2288', + 'NotSucceeds;': '\u2281', + 'NotSucceedsEqual;': '\u2ab0\u0338', + 'NotSucceedsSlantEqual;': '\u22e1', + 'NotSucceedsTilde;': '\u227f\u0338', + 'NotSuperset;': '\u2283\u20d2', + 'NotSupersetEqual;': '\u2289', + 'NotTilde;': '\u2241', + 'NotTildeEqual;': '\u2244', + 'NotTildeFullEqual;': '\u2247', + 'NotTildeTilde;': '\u2249', + 'NotVerticalBar;': '\u2224', + 'npar;': '\u2226', + 'nparallel;': '\u2226', + 'nparsl;': '\u2afd\u20e5', + 'npart;': '\u2202\u0338', + 'npolint;': '\u2a14', + 'npr;': '\u2280', + 'nprcue;': '\u22e0', + 'npre;': '\u2aaf\u0338', + 'nprec;': '\u2280', + 'npreceq;': '\u2aaf\u0338', + 'nrArr;': '\u21cf', + 'nrarr;': '\u219b', + 'nrarrc;': '\u2933\u0338', + 'nrarrw;': '\u219d\u0338', + 'nRightarrow;': '\u21cf', + 'nrightarrow;': '\u219b', + 'nrtri;': '\u22eb', + 'nrtrie;': '\u22ed', + 'nsc;': '\u2281', + 'nsccue;': '\u22e1', + 'nsce;': '\u2ab0\u0338', + 'Nscr;': '\U0001d4a9', + 'nscr;': '\U0001d4c3', + 'nshortmid;': '\u2224', + 'nshortparallel;': '\u2226', + 'nsim;': '\u2241', + 'nsime;': '\u2244', + 'nsimeq;': '\u2244', + 'nsmid;': '\u2224', + 'nspar;': '\u2226', + 'nsqsube;': '\u22e2', + 
'nsqsupe;': '\u22e3', + 'nsub;': '\u2284', + 'nsubE;': '\u2ac5\u0338', + 'nsube;': '\u2288', + 'nsubset;': '\u2282\u20d2', + 'nsubseteq;': '\u2288', + 'nsubseteqq;': '\u2ac5\u0338', + 'nsucc;': '\u2281', + 'nsucceq;': '\u2ab0\u0338', + 'nsup;': '\u2285', + 'nsupE;': '\u2ac6\u0338', + 'nsupe;': '\u2289', + 'nsupset;': '\u2283\u20d2', + 'nsupseteq;': '\u2289', + 'nsupseteqq;': '\u2ac6\u0338', + 'ntgl;': '\u2279', + 'Ntilde': '\xd1', + 'ntilde': '\xf1', + 'Ntilde;': '\xd1', + 'ntilde;': '\xf1', + 'ntlg;': '\u2278', + 'ntriangleleft;': '\u22ea', + 'ntrianglelefteq;': '\u22ec', + 'ntriangleright;': '\u22eb', + 'ntrianglerighteq;': '\u22ed', + 'Nu;': '\u039d', + 'nu;': '\u03bd', + 'num;': '#', + 'numero;': '\u2116', + 'numsp;': '\u2007', + 'nvap;': '\u224d\u20d2', + 'nVDash;': '\u22af', + 'nVdash;': '\u22ae', + 'nvDash;': '\u22ad', + 'nvdash;': '\u22ac', + 'nvge;': '\u2265\u20d2', + 'nvgt;': '>\u20d2', + 'nvHarr;': '\u2904', + 'nvinfin;': '\u29de', + 'nvlArr;': '\u2902', + 'nvle;': '\u2264\u20d2', + 'nvlt;': '<\u20d2', + 'nvltrie;': '\u22b4\u20d2', + 'nvrArr;': '\u2903', + 'nvrtrie;': '\u22b5\u20d2', + 'nvsim;': '\u223c\u20d2', + 'nwarhk;': '\u2923', + 'nwArr;': '\u21d6', + 'nwarr;': '\u2196', + 'nwarrow;': '\u2196', + 'nwnear;': '\u2927', + 'Oacute': '\xd3', + 'oacute': '\xf3', + 'Oacute;': '\xd3', + 'oacute;': '\xf3', + 'oast;': '\u229b', + 'ocir;': '\u229a', + 'Ocirc': '\xd4', + 'ocirc': '\xf4', + 'Ocirc;': '\xd4', + 'ocirc;': '\xf4', + 'Ocy;': '\u041e', + 'ocy;': '\u043e', + 'odash;': '\u229d', + 'Odblac;': '\u0150', + 'odblac;': '\u0151', + 'odiv;': '\u2a38', + 'odot;': '\u2299', + 'odsold;': '\u29bc', + 'OElig;': '\u0152', + 'oelig;': '\u0153', + 'ofcir;': '\u29bf', + 'Ofr;': '\U0001d512', + 'ofr;': '\U0001d52c', + 'ogon;': '\u02db', + 'Ograve': '\xd2', + 'ograve': '\xf2', + 'Ograve;': '\xd2', + 'ograve;': '\xf2', + 'ogt;': '\u29c1', + 'ohbar;': '\u29b5', + 'ohm;': '\u03a9', + 'oint;': '\u222e', + 'olarr;': '\u21ba', + 'olcir;': '\u29be', + 'olcross;': '\u29bb', + 
'oline;': '\u203e', + 'olt;': '\u29c0', + 'Omacr;': '\u014c', + 'omacr;': '\u014d', + 'Omega;': '\u03a9', + 'omega;': '\u03c9', + 'Omicron;': '\u039f', + 'omicron;': '\u03bf', + 'omid;': '\u29b6', + 'ominus;': '\u2296', + 'Oopf;': '\U0001d546', + 'oopf;': '\U0001d560', + 'opar;': '\u29b7', + 'OpenCurlyDoubleQuote;': '\u201c', + 'OpenCurlyQuote;': '\u2018', + 'operp;': '\u29b9', + 'oplus;': '\u2295', + 'Or;': '\u2a54', + 'or;': '\u2228', + 'orarr;': '\u21bb', + 'ord;': '\u2a5d', + 'order;': '\u2134', + 'orderof;': '\u2134', + 'ordf': '\xaa', + 'ordf;': '\xaa', + 'ordm': '\xba', + 'ordm;': '\xba', + 'origof;': '\u22b6', + 'oror;': '\u2a56', + 'orslope;': '\u2a57', + 'orv;': '\u2a5b', + 'oS;': '\u24c8', + 'Oscr;': '\U0001d4aa', + 'oscr;': '\u2134', + 'Oslash': '\xd8', + 'oslash': '\xf8', + 'Oslash;': '\xd8', + 'oslash;': '\xf8', + 'osol;': '\u2298', + 'Otilde': '\xd5', + 'otilde': '\xf5', + 'Otilde;': '\xd5', + 'otilde;': '\xf5', + 'Otimes;': '\u2a37', + 'otimes;': '\u2297', + 'otimesas;': '\u2a36', + 'Ouml': '\xd6', + 'ouml': '\xf6', + 'Ouml;': '\xd6', + 'ouml;': '\xf6', + 'ovbar;': '\u233d', + 'OverBar;': '\u203e', + 'OverBrace;': '\u23de', + 'OverBracket;': '\u23b4', + 'OverParenthesis;': '\u23dc', + 'par;': '\u2225', + 'para': '\xb6', + 'para;': '\xb6', + 'parallel;': '\u2225', + 'parsim;': '\u2af3', + 'parsl;': '\u2afd', + 'part;': '\u2202', + 'PartialD;': '\u2202', + 'Pcy;': '\u041f', + 'pcy;': '\u043f', + 'percnt;': '%', + 'period;': '.', + 'permil;': '\u2030', + 'perp;': '\u22a5', + 'pertenk;': '\u2031', + 'Pfr;': '\U0001d513', + 'pfr;': '\U0001d52d', + 'Phi;': '\u03a6', + 'phi;': '\u03c6', + 'phiv;': '\u03d5', + 'phmmat;': '\u2133', + 'phone;': '\u260e', + 'Pi;': '\u03a0', + 'pi;': '\u03c0', + 'pitchfork;': '\u22d4', + 'piv;': '\u03d6', + 'planck;': '\u210f', + 'planckh;': '\u210e', + 'plankv;': '\u210f', + 'plus;': '+', + 'plusacir;': '\u2a23', + 'plusb;': '\u229e', + 'pluscir;': '\u2a22', + 'plusdo;': '\u2214', + 'plusdu;': '\u2a25', + 'pluse;': '\u2a72', + 
'PlusMinus;': '\xb1', + 'plusmn': '\xb1', + 'plusmn;': '\xb1', + 'plussim;': '\u2a26', + 'plustwo;': '\u2a27', + 'pm;': '\xb1', + 'Poincareplane;': '\u210c', + 'pointint;': '\u2a15', + 'Popf;': '\u2119', + 'popf;': '\U0001d561', + 'pound': '\xa3', + 'pound;': '\xa3', + 'Pr;': '\u2abb', + 'pr;': '\u227a', + 'prap;': '\u2ab7', + 'prcue;': '\u227c', + 'prE;': '\u2ab3', + 'pre;': '\u2aaf', + 'prec;': '\u227a', + 'precapprox;': '\u2ab7', + 'preccurlyeq;': '\u227c', + 'Precedes;': '\u227a', + 'PrecedesEqual;': '\u2aaf', + 'PrecedesSlantEqual;': '\u227c', + 'PrecedesTilde;': '\u227e', + 'preceq;': '\u2aaf', + 'precnapprox;': '\u2ab9', + 'precneqq;': '\u2ab5', + 'precnsim;': '\u22e8', + 'precsim;': '\u227e', + 'Prime;': '\u2033', + 'prime;': '\u2032', + 'primes;': '\u2119', + 'prnap;': '\u2ab9', + 'prnE;': '\u2ab5', + 'prnsim;': '\u22e8', + 'prod;': '\u220f', + 'Product;': '\u220f', + 'profalar;': '\u232e', + 'profline;': '\u2312', + 'profsurf;': '\u2313', + 'prop;': '\u221d', + 'Proportion;': '\u2237', + 'Proportional;': '\u221d', + 'propto;': '\u221d', + 'prsim;': '\u227e', + 'prurel;': '\u22b0', + 'Pscr;': '\U0001d4ab', + 'pscr;': '\U0001d4c5', + 'Psi;': '\u03a8', + 'psi;': '\u03c8', + 'puncsp;': '\u2008', + 'Qfr;': '\U0001d514', + 'qfr;': '\U0001d52e', + 'qint;': '\u2a0c', + 'Qopf;': '\u211a', + 'qopf;': '\U0001d562', + 'qprime;': '\u2057', + 'Qscr;': '\U0001d4ac', + 'qscr;': '\U0001d4c6', + 'quaternions;': '\u210d', + 'quatint;': '\u2a16', + 'quest;': '?', + 'questeq;': '\u225f', + 'QUOT': '"', + 'quot': '"', + 'QUOT;': '"', + 'quot;': '"', + 'rAarr;': '\u21db', + 'race;': '\u223d\u0331', + 'Racute;': '\u0154', + 'racute;': '\u0155', + 'radic;': '\u221a', + 'raemptyv;': '\u29b3', + 'Rang;': '\u27eb', + 'rang;': '\u27e9', + 'rangd;': '\u2992', + 'range;': '\u29a5', + 'rangle;': '\u27e9', + 'raquo': '\xbb', + 'raquo;': '\xbb', + 'Rarr;': '\u21a0', + 'rArr;': '\u21d2', + 'rarr;': '\u2192', + 'rarrap;': '\u2975', + 'rarrb;': '\u21e5', + 'rarrbfs;': '\u2920', + 'rarrc;': 
'\u2933', + 'rarrfs;': '\u291e', + 'rarrhk;': '\u21aa', + 'rarrlp;': '\u21ac', + 'rarrpl;': '\u2945', + 'rarrsim;': '\u2974', + 'Rarrtl;': '\u2916', + 'rarrtl;': '\u21a3', + 'rarrw;': '\u219d', + 'rAtail;': '\u291c', + 'ratail;': '\u291a', + 'ratio;': '\u2236', + 'rationals;': '\u211a', + 'RBarr;': '\u2910', + 'rBarr;': '\u290f', + 'rbarr;': '\u290d', + 'rbbrk;': '\u2773', + 'rbrace;': '}', + 'rbrack;': ']', + 'rbrke;': '\u298c', + 'rbrksld;': '\u298e', + 'rbrkslu;': '\u2990', + 'Rcaron;': '\u0158', + 'rcaron;': '\u0159', + 'Rcedil;': '\u0156', + 'rcedil;': '\u0157', + 'rceil;': '\u2309', + 'rcub;': '}', + 'Rcy;': '\u0420', + 'rcy;': '\u0440', + 'rdca;': '\u2937', + 'rdldhar;': '\u2969', + 'rdquo;': '\u201d', + 'rdquor;': '\u201d', + 'rdsh;': '\u21b3', + 'Re;': '\u211c', + 'real;': '\u211c', + 'realine;': '\u211b', + 'realpart;': '\u211c', + 'reals;': '\u211d', + 'rect;': '\u25ad', + 'REG': '\xae', + 'reg': '\xae', + 'REG;': '\xae', + 'reg;': '\xae', + 'ReverseElement;': '\u220b', + 'ReverseEquilibrium;': '\u21cb', + 'ReverseUpEquilibrium;': '\u296f', + 'rfisht;': '\u297d', + 'rfloor;': '\u230b', + 'Rfr;': '\u211c', + 'rfr;': '\U0001d52f', + 'rHar;': '\u2964', + 'rhard;': '\u21c1', + 'rharu;': '\u21c0', + 'rharul;': '\u296c', + 'Rho;': '\u03a1', + 'rho;': '\u03c1', + 'rhov;': '\u03f1', + 'RightAngleBracket;': '\u27e9', + 'RightArrow;': '\u2192', + 'Rightarrow;': '\u21d2', + 'rightarrow;': '\u2192', + 'RightArrowBar;': '\u21e5', + 'RightArrowLeftArrow;': '\u21c4', + 'rightarrowtail;': '\u21a3', + 'RightCeiling;': '\u2309', + 'RightDoubleBracket;': '\u27e7', + 'RightDownTeeVector;': '\u295d', + 'RightDownVector;': '\u21c2', + 'RightDownVectorBar;': '\u2955', + 'RightFloor;': '\u230b', + 'rightharpoondown;': '\u21c1', + 'rightharpoonup;': '\u21c0', + 'rightleftarrows;': '\u21c4', + 'rightleftharpoons;': '\u21cc', + 'rightrightarrows;': '\u21c9', + 'rightsquigarrow;': '\u219d', + 'RightTee;': '\u22a2', + 'RightTeeArrow;': '\u21a6', + 'RightTeeVector;': '\u295b', + 
'rightthreetimes;': '\u22cc', + 'RightTriangle;': '\u22b3', + 'RightTriangleBar;': '\u29d0', + 'RightTriangleEqual;': '\u22b5', + 'RightUpDownVector;': '\u294f', + 'RightUpTeeVector;': '\u295c', + 'RightUpVector;': '\u21be', + 'RightUpVectorBar;': '\u2954', + 'RightVector;': '\u21c0', + 'RightVectorBar;': '\u2953', + 'ring;': '\u02da', + 'risingdotseq;': '\u2253', + 'rlarr;': '\u21c4', + 'rlhar;': '\u21cc', + 'rlm;': '\u200f', + 'rmoust;': '\u23b1', + 'rmoustache;': '\u23b1', + 'rnmid;': '\u2aee', + 'roang;': '\u27ed', + 'roarr;': '\u21fe', + 'robrk;': '\u27e7', + 'ropar;': '\u2986', + 'Ropf;': '\u211d', + 'ropf;': '\U0001d563', + 'roplus;': '\u2a2e', + 'rotimes;': '\u2a35', + 'RoundImplies;': '\u2970', + 'rpar;': ')', + 'rpargt;': '\u2994', + 'rppolint;': '\u2a12', + 'rrarr;': '\u21c9', + 'Rrightarrow;': '\u21db', + 'rsaquo;': '\u203a', + 'Rscr;': '\u211b', + 'rscr;': '\U0001d4c7', + 'Rsh;': '\u21b1', + 'rsh;': '\u21b1', + 'rsqb;': ']', + 'rsquo;': '\u2019', + 'rsquor;': '\u2019', + 'rthree;': '\u22cc', + 'rtimes;': '\u22ca', + 'rtri;': '\u25b9', + 'rtrie;': '\u22b5', + 'rtrif;': '\u25b8', + 'rtriltri;': '\u29ce', + 'RuleDelayed;': '\u29f4', + 'ruluhar;': '\u2968', + 'rx;': '\u211e', + 'Sacute;': '\u015a', + 'sacute;': '\u015b', + 'sbquo;': '\u201a', + 'Sc;': '\u2abc', + 'sc;': '\u227b', + 'scap;': '\u2ab8', + 'Scaron;': '\u0160', + 'scaron;': '\u0161', + 'sccue;': '\u227d', + 'scE;': '\u2ab4', + 'sce;': '\u2ab0', + 'Scedil;': '\u015e', + 'scedil;': '\u015f', + 'Scirc;': '\u015c', + 'scirc;': '\u015d', + 'scnap;': '\u2aba', + 'scnE;': '\u2ab6', + 'scnsim;': '\u22e9', + 'scpolint;': '\u2a13', + 'scsim;': '\u227f', + 'Scy;': '\u0421', + 'scy;': '\u0441', + 'sdot;': '\u22c5', + 'sdotb;': '\u22a1', + 'sdote;': '\u2a66', + 'searhk;': '\u2925', + 'seArr;': '\u21d8', + 'searr;': '\u2198', + 'searrow;': '\u2198', + 'sect': '\xa7', + 'sect;': '\xa7', + 'semi;': ';', + 'seswar;': '\u2929', + 'setminus;': '\u2216', + 'setmn;': '\u2216', + 'sext;': '\u2736', + 'Sfr;': 
'\U0001d516', + 'sfr;': '\U0001d530', + 'sfrown;': '\u2322', + 'sharp;': '\u266f', + 'SHCHcy;': '\u0429', + 'shchcy;': '\u0449', + 'SHcy;': '\u0428', + 'shcy;': '\u0448', + 'ShortDownArrow;': '\u2193', + 'ShortLeftArrow;': '\u2190', + 'shortmid;': '\u2223', + 'shortparallel;': '\u2225', + 'ShortRightArrow;': '\u2192', + 'ShortUpArrow;': '\u2191', + 'shy': '\xad', + 'shy;': '\xad', + 'Sigma;': '\u03a3', + 'sigma;': '\u03c3', + 'sigmaf;': '\u03c2', + 'sigmav;': '\u03c2', + 'sim;': '\u223c', + 'simdot;': '\u2a6a', + 'sime;': '\u2243', + 'simeq;': '\u2243', + 'simg;': '\u2a9e', + 'simgE;': '\u2aa0', + 'siml;': '\u2a9d', + 'simlE;': '\u2a9f', + 'simne;': '\u2246', + 'simplus;': '\u2a24', + 'simrarr;': '\u2972', + 'slarr;': '\u2190', + 'SmallCircle;': '\u2218', + 'smallsetminus;': '\u2216', + 'smashp;': '\u2a33', + 'smeparsl;': '\u29e4', + 'smid;': '\u2223', + 'smile;': '\u2323', + 'smt;': '\u2aaa', + 'smte;': '\u2aac', + 'smtes;': '\u2aac\ufe00', + 'SOFTcy;': '\u042c', + 'softcy;': '\u044c', + 'sol;': '/', + 'solb;': '\u29c4', + 'solbar;': '\u233f', + 'Sopf;': '\U0001d54a', + 'sopf;': '\U0001d564', + 'spades;': '\u2660', + 'spadesuit;': '\u2660', + 'spar;': '\u2225', + 'sqcap;': '\u2293', + 'sqcaps;': '\u2293\ufe00', + 'sqcup;': '\u2294', + 'sqcups;': '\u2294\ufe00', + 'Sqrt;': '\u221a', + 'sqsub;': '\u228f', + 'sqsube;': '\u2291', + 'sqsubset;': '\u228f', + 'sqsubseteq;': '\u2291', + 'sqsup;': '\u2290', + 'sqsupe;': '\u2292', + 'sqsupset;': '\u2290', + 'sqsupseteq;': '\u2292', + 'squ;': '\u25a1', + 'Square;': '\u25a1', + 'square;': '\u25a1', + 'SquareIntersection;': '\u2293', + 'SquareSubset;': '\u228f', + 'SquareSubsetEqual;': '\u2291', + 'SquareSuperset;': '\u2290', + 'SquareSupersetEqual;': '\u2292', + 'SquareUnion;': '\u2294', + 'squarf;': '\u25aa', + 'squf;': '\u25aa', + 'srarr;': '\u2192', + 'Sscr;': '\U0001d4ae', + 'sscr;': '\U0001d4c8', + 'ssetmn;': '\u2216', + 'ssmile;': '\u2323', + 'sstarf;': '\u22c6', + 'Star;': '\u22c6', + 'star;': '\u2606', + 'starf;': 
'\u2605', + 'straightepsilon;': '\u03f5', + 'straightphi;': '\u03d5', + 'strns;': '\xaf', + 'Sub;': '\u22d0', + 'sub;': '\u2282', + 'subdot;': '\u2abd', + 'subE;': '\u2ac5', + 'sube;': '\u2286', + 'subedot;': '\u2ac3', + 'submult;': '\u2ac1', + 'subnE;': '\u2acb', + 'subne;': '\u228a', + 'subplus;': '\u2abf', + 'subrarr;': '\u2979', + 'Subset;': '\u22d0', + 'subset;': '\u2282', + 'subseteq;': '\u2286', + 'subseteqq;': '\u2ac5', + 'SubsetEqual;': '\u2286', + 'subsetneq;': '\u228a', + 'subsetneqq;': '\u2acb', + 'subsim;': '\u2ac7', + 'subsub;': '\u2ad5', + 'subsup;': '\u2ad3', + 'succ;': '\u227b', + 'succapprox;': '\u2ab8', + 'succcurlyeq;': '\u227d', + 'Succeeds;': '\u227b', + 'SucceedsEqual;': '\u2ab0', + 'SucceedsSlantEqual;': '\u227d', + 'SucceedsTilde;': '\u227f', + 'succeq;': '\u2ab0', + 'succnapprox;': '\u2aba', + 'succneqq;': '\u2ab6', + 'succnsim;': '\u22e9', + 'succsim;': '\u227f', + 'SuchThat;': '\u220b', + 'Sum;': '\u2211', + 'sum;': '\u2211', + 'sung;': '\u266a', + 'sup1': '\xb9', + 'sup1;': '\xb9', + 'sup2': '\xb2', + 'sup2;': '\xb2', + 'sup3': '\xb3', + 'sup3;': '\xb3', + 'Sup;': '\u22d1', + 'sup;': '\u2283', + 'supdot;': '\u2abe', + 'supdsub;': '\u2ad8', + 'supE;': '\u2ac6', + 'supe;': '\u2287', + 'supedot;': '\u2ac4', + 'Superset;': '\u2283', + 'SupersetEqual;': '\u2287', + 'suphsol;': '\u27c9', + 'suphsub;': '\u2ad7', + 'suplarr;': '\u297b', + 'supmult;': '\u2ac2', + 'supnE;': '\u2acc', + 'supne;': '\u228b', + 'supplus;': '\u2ac0', + 'Supset;': '\u22d1', + 'supset;': '\u2283', + 'supseteq;': '\u2287', + 'supseteqq;': '\u2ac6', + 'supsetneq;': '\u228b', + 'supsetneqq;': '\u2acc', + 'supsim;': '\u2ac8', + 'supsub;': '\u2ad4', + 'supsup;': '\u2ad6', + 'swarhk;': '\u2926', + 'swArr;': '\u21d9', + 'swarr;': '\u2199', + 'swarrow;': '\u2199', + 'swnwar;': '\u292a', + 'szlig': '\xdf', + 'szlig;': '\xdf', + 'Tab;': '\t', + 'target;': '\u2316', + 'Tau;': '\u03a4', + 'tau;': '\u03c4', + 'tbrk;': '\u23b4', + 'Tcaron;': '\u0164', + 'tcaron;': '\u0165', + 
'Tcedil;': '\u0162', + 'tcedil;': '\u0163', + 'Tcy;': '\u0422', + 'tcy;': '\u0442', + 'tdot;': '\u20db', + 'telrec;': '\u2315', + 'Tfr;': '\U0001d517', + 'tfr;': '\U0001d531', + 'there4;': '\u2234', + 'Therefore;': '\u2234', + 'therefore;': '\u2234', + 'Theta;': '\u0398', + 'theta;': '\u03b8', + 'thetasym;': '\u03d1', + 'thetav;': '\u03d1', + 'thickapprox;': '\u2248', + 'thicksim;': '\u223c', + 'ThickSpace;': '\u205f\u200a', + 'thinsp;': '\u2009', + 'ThinSpace;': '\u2009', + 'thkap;': '\u2248', + 'thksim;': '\u223c', + 'THORN': '\xde', + 'thorn': '\xfe', + 'THORN;': '\xde', + 'thorn;': '\xfe', + 'Tilde;': '\u223c', + 'tilde;': '\u02dc', + 'TildeEqual;': '\u2243', + 'TildeFullEqual;': '\u2245', + 'TildeTilde;': '\u2248', + 'times': '\xd7', + 'times;': '\xd7', + 'timesb;': '\u22a0', + 'timesbar;': '\u2a31', + 'timesd;': '\u2a30', + 'tint;': '\u222d', + 'toea;': '\u2928', + 'top;': '\u22a4', + 'topbot;': '\u2336', + 'topcir;': '\u2af1', + 'Topf;': '\U0001d54b', + 'topf;': '\U0001d565', + 'topfork;': '\u2ada', + 'tosa;': '\u2929', + 'tprime;': '\u2034', + 'TRADE;': '\u2122', + 'trade;': '\u2122', + 'triangle;': '\u25b5', + 'triangledown;': '\u25bf', + 'triangleleft;': '\u25c3', + 'trianglelefteq;': '\u22b4', + 'triangleq;': '\u225c', + 'triangleright;': '\u25b9', + 'trianglerighteq;': '\u22b5', + 'tridot;': '\u25ec', + 'trie;': '\u225c', + 'triminus;': '\u2a3a', + 'TripleDot;': '\u20db', + 'triplus;': '\u2a39', + 'trisb;': '\u29cd', + 'tritime;': '\u2a3b', + 'trpezium;': '\u23e2', + 'Tscr;': '\U0001d4af', + 'tscr;': '\U0001d4c9', + 'TScy;': '\u0426', + 'tscy;': '\u0446', + 'TSHcy;': '\u040b', + 'tshcy;': '\u045b', + 'Tstrok;': '\u0166', + 'tstrok;': '\u0167', + 'twixt;': '\u226c', + 'twoheadleftarrow;': '\u219e', + 'twoheadrightarrow;': '\u21a0', + 'Uacute': '\xda', + 'uacute': '\xfa', + 'Uacute;': '\xda', + 'uacute;': '\xfa', + 'Uarr;': '\u219f', + 'uArr;': '\u21d1', + 'uarr;': '\u2191', + 'Uarrocir;': '\u2949', + 'Ubrcy;': '\u040e', + 'ubrcy;': '\u045e', + 'Ubreve;': 
'\u016c', + 'ubreve;': '\u016d', + 'Ucirc': '\xdb', + 'ucirc': '\xfb', + 'Ucirc;': '\xdb', + 'ucirc;': '\xfb', + 'Ucy;': '\u0423', + 'ucy;': '\u0443', + 'udarr;': '\u21c5', + 'Udblac;': '\u0170', + 'udblac;': '\u0171', + 'udhar;': '\u296e', + 'ufisht;': '\u297e', + 'Ufr;': '\U0001d518', + 'ufr;': '\U0001d532', + 'Ugrave': '\xd9', + 'ugrave': '\xf9', + 'Ugrave;': '\xd9', + 'ugrave;': '\xf9', + 'uHar;': '\u2963', + 'uharl;': '\u21bf', + 'uharr;': '\u21be', + 'uhblk;': '\u2580', + 'ulcorn;': '\u231c', + 'ulcorner;': '\u231c', + 'ulcrop;': '\u230f', + 'ultri;': '\u25f8', + 'Umacr;': '\u016a', + 'umacr;': '\u016b', + 'uml': '\xa8', + 'uml;': '\xa8', + 'UnderBar;': '_', + 'UnderBrace;': '\u23df', + 'UnderBracket;': '\u23b5', + 'UnderParenthesis;': '\u23dd', + 'Union;': '\u22c3', + 'UnionPlus;': '\u228e', + 'Uogon;': '\u0172', + 'uogon;': '\u0173', + 'Uopf;': '\U0001d54c', + 'uopf;': '\U0001d566', + 'UpArrow;': '\u2191', + 'Uparrow;': '\u21d1', + 'uparrow;': '\u2191', + 'UpArrowBar;': '\u2912', + 'UpArrowDownArrow;': '\u21c5', + 'UpDownArrow;': '\u2195', + 'Updownarrow;': '\u21d5', + 'updownarrow;': '\u2195', + 'UpEquilibrium;': '\u296e', + 'upharpoonleft;': '\u21bf', + 'upharpoonright;': '\u21be', + 'uplus;': '\u228e', + 'UpperLeftArrow;': '\u2196', + 'UpperRightArrow;': '\u2197', + 'Upsi;': '\u03d2', + 'upsi;': '\u03c5', + 'upsih;': '\u03d2', + 'Upsilon;': '\u03a5', + 'upsilon;': '\u03c5', + 'UpTee;': '\u22a5', + 'UpTeeArrow;': '\u21a5', + 'upuparrows;': '\u21c8', + 'urcorn;': '\u231d', + 'urcorner;': '\u231d', + 'urcrop;': '\u230e', + 'Uring;': '\u016e', + 'uring;': '\u016f', + 'urtri;': '\u25f9', + 'Uscr;': '\U0001d4b0', + 'uscr;': '\U0001d4ca', + 'utdot;': '\u22f0', + 'Utilde;': '\u0168', + 'utilde;': '\u0169', + 'utri;': '\u25b5', + 'utrif;': '\u25b4', + 'uuarr;': '\u21c8', + 'Uuml': '\xdc', + 'uuml': '\xfc', + 'Uuml;': '\xdc', + 'uuml;': '\xfc', + 'uwangle;': '\u29a7', + 'vangrt;': '\u299c', + 'varepsilon;': '\u03f5', + 'varkappa;': '\u03f0', + 'varnothing;': 
'\u2205', + 'varphi;': '\u03d5', + 'varpi;': '\u03d6', + 'varpropto;': '\u221d', + 'vArr;': '\u21d5', + 'varr;': '\u2195', + 'varrho;': '\u03f1', + 'varsigma;': '\u03c2', + 'varsubsetneq;': '\u228a\ufe00', + 'varsubsetneqq;': '\u2acb\ufe00', + 'varsupsetneq;': '\u228b\ufe00', + 'varsupsetneqq;': '\u2acc\ufe00', + 'vartheta;': '\u03d1', + 'vartriangleleft;': '\u22b2', + 'vartriangleright;': '\u22b3', + 'Vbar;': '\u2aeb', + 'vBar;': '\u2ae8', + 'vBarv;': '\u2ae9', + 'Vcy;': '\u0412', + 'vcy;': '\u0432', + 'VDash;': '\u22ab', + 'Vdash;': '\u22a9', + 'vDash;': '\u22a8', + 'vdash;': '\u22a2', + 'Vdashl;': '\u2ae6', + 'Vee;': '\u22c1', + 'vee;': '\u2228', + 'veebar;': '\u22bb', + 'veeeq;': '\u225a', + 'vellip;': '\u22ee', + 'Verbar;': '\u2016', + 'verbar;': '|', + 'Vert;': '\u2016', + 'vert;': '|', + 'VerticalBar;': '\u2223', + 'VerticalLine;': '|', + 'VerticalSeparator;': '\u2758', + 'VerticalTilde;': '\u2240', + 'VeryThinSpace;': '\u200a', + 'Vfr;': '\U0001d519', + 'vfr;': '\U0001d533', + 'vltri;': '\u22b2', + 'vnsub;': '\u2282\u20d2', + 'vnsup;': '\u2283\u20d2', + 'Vopf;': '\U0001d54d', + 'vopf;': '\U0001d567', + 'vprop;': '\u221d', + 'vrtri;': '\u22b3', + 'Vscr;': '\U0001d4b1', + 'vscr;': '\U0001d4cb', + 'vsubnE;': '\u2acb\ufe00', + 'vsubne;': '\u228a\ufe00', + 'vsupnE;': '\u2acc\ufe00', + 'vsupne;': '\u228b\ufe00', + 'Vvdash;': '\u22aa', + 'vzigzag;': '\u299a', + 'Wcirc;': '\u0174', + 'wcirc;': '\u0175', + 'wedbar;': '\u2a5f', + 'Wedge;': '\u22c0', + 'wedge;': '\u2227', + 'wedgeq;': '\u2259', + 'weierp;': '\u2118', + 'Wfr;': '\U0001d51a', + 'wfr;': '\U0001d534', + 'Wopf;': '\U0001d54e', + 'wopf;': '\U0001d568', + 'wp;': '\u2118', + 'wr;': '\u2240', + 'wreath;': '\u2240', + 'Wscr;': '\U0001d4b2', + 'wscr;': '\U0001d4cc', + 'xcap;': '\u22c2', + 'xcirc;': '\u25ef', + 'xcup;': '\u22c3', + 'xdtri;': '\u25bd', + 'Xfr;': '\U0001d51b', + 'xfr;': '\U0001d535', + 'xhArr;': '\u27fa', + 'xharr;': '\u27f7', + 'Xi;': '\u039e', + 'xi;': '\u03be', + 'xlArr;': '\u27f8', + 'xlarr;': 
'\u27f5', + 'xmap;': '\u27fc', + 'xnis;': '\u22fb', + 'xodot;': '\u2a00', + 'Xopf;': '\U0001d54f', + 'xopf;': '\U0001d569', + 'xoplus;': '\u2a01', + 'xotime;': '\u2a02', + 'xrArr;': '\u27f9', + 'xrarr;': '\u27f6', + 'Xscr;': '\U0001d4b3', + 'xscr;': '\U0001d4cd', + 'xsqcup;': '\u2a06', + 'xuplus;': '\u2a04', + 'xutri;': '\u25b3', + 'xvee;': '\u22c1', + 'xwedge;': '\u22c0', + 'Yacute': '\xdd', + 'yacute': '\xfd', + 'Yacute;': '\xdd', + 'yacute;': '\xfd', + 'YAcy;': '\u042f', + 'yacy;': '\u044f', + 'Ycirc;': '\u0176', + 'ycirc;': '\u0177', + 'Ycy;': '\u042b', + 'ycy;': '\u044b', + 'yen': '\xa5', + 'yen;': '\xa5', + 'Yfr;': '\U0001d51c', + 'yfr;': '\U0001d536', + 'YIcy;': '\u0407', + 'yicy;': '\u0457', + 'Yopf;': '\U0001d550', + 'yopf;': '\U0001d56a', + 'Yscr;': '\U0001d4b4', + 'yscr;': '\U0001d4ce', + 'YUcy;': '\u042e', + 'yucy;': '\u044e', + 'yuml': '\xff', + 'Yuml;': '\u0178', + 'yuml;': '\xff', + 'Zacute;': '\u0179', + 'zacute;': '\u017a', + 'Zcaron;': '\u017d', + 'zcaron;': '\u017e', + 'Zcy;': '\u0417', + 'zcy;': '\u0437', + 'Zdot;': '\u017b', + 'zdot;': '\u017c', + 'zeetrf;': '\u2128', + 'ZeroWidthSpace;': '\u200b', + 'Zeta;': '\u0396', + 'zeta;': '\u03b6', + 'Zfr;': '\u2128', + 'zfr;': '\U0001d537', + 'ZHcy;': '\u0416', + 'zhcy;': '\u0436', + 'zigrarr;': '\u21dd', + 'Zopf;': '\u2124', + 'zopf;': '\U0001d56b', + 'Zscr;': '\U0001d4b5', + 'zscr;': '\U0001d4cf', + 'zwj;': '\u200d', + 'zwnj;': '\u200c', + } + try: import http.client as compat_http_client except ImportError: # Python 2 @@ -83,7 +2321,6 @@ try: except ImportError: # Python 2 from HTMLParser import HTMLParser as compat_HTMLParser - try: from subprocess import DEVNULL compat_subprocess_get_DEVNULL = lambda: DEVNULL @@ -626,6 +2863,7 @@ __all__ = [ 'compat_getenv', 'compat_getpass', 'compat_html_entities', + 'compat_html_entities_html5', 'compat_http_client', 'compat_http_server', 'compat_input', From 55b2f099c0c820d6c4b46609b175a44a6d7f97bf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Fri, 10 Jun 2016 15:11:55 +0800 Subject: [PATCH 447/501] [utils] Decode HTML5 entities Used in test_Vporn_1. Also related to #9270 --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index feef80465..0e25de6b7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -249,6 +249,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('&#47;'), '/') self.assertEqual(unescapeHTML('&eacute;'), 'é') self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;') + # HTML5 entities + self.assertEqual(unescapeHTML('&period;&apos;'), '.\'') def test_date_from_str(self): self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day')) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 229de4b39..f77ab8650 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -39,6 +39,7 @@ from .compat import ( compat_chr, compat_etree_fromstring, compat_html_entities, + compat_html_entities_html5, compat_http_client, compat_kwargs, compat_parse_qs, @@ -456,12 +457,19 @@ def orderedSet(iterable): return res -def _htmlentity_transform(entity): +def _htmlentity_transform(entity_with_semicolon): """Transforms an HTML entity to a character.""" + entity = entity_with_semicolon[:-1] + # Known non-numeric HTML entity if entity in compat_html_entities.name2codepoint: return compat_chr(compat_html_entities.name2codepoint[entity]) + # TODO: HTML5 allows entities without a semicolon. For example, + # '&Eacuteric' should be decoded as 'Éric'.
+ if entity_with_semicolon in compat_html_entities_html5: + return compat_html_entities_html5[entity_with_semicolon] + mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity) if mobj is not None: numstr = mobj.group(1) @@ -486,7 +494,7 @@ def unescapeHTML(s): assert type(s) == compat_str return re.sub( - r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s) + r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) def get_subprocess_encoding(): From a2252385308898074f5006ed737aeb98bb8b0402 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 15:12:53 +0800 Subject: [PATCH 448/501] [vporn] Improve error detection and update _TESTS --- youtube_dl/extractor/vporn.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 92c90e517..1557a0e04 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, parse_duration, str_to_int, ) @@ -27,7 +28,8 @@ class VpornIE(InfoExtractor): 'duration': 393, 'age_limit': 18, 'view_count': int, - } + }, + 'skip': 'video removed', }, { 'url': 'http://www.vporn.com/female/hana-shower/523564/', @@ -40,7 +42,7 @@ class VpornIE(InfoExtractor): 'description': 'Hana showers at the bathroom.', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'Hmmmmm', - 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'], + 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'], 'duration': 588, 'age_limit': 18, 'view_count': int, @@ -55,6 +57,10 @@ class VpornIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!' 
+ if errmsg in webpage: + raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) + title = self._html_search_regex( r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() description = self._html_search_regex( From c16f8a4659566fd7421226b0d5ddb871425b392b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:04:28 +0800 Subject: [PATCH 449/501] [voicerepublic] Force video_id to be strings Related: be6217b26142491232fb697b125015d45437832d --- youtube_dl/extractor/voicerepublic.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index 93d15a556..4f1a99a89 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( ExtractorError, determine_ext, @@ -16,13 +19,13 @@ class VoiceRepublicIE(InfoExtractor): _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)' _TESTS = [{ 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', - 'md5': '0554a24d1657915aa8e8f84e15dc9353', + 'md5': 'b9174d651323f17783000876347116e3', 'info_dict': { 'id': '2296', 'display_id': 'watching-the-watchers-building-a-sousveillance-state', 'ext': 'm4a', 'title': 'Watching the Watchers: Building a Sousveillance State', - 'description': 'md5:715ba964958afa2398df615809cfecb1', + 'description': 'Secret surveillance programs have metadata too. 
The people and companies that operate secret surveillance programs can be surveilled.', 'thumbnail': 're:^https?://.*\.(?:png|jpg)$', 'duration': 1800, 'view_count': int, @@ -52,7 +55,7 @@ class VoiceRepublicIE(InfoExtractor): if data: title = data['title'] description = data.get('teaser') - talk_id = data.get('talk_id') or display_id + talk_id = compat_str(data.get('talk_id') or display_id) talk = data['talk'] duration = int_or_none(talk.get('duration')) formats = [{ From 09728d5fbc93c769b3f8971c06e9ed0bfb168b37 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:11:28 +0800 Subject: [PATCH 450/501] [audiomack:album] Force video_id to be strings Related: be6217b26142491232fb697b125015d45437832d --- youtube_dl/extractor/audiomack.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index a52d26cec..f3bd4d444 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -6,6 +6,7 @@ import time from .common import InfoExtractor from .soundcloud import SoundcloudIE +from ..compat import compat_str from ..utils import ( ExtractorError, url_basename, @@ -136,7 +137,7 @@ class AudiomackAlbumIE(InfoExtractor): result[resultkey] = api_response[apikey] song_id = url_basename(api_response['url']).rpartition('.')[0] result['entries'].append({ - 'id': api_response.get('id', song_id), + 'id': compat_str(api_response.get('id', song_id)), 'uploader': api_response.get('artist'), 'title': api_response.get('title', song_id), 'url': api_response['url'], From daa0df9e8beac1325e5fb55d828e7a3a38e74bf6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:37:12 +0800 Subject: [PATCH 451/501] [youtube:user] Support another URL form Such an URL comes from http://www.gametrailers.com/. This is originally a test case in GenericIE, but now seems all GameTrailers videos are on YouTube. 
--- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6c9f77d95..00dd602ff 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1988,7 +1988,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' IE_NAME = 'youtube:user' @@ -2001,6 +2001,9 @@ class YoutubeUserIE(YoutubeChannelIE): }, { 'url': 'ytuser:phihag', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/gametrailers', + 'only_matching': True, }] @classmethod From 1fa309da40bfc5e7e72639e80cf6556b3839fc81 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:39:31 +0800 Subject: [PATCH 452/501] [generic] Update test_Generic_40 The original link now redirects to an YouTube user channel. 
--- youtube_dl/extractor/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 798c109c6..ef18ce3dc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -627,13 +627,13 @@ class GenericIE(InfoExtractor): }, # MTVSercices embed { - 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too', - 'md5': '35727f82f58c76d996fc188f9755b0d5', + 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html', + 'md5': 'ca1aef97695ef2c1d6973256a57e5252', 'info_dict': { - 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9', + 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1', 'ext': 'mp4', - 'title': 'Review', - 'description': 'Mario\'s life in the fast lane has never looked so good.', + 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored', + 'description': 'Two valets share their love for movie star Liam Neesons.', }, }, # YouTube embed via <data-embed-url=""> From 6c0376fe4f16f53fd87f5e6a56531fc153922980 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:53:40 +0800 Subject: [PATCH 453/501] [dw] Skip an invalid test DW documentaries only last for one or two weeks. 
See #9475 --- youtube_dl/extractor/dw.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py index 0f0f0b8d3..d740652f1 100644 --- a/youtube_dl/extractor/dw.py +++ b/youtube_dl/extractor/dw.py @@ -35,6 +35,7 @@ class DWIE(InfoExtractor): 'upload_date': '20160311', } }, { + # DW documentaries, only last for one or two weeks 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798', 'md5': '56b6214ef463bfb9a3b71aeb886f3cf1', 'info_dict': { @@ -44,6 +45,7 @@ class DWIE(InfoExtractor): 'description': 'Welcome to the 90s - The Golden Decade of Hip Hop', 'upload_date': '20160521', }, + 'skip': 'Video removed', }] def _real_extract(self, url): From 836ab0c554f13751adff02d3987f6f3f79e2db09 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 18:12:57 +0800 Subject: [PATCH 454/501] [compat] Import html5 entities correctly --- youtube_dl/compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0243949a4..67db1c7c6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -64,8 +64,8 @@ except ImportError: # Python 2 import htmlentitydefs as compat_html_entities try: # Python >= 3.3 - from compat_html_entities import html as compat_html_entities_html5 -except ImportError: + compat_html_entities_html5 = compat_html_entities.html5 +except AttributeError: # Copied from CPython 3.5.1 html/entities.py compat_html_entities_html5 = { 'Aacute': '\xc1', From bdf16f81403c036a0f40d10a136a46aa7d2f6f0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Jun 2016 22:40:18 +0700 Subject: [PATCH 455/501] [lynda] Add support for new authentication (Closes #9740) --- youtube_dl/extractor/lynda.py | 115 ++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/youtube_dl/extractor/lynda.py 
b/youtube_dl/extractor/lynda.py index 86d47266f..c2678652e 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -1,84 +1,89 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_HTTPError, + compat_str, + compat_urlparse, +) from ..utils import ( ExtractorError, - clean_html, int_or_none, - sanitized_Request, urlencode_postdata, ) class LyndaBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' + _SIGNIN_URL = 'https://www.lynda.com/signin' + _PASSWORD_URL = 'https://www.lynda.com/signin/password' + _USER_URL = 'https://www.lynda.com/signin/user' _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' _NETRC_MACHINE = 'lynda' def _real_initialize(self): self._login() + @staticmethod + def _check_error(json_string, key_or_keys): + keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys + for key in keys: + error = json_string.get(key) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url): + action_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html, + 'post url', default=fallback_action_url, group='url') + + if not action_url.startswith('http'): + action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url) + + form_data = self._hidden_inputs(form_html) + form_data.update(extra_form_data) + + try: + response = self._download_json( + action_url, None, note, + data=urlencode_postdata(form_data), + headers={ + 'Referer': referrer_url, + 'X-Requested-With': 'XMLHttpRequest', + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: + response = self._parse_json(e.cause.read().decode('utf-8'), None) + self._check_error(response, 
('email', 'password')) + raise + + self._check_error(response, 'ErrorMessage') + + return response, action_url + def _login(self): username, password = self._get_login_info() if username is None: return - login_form = { - 'username': username, - 'password': password, - 'remember': 'false', - 'stayPut': 'false' - } - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) - login_page = self._download_webpage( - request, None, 'Logging in as %s' % username) + # Step 1: download signin page + signin_page = self._download_webpage( + self._SIGNIN_URL, None, 'Downloading signin page') - # Not (yet) logged in - m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page) - if m is not None: - response = m.group('json') - response_json = json.loads(response) - state = response_json['state'] + # Step 2: submit email + signin_form = self._search_regex( + r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)', + signin_page, 'signin form') + signin_page, signin_url = self._login_step( + signin_form, self._PASSWORD_URL, {'email': username}, + 'Submitting email', self._SIGNIN_URL) - if state == 'notlogged': - raise ExtractorError( - 'Unable to login, incorrect username and/or password', - expected=True) - - # This is when we get popup: - # > You're already logged in to lynda.com on two devices. - # > If you log in here, we'll log you out of another device. - # So, we need to confirm this. 
- if state == 'conflicted': - confirm_form = { - 'username': '', - 'password': '', - 'resolve': 'true', - 'remember': 'false', - 'stayPut': 'false', - } - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(confirm_form)) - login_page = self._download_webpage( - request, None, - 'Confirming log in and log out from another device') - - if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): - if 'login error' in login_page: - mobj = re.search( - r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>', - login_page) - if mobj: - raise ExtractorError( - 'lynda returned error: %s - %s' - % (mobj.group('title'), clean_html(mobj.group('description'))), - expected=True) - raise ExtractorError('Unable to log in') + # Step 3: submit password + password_form = signin_page['body'] + self._login_step( + password_form, self._USER_URL, {'email': username, 'password': password}, + 'Submitting password', signin_url) def _logout(self): username, _ = self._get_login_info() From 3841256c2c5fd35229cd8f2c2c8a8e2401f7016b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Jun 2016 23:01:52 +0700 Subject: [PATCH 456/501] [lynda] Skip login if already logged in --- youtube_dl/extractor/lynda.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index c2678652e..7610985b4 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -71,6 +71,11 @@ class LyndaBaseIE(InfoExtractor): signin_page = self._download_webpage( self._SIGNIN_URL, None, 'Downloading signin page') + # Already logged in + if any(re.search(p, signin_page) for p in ( + 'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): + return + # Step 2: submit email signin_form = self._search_regex( r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)', @@ -85,15 
+90,6 @@ class LyndaBaseIE(InfoExtractor): password_form, self._USER_URL, {'email': username, 'password': password}, 'Submitting password', signin_url) - def _logout(self): - username, _ = self._get_login_info() - if username is None: - return - - self._download_webpage( - 'http://www.lynda.com/ajax/logout.aspx', None, - 'Logging out', 'Unable to log out', fatal=False) - class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' @@ -217,8 +213,6 @@ class LyndaCourseIE(LyndaBaseIE): 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, course_id, 'Downloading course JSON') - self._logout() - if course.get('Status') == 'NotFound': raise ExtractorError( 'Course %s does not exist' % course_id, expected=True) From 0434358823a9b7da7656f3e6d8de28d1b42036f5 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Fri, 10 Jun 2016 19:17:58 +0200 Subject: [PATCH 457/501] [Lynda] Extract course description --- youtube_dl/extractor/lynda.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 86d47266f..c1bca5678 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -246,5 +246,6 @@ class LyndaCourseIE(LyndaBaseIE): % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT) course_title = course.get('Title') + course_description = course.get('Description') - return self.playlist_result(entries, course_id, course_title) + return self.playlist_result(entries, course_id, course_title, course_description) From d845622b2e09ebac28e21f76f6d5c2795aa9bb50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 02:41:48 +0700 Subject: [PATCH 458/501] release 2016.06.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- README.md | 2 +- docs/supportedsites.md | 9 +++++---- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md 
b/.github/ISSUE_TEMPLATE.md index e593ee78a..16ef23066 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.03 +[debug] youtube-dl version 2016.06.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index 205c485d0..2ea8acb30 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like. 
--write-info-json Write video metadata to a .info.json file --write-annotations Write video annotations to a .annotations.xml file - --load-info FILE JSON file containing the video information + --load-info-json FILE JSON file containing the video information (created with the "--write-info-json" option) --cookies FILE File to read cookies from and dump cookie diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 619bd0825..f89c2d1f2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **AdobeTVVideo** - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network + - **AfreecaTV**: afreecatv.com - **Aftonbladet** - **AirMozilla** - **AlJazeera** @@ -43,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** + - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -253,6 +254,7 @@ - **Globo** - **GloboArticle** - **GodTube** + - **GodTV** - **GoldenMoustache** - **Golem** - **GoogleDrive** @@ -738,6 +740,7 @@ - **VideoPremium** - **VideoTt**: video.tt - Your True Tube (Currently broken) - **videoweed**: VideoWeed + - **Vidio** - **vidme** - **vidme:user** - **vidme:user:likes** @@ -773,7 +776,6 @@ - **VRT** - **vube**: Vube.com - **VuClip** - - **vulture.com** - **Walla** - **washingtonpost** - **washingtonpost:article** @@ -781,10 +783,8 @@ - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile** - - **WDRMaus**: Sendung mit der Maus - **WebOfStories** - **WebOfStoriesPlaylist** - - **Weibo** - **WeiqiTV**: WQTV - **wholecloud**: WholeCloud - **Wimp** @@ -820,6 +820,7 @@ - **Ynet** - **YouJizz** - **youku**: 优酷 + - **youku:show** - **YouPorn** - **YourUpload** - **youtube**: YouTube.com diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d24d06f4a..dafb6513a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 
+1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.03' +__version__ = '2016.06.11' From 6626c214e1e0fa422d68b875cbb69dfb5aad8745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 03:00:08 +0700 Subject: [PATCH 459/501] release 2016.06.11.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 16ef23066..564cffae7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11 +[debug] youtube-dl version 2016.06.11.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dafb6513a..5bcb6a7b3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11' +__version__ = '2016.06.11.1' From 9ddc289f88542f4b0bf7ad5e9c725caf8889f71b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 04:59:47 +0700 Subject: [PATCH 460/501] [README.md] Document missing playlist fields in output template --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 2ea8acb30..3ff33c156 100644 --- a/README.md +++ b/README.md @@ -511,6 +511,9 @@ The basic usage is not to set any template arguments when downloading a single f - `autonumber`: Five-digit number that will be increased with each download, starting at zero - `playlist`: Name or id of the playlist that contains the video - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist + - `playlist_id`: Playlist identifier + - `playlist_title`: Playlist title + Available for the 
video that belongs to some logical chapter or section: - `chapter`: Name or title of the chapter the video belongs to From 62666af99fb55e3ba535ce630e8ce0aed1b5b0e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:13:05 +0700 Subject: [PATCH 461/501] [indavideo] Fix formats' height (Closes #9744) --- youtube_dl/extractor/indavideo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 9622f198a..c6f080484 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -60,7 +60,8 @@ class IndavideoEmbedIE(InfoExtractor): formats = [{ 'url': video_url, - 'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None), + 'height': int_or_none(self._search_regex( + r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)), } for video_url in video_urls] self._sort_formats(formats) From 4cad2929cd7e90be174ae6b0ad0c7d9f47795374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:30:44 +0700 Subject: [PATCH 462/501] [limelight] Fix _VALID_URLs --- youtube_dl/extractor/limelight.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 2599d45c3..8dbc940a7 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -123,7 +123,7 @@ class LimelightBaseIE(InfoExtractor): class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' - _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bmediaId=)(?P<id>[a-z0-9]{32})' _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 'info_dict': 
{ @@ -176,7 +176,7 @@ class LimelightMediaIE(LimelightBaseIE): class LimelightChannelIE(LimelightBaseIE): IE_NAME = 'limelight:channel' - _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelId=)(?P<id>[a-z0-9]{32})' _TEST = { 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', 'info_dict': { From 79027c0ea02d4f296aefe6ca6e5af393c2a4a209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:40:02 +0700 Subject: [PATCH 463/501] [limelight] Improve _VALID_URLs --- youtube_dl/extractor/limelight.py | 56 +++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 8dbc940a7..da5d198b9 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -123,7 +123,18 @@ class LimelightBaseIE(InfoExtractor): class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' - _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bmediaId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'''(?x) + (?: + limelight:media:| + https?:// + (?: + link\.videoplatform\.limelight\.com/media/| + assets\.delvenetworks\.com/player/loader\.swf + ) + \?.*?\bmediaId= + ) + (?P<id>[a-z0-9]{32}) + ''' _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 'info_dict': { @@ -158,6 +169,9 @@ class LimelightMediaIE(LimelightBaseIE): # rtmp download 'skip_download': True, }, + }, { + 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452', + 'only_matching': True, }] _PLAYLIST_SERVICE_PATH = 'media' _API_PATH = 'media' @@ -176,15 +190,29 @@ class 
LimelightMediaIE(LimelightBaseIE): class LimelightChannelIE(LimelightBaseIE): IE_NAME = 'limelight:channel' - _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelId=)(?P<id>[a-z0-9]{32})' - _TEST = { + _VALID_URL = r'''(?x) + (?: + limelight:channel:| + https?:// + (?: + link\.videoplatform\.limelight\.com/media/| + assets\.delvenetworks\.com/player/loader\.swf + ) + \?.*?\bchannelId= + ) + (?P<id>[a-z0-9]{32}) + ''' + _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', 'info_dict': { 'id': 'ab6a524c379342f9b23642917020c082', 'title': 'Javascript Sample Code', }, 'playlist_mincount': 3, - } + }, { + 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082', + 'only_matching': True, + }] _PLAYLIST_SERVICE_PATH = 'channel' _API_PATH = 'channels' @@ -207,15 +235,29 @@ class LimelightChannelIE(LimelightBaseIE): class LimelightChannelListIE(LimelightBaseIE): IE_NAME = 'limelight:channel_list' - _VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})' - _TEST = { + _VALID_URL = r'''(?x) + (?: + limelight:channel_list:| + https?:// + (?: + link\.videoplatform\.limelight\.com/media/| + assets\.delvenetworks\.com/player/loader\.swf + ) + \?.*?\bchannelListId= + ) + (?P<id>[a-z0-9]{32}) + ''' + _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', 'info_dict': { 'id': '301b117890c4465c8179ede21fd92e2b', 'title': 'Website - Hero Player', }, 'playlist_mincount': 2, - } + }, { + 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b', + 'only_matching': True, + }] _PLAYLIST_SERVICE_PATH = 'channel_list' def _real_extract(self, url): From 21ac1a8ac3f2a3c301ad8c08730166a8fd82c287 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:52:50 +0700 Subject: [PATCH 464/501] [limelight] Fix typo --- youtube_dl/extractor/limelight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index da5d198b9..a25fb8e2c 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -98,7 +98,7 @@ class LimelightBaseIE(InfoExtractor): } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')] subtitles = {} - for caption in properties.get('captions', {}): + for caption in properties.get('captions', []): lang = caption.get('language_code') subtitles_url = caption.get('url') if lang and subtitles_url: From fe458b65965e5a847a24d00138b723ce67b274e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:57:27 +0700 Subject: [PATCH 465/501] [limelight] Extract ttml subtitles (Closes #9739) --- youtube_dl/extractor/limelight.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index a25fb8e2c..5d2c3e256 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -102,9 +102,15 @@ class LimelightBaseIE(InfoExtractor): lang = caption.get('language_code') subtitles_url = caption.get('url') if lang and subtitles_url: - subtitles[lang] = [{ + subtitles.setdefault(lang, []).append({ 'url': subtitles_url, - }] + }) + closed_captions_url = properties.get('closed_captions_url') + if closed_captions_url: + subtitles.setdefault('en', []).append({ + 'url': closed_captions_url, + 'ext': 'ttml', + }) return { 'id': video_id, From 698f127c1a9dd460c8dede59df6a0e2ce69f913a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 06:14:22 +0700 Subject: [PATCH 466/501] [setup.py] Add python 3.5 classifier --- 
setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 9444d403d..c1e923f71 100644 --- a/setup.py +++ b/setup.py @@ -122,6 +122,7 @@ setup( "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", ], cmdclass={'build_lazy_extractors': build_lazy_extractors}, From 33751818d3e31270304db519849d85bec43e9c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 08:28:51 +0700 Subject: [PATCH 467/501] release 2016.06.11.2 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 564cffae7..8fa97ee87 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11.1 +[debug] youtube-dl version 2016.06.11.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5bcb6a7b3..f6cc8b79e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11.1' +__version__ = '2016.06.11.2' From 4a420119a6e0b7363f9d31e37d3e7af818bedfd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 08:34:30 +0700 Subject: [PATCH 468/501] release 2016.06.11.3 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8fa97ee87..a46b75fd8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.2** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.3*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.3** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11.2 +[debug] youtube-dl version 2016.06.11.3 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f6cc8b79e..9932b1e62 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11.2' +__version__ = '2016.06.11.3' From 47787efa2b6bd5dc1b6f6cb7027586bac2de4c6c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 13:13:16 +0800 Subject: [PATCH 469/501] [leeco] Recognize Le Sports URLs (fixes #9750) --- youtube_dl/extractor/leeco.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 375fdaed1..63f581cd9 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -28,7 
+28,7 @@ from ..utils import ( class LeIE(InfoExtractor): IE_DESC = '乐视网' - _VALID_URL = r'https?://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html' _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' @@ -69,6 +69,9 @@ class LeIE(InfoExtractor): 'hls_prefer_native': True, }, 'skip': 'Only available in China', + }, { + 'url': 'http://sports.le.com/video/25737697.html', + 'only_matching': True, }] @staticmethod @@ -196,7 +199,7 @@ class LeIE(InfoExtractor): class LePlaylistIE(InfoExtractor): - _VALID_URL = r'https?://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)' + _VALID_URL = r'https?://[a-z]+\.le\.com/(?!video)[a-z]+/(?P<id>[a-z0-9_]+)' _TESTS = [{ 'url': 'http://www.le.com/tv/46177.html', From 7aab3696dd02ca45feba523b4194d6430939dd1c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 15:37:04 +0800 Subject: [PATCH 470/501] [kuwo] Update _TESTS --- youtube_dl/extractor/kuwo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 11b31a699..0221fb919 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -148,8 +148,8 @@ class KuwoAlbumIE(InfoExtractor): 'url': 'http://www.kuwo.cn/album/502294/', 'info_dict': { 'id': '502294', - 'title': 'M', - 'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c', + 'title': 'Made\xa0Series\xa0《M》', + 'description': 'md5:d463f0d8a0ff3c3ea3d6ed7452a9483f', }, 'playlist_count': 2, } @@ -209,7 +209,7 @@ class KuwoSingerIE(InfoExtractor): 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 'info_dict': { 'id': 'bruno+mars', - 'title': 'Bruno Mars', + 'title': 'Bruno\xa0Mars', }, 'playlist_mincount': 329, }, { @@ -306,7 +306,7 @@ class KuwoMvIE(KuwoBaseIE): 'id': '6480076', 'ext': 'mp4', 'title': 'My HouseMV', - 'creator': '2PM', + 'creator': 'PM02:00', }, # In this video, music URLs (anti.s) are blocked 
outside China and # USA, while the MV URL (mvurl) is available globally, so force the MV From 15d106787e8c21e4d4df95957062bd07c873d203 Mon Sep 17 00:00:00 2001 From: Paul Henning <vxbinaca@users.noreply.github.com> Date: Sat, 11 Jun 2016 05:36:31 -0400 Subject: [PATCH 471/501] [utils] Change Firefox 44 to 47 See commit title. --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f77ab8650..0acbd67de 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -76,7 +76,7 @@ def register_socks_protocols(): compiled_regex_type = type(re.compile('')) std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From 856150d05647904a5cf6c519c6e276ce3536bd20 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 18:22:26 +0800 Subject: [PATCH 472/501] [telewebion] Add new extractor (closes #5135) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/telewebion.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/telewebion.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 38708294a..36ddc1f73 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -777,6 +777,7 @@ from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telemb import TeleMBIE from .teletask import TeleTaskIE +from .telewebion import TelewebionIE from .testurl import TestURLIE from .tf1 import TF1IE from .theintercept import TheInterceptIE diff --git a/youtube_dl/extractor/telewebion.py 
b/youtube_dl/extractor/telewebion.py new file mode 100644 index 000000000..77916c601 --- /dev/null +++ b/youtube_dl/extractor/telewebion.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TelewebionIE(InfoExtractor): + _VALID_URL = r'https?://www\.telewebion\.com/#!/episode/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.telewebion.com/#!/episode/1263668/', + 'info_dict': { + 'id': '1263668', + 'ext': 'mp4', + 'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا', + 'thumbnail': 're:^https?://.*\.jpg', + 'view_count': int, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + secure_token = self._download_webpage( + 'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id) + episode_details = self._download_json( + 'http://m.s2.telewebion.com/op/op', video_id, + query={'action': 'getEpisodeDetails', 'episode_id': video_id}) + + m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % ( + video_id, episode_details['file_path'], secure_token) + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', m3u8_id='hls') + + picture_paths = [ + episode_details.get('picture_path'), + episode_details.get('large_picture_path'), + ] + + thumbnails = [{ + 'url': picture_path, + 'preference': idx, + } for idx, picture_path in enumerate(picture_paths) if picture_path is not None] + + return { + 'id': video_id, + 'title': episode_details['title'], + 'formats': formats, + 'thumbnails': thumbnails, + 'view_count': episode_details.get('view_count'), + } From c5edd147d1d2cf0502f5ef48652c88a75ef62529 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 18:33:37 +0800 Subject: [PATCH 473/501] [generic] Remove an invalid test Now handled by telewebion.py --- youtube_dl/extractor/generic.py | 14 -------------- 1 file changed, 14 deletions(-) 
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ef18ce3dc..4aa24061c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1073,20 +1073,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, - # Contains a SMIL manifest - { - 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html', - 'info_dict': { - 'id': 'file', - 'ext': 'flv', - 'title': '+ Football: Lottery Champions League Europe', - 'uploader': 'www.telewebion.com', - }, - 'params': { - # rtmpe downloads - 'skip_download': True, - } - }, # Brightcove URL in single quotes { 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/', From 531a74968c24416cb2e4a79c9bfbcc9d02368e44 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 21:35:08 +0800 Subject: [PATCH 474/501] [vimeo] Fix extraction for VimeoReview videos --- youtube_dl/extractor/vimeo.py | 147 +++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 59f9cb1ae..0fd2c18a0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -66,6 +66,69 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) + def _vimeo_sort_formats(self, formats): + # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps + # at the same time without actual units specified. This lead to wrong sorting. 
+ self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) + + def _parse_config(self, config, video_id): + # Extract title + video_title = config['video']['title'] + + # Extract uploader, uploader_url and uploader_id + video_uploader = config['video'].get('owner', {}).get('name') + video_uploader_url = config['video'].get('owner', {}).get('url') + video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None + + # Extract video thumbnail + video_thumbnail = config['video'].get('thumbnail') + if video_thumbnail is None: + video_thumbs = config['video'].get('thumbs') + if video_thumbs and isinstance(video_thumbs, dict): + _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] + + # Extract video duration + video_duration = int_or_none(config['video'].get('duration')) + + formats = [] + config_files = config['video'].get('files') or config['request'].get('files', {}) + for f in config_files.get('progressive', []): + video_url = f.get('url') + if not video_url: + continue + formats.append({ + 'url': video_url, + 'format_id': 'http-%s' % f.get('quality'), + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'fps': int_or_none(f.get('fps')), + 'tbr': int_or_none(f.get('bitrate')), + }) + m3u8_url = config_files.get('hls', {}).get('url') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + subtitles = {} + text_tracks = config['request'].get('text_tracks') + if text_tracks: + for tt in text_tracks: + subtitles[tt['lang']] = [{ + 'ext': 'vtt', + 'url': 'https://vimeo.com' + tt['url'], + }] + + return { + 'title': video_title, + 'uploader': video_uploader, + 'uploader_id': video_uploader_id, + 'uploader_url': video_uploader_url, + 'thumbnail': video_thumbnail, + 'duration': video_duration, + 'formats': formats, + 'subtitles': 
subtitles, + } + class VimeoIE(VimeoBaseInfoExtractor): """Information extractor for vimeo.com.""" @@ -153,7 +216,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, - 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026', + 'description': 'This is "youtube-dl password protected test video" by on Vimeo, the home for high quality videos and the people who love them.', }, 'params': { 'videopassword': 'youtube-dl', @@ -389,21 +452,6 @@ class VimeoIE(VimeoBaseInfoExtractor): 'https://player.vimeo.com/player/%s' % feature_id, {'force_feature_id': True}), 'Vimeo') - # Extract title - video_title = config['video']['title'] - - # Extract uploader, uploader_url and uploader_id - video_uploader = config['video'].get('owner', {}).get('name') - video_uploader_url = config['video'].get('owner', {}).get('url') - video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None - - # Extract video thumbnail - video_thumbnail = config['video'].get('thumbnail') - if video_thumbnail is None: - video_thumbs = config['video'].get('thumbs') - if video_thumbs and isinstance(video_thumbs, dict): - _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] - # Extract video description video_description = self._html_search_regex( @@ -423,9 +471,6 @@ class VimeoIE(VimeoBaseInfoExtractor): if not video_description and not mobj.group('player'): self._downloader.report_warning('Cannot find video description') - # Extract video duration - video_duration = int_or_none(config['video'].get('duration')) - # Extract upload date video_upload_date = None mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage) @@ -463,53 +508,22 @@ class VimeoIE(VimeoBaseInfoExtractor): 'format_id': source_name, 'preference': 1, }) - config_files = 
config['video'].get('files') or config['request'].get('files', {}) - for f in config_files.get('progressive', []): - video_url = f.get('url') - if not video_url: - continue - formats.append({ - 'url': video_url, - 'format_id': 'http-%s' % f.get('quality'), - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')), - 'fps': int_or_none(f.get('fps')), - 'tbr': int_or_none(f.get('bitrate')), - }) - m3u8_url = config_files.get('hls', {}).get('url') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps - # at the same time without actual units specified. This lead to wrong sorting. - self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) - subtitles = {} - text_tracks = config['request'].get('text_tracks') - if text_tracks: - for tt in text_tracks: - subtitles[tt['lang']] = [{ - 'ext': 'vtt', - 'url': 'https://vimeo.com' + tt['url'], - }] - - return { + info_dict = self._parse_config(config, video_id) + formats.extend(info_dict['formats']) + self._vimeo_sort_formats(formats) + info_dict.update({ 'id': video_id, - 'uploader': video_uploader, - 'uploader_url': video_uploader_url, - 'uploader_id': video_uploader_id, - 'upload_date': video_upload_date, - 'title': video_title, - 'thumbnail': video_thumbnail, - 'description': video_description, - 'duration': video_duration, 'formats': formats, + 'upload_date': video_upload_date, + 'description': video_description, 'webpage_url': url, 'view_count': view_count, 'like_count': like_count, 'comment_count': comment_count, - 'subtitles': subtitles, - } + }) + + return info_dict class VimeoOndemandIE(VimeoBaseInfoExtractor): @@ -692,7 +706,7 @@ class VimeoGroupsIE(VimeoAlbumIE): return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name) -class VimeoReviewIE(InfoExtractor): +class 
VimeoReviewIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)' @@ -704,6 +718,7 @@ class VimeoReviewIE(InfoExtractor): 'ext': 'mp4', 'title': "DICK HARDWICK 'Comedian'", 'uploader': 'Richard Hardwick', + 'uploader_id': 'user21297594', } }, { 'note': 'video player needs Referer', @@ -716,14 +731,18 @@ class VimeoReviewIE(InfoExtractor): 'uploader': 'DevWeek Events', 'duration': 2773, 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader_id': 'user22258446', } }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - player_url = 'https://player.vimeo.com/player/' + video_id - return self.url_result(player_url, 'Vimeo', video_id) + video_id = self._match_id(url) + config = self._download_json( + 'https://player.vimeo.com/video/%s/config' % video_id, video_id) + info_dict = self._parse_config(config, video_id) + self._vimeo_sort_formats(info_dict['formats']) + info_dict['id'] = video_id + return info_dict class VimeoWatchLaterIE(VimeoChannelIE): From 94e5d6aedb5b509601d29dd8ea352afa925d3b22 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 21:49:01 +0800 Subject: [PATCH 475/501] [viki] Skip a geo-restricted test --- youtube_dl/extractor/viki.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index e04b814c8..0c0cd622a 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -156,7 +156,8 @@ class VikiIE(VikiBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, + 'skip': 'Blocked in the US', }, { # episode 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', From c83b35d4aa4cec98ac171cca94ec515500076926 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 22:39:13 +0800 Subject: [PATCH 476/501] [viki] Update _TESTS --- 
youtube_dl/extractor/viki.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 0c0cd622a..70ce5de0e 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -127,7 +127,7 @@ class VikiIE(VikiBaseIE): }, { # clip 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', - 'md5': '86c0b5dbd4d83a6611a79987cc7a1989', + 'md5': 'feea2b1d7b3957f70886e6dfd8b8be84', 'info_dict': { 'id': '1067139v', 'ext': 'mp4', @@ -161,13 +161,13 @@ class VikiIE(VikiBaseIE): }, { # episode 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', - 'md5': '190f3ef426005ba3a080a63325955bc3', + 'md5': '1f54697dabc8f13f31bf06bb2e4de6db', 'info_dict': { 'id': '44699v', 'ext': 'mp4', 'title': 'Boys Over Flowers - Episode 1', - 'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2', - 'duration': 4155, + 'description': 'md5:b89cf50038b480b88b5b3c93589a9076', + 'duration': 4204, 'timestamp': 1270496524, 'upload_date': '20100405', 'uploader': 'group8', @@ -197,7 +197,7 @@ class VikiIE(VikiBaseIE): }, { # non-English description 'url': 'http://www.viki.com/videos/158036v-love-in-magic', - 'md5': '1713ae35df5a521b31f6dc40730e7c9c', + 'md5': '013dc282714e22acf9447cad14ff1208', 'info_dict': { 'id': '158036v', 'ext': 'mp4', @@ -303,7 +303,7 @@ class VikiChannelIE(VikiBaseIE): 'title': 'Boys Over Flowers', 'description': 'md5:ecd3cff47967fe193cff37c0bec52790', }, - 'playlist_count': 70, + 'playlist_mincount': 71, }, { 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete', 'info_dict': { From 6d28c408cfb0ce42f591cc6e2bb67522c0812c72 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 23:00:44 +0800 Subject: [PATCH 477/501] [viki] Do not use a fallback language for title in the first try In test_Viki_3, 'titles' gives a Hebrew title. 
--- youtube_dl/extractor/viki.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 70ce5de0e..efa15e0b6 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -101,10 +101,13 @@ class VikiBaseIE(InfoExtractor): self.report_warning('Unable to get session token, login has probably failed') @staticmethod - def dict_selection(dict_obj, preferred_key): + def dict_selection(dict_obj, preferred_key, allow_fallback=True): if preferred_key in dict_obj: return dict_obj.get(preferred_key) + if not allow_fallback: + return + filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()])) return filtered_dict[0] if filtered_dict else None @@ -218,7 +221,7 @@ class VikiIE(VikiBaseIE): self._check_errors(video) - title = self.dict_selection(video.get('titles', {}), 'en') + title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False) if not title: title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id container_titles = video.get('container', {}).get('titles', {}) From 80ae228b344ce36a07fb91c7e968fc5249c03161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 01:57:23 +0700 Subject: [PATCH 478/501] [matchtv] Modernize --- youtube_dl/extractor/matchtv.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py index 80a0d7013..33b0b539f 100644 --- a/youtube_dl/extractor/matchtv.py +++ b/youtube_dl/extractor/matchtv.py @@ -4,16 +4,12 @@ from __future__ import unicode_literals import random from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import ( - sanitized_Request, - xpath_text, -) +from ..utils import xpath_text class MatchTVIE(InfoExtractor): - _VALID_URL = 
r'https?://matchtv\.ru/?#live-player' - _TEST = { + _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)' + _TESTS = [{ 'url': 'http://matchtv.ru/#live-player', 'info_dict': { 'id': 'matchtv-live', @@ -24,12 +20,16 @@ class MatchTVIE(InfoExtractor): 'params': { 'skip_download': True, }, - } + }, { + 'url': 'http://matchtv.ru/on-air/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = 'matchtv-live' - request = sanitized_Request( - 'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse_urlencode({ + video_url = self._download_json( + 'http://player.matchtv.ntvplus.tv/player/smil', video_id, + query={ 'ts': '', 'quality': 'SD', 'contentId': '561d2c0df7159b37178b4567', @@ -40,11 +40,10 @@ class MatchTVIE(InfoExtractor): 'contentType': 'channel', 'timeShift': '0', 'platform': 'portal', - }), + }, headers={ 'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf', - }) - video_url = self._download_json(request, video_id)['data']['videoUrl'] + })['data']['videoUrl'] f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') formats = self._extract_f4m_formats(f4m_url, video_id) self._sort_formats(formats) From 2c3322e36ef23eb0566b820dd8e8711de20ed963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 04:49:37 +0700 Subject: [PATCH 479/501] [youporn] Fix metadata extraction --- youtube_dl/extractor/youporn.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 1124fe6c2..0df2d76ee 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -17,7 +17,7 @@ class YouPornIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', - 'md5': '71ec5fcfddacf80f495efa8b6a8d9a89', + 
'md5': '3744d24c50438cf5b6f6d59feb5055c2', 'info_dict': { 'id': '505835', 'display_id': 'sex-ed-is-it-safe-to-masturbate-daily', @@ -121,21 +121,21 @@ class YouPornIE(InfoExtractor): webpage, 'thumbnail', fatal=False, group='thumbnail') uploader = self._html_search_regex( - r'(?s)<div[^>]+class=["\']videoInfoBy(?:\s+[^"\']+)?["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>', + r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', webpage, 'uploader', fatal=False) upload_date = unified_strdate(self._html_search_regex( - r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>', + r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>', webpage, 'upload date', fatal=False)) age_limit = self._rta_search(webpage) average_rating = int_or_none(self._search_regex( - r'<div[^>]+class=["\']videoInfoRating["\'][^>]*>\s*<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>', + r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>', webpage, 'average rating', fatal=False)) view_count = str_to_int(self._search_regex( - r'(?s)<div[^>]+class=["\']videoInfoViews["\'][^>]*>.*?([\d,.]+)\s*</div>', - webpage, 'view count', fatal=False)) + r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<', + webpage, 'view count', fatal=False, group='count')) comment_count = str_to_int(self._search_regex( r'>All [Cc]omments? 
\(([\d,.]+)\)', webpage, 'comment count', fatal=False)) From 329ca3bef695bff011ed9b2d5f03e1331bf5bf0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:05:34 +0700 Subject: [PATCH 480/501] [utils] Add try_get To reduce boilerplate when accessing JSON --- youtube_dl/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0acbd67de..c8308ba3a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1901,6 +1901,16 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True): return d.get(key_or_keys, default) +def try_get(src, getter, expected_type=None): + try: + v = getter(src) + except (AttributeError, KeyError, TypeError, IndexError): + pass + else: + if expected_type is None or isinstance(v, expected_type): + return v + + def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): return string if isinstance(string, compat_str) else compat_str(string, encoding, errors) From 98960c911c9bacc0c366dd11b194963a82606850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:06:04 +0700 Subject: [PATCH 481/501] [instagram] Extract metadata from JSON --- youtube_dl/extractor/instagram.py | 72 ++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 3cbe77ad8..fc0197ae1 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -8,6 +8,7 @@ from ..utils import ( int_or_none, limit_length, lowercase_escape, + try_get, ) @@ -19,10 +20,16 @@ class InstagramIE(InfoExtractor): 'info_dict': { 'id': 'aye83DjauH', 'ext': 'mp4', - 'uploader_id': 'naomipq', 'title': 'Video by naomipq', 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', - } + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1371748545, + 'upload_date': 
'20130620', + 'uploader_id': 'naomipq', + 'uploader': 'Naomi Leonor Phan-Quang', + 'like_count': int, + 'comment_count': int, + }, }, { # missing description 'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears', @@ -31,6 +38,13 @@ class InstagramIE(InfoExtractor): 'ext': 'mp4', 'uploader_id': 'britneyspears', 'title': 'Video by britneyspears', + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1453760977, + 'upload_date': '20160125', + 'uploader_id': 'britneyspears', + 'uploader': 'Britney Spears', + 'like_count': int, + 'comment_count': int, }, 'params': { 'skip_download': True, @@ -67,21 +81,57 @@ class InstagramIE(InfoExtractor): url = mobj.group('url') webpage = self._download_webpage(url, video_id) - uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', - webpage, 'uploader id', fatal=False) - desc = self._search_regex( - r'"caption":"(.+?)"', webpage, 'description', default=None) - if desc is not None: - desc = lowercase_escape(desc) + + (video_url, description, thumbnail, timestamp, uploader, + uploader_id, like_count, comment_count) = [None] * 8 + + shared_data = self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + webpage, 'shared data', default='{}'), + video_id, fatal=False) + if shared_data: + media = try_get( + shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) + if media: + video_url = media.get('video_url') + description = media.get('caption') + thumbnail = media.get('display_src') + timestamp = int_or_none(media.get('date')) + uploader = media.get('owner', {}).get('full_name') + uploader_id = media.get('owner', {}).get('username') + like_count = int_or_none(media.get('likes', {}).get('count')) + comment_count = int_or_none(media.get('comments', {}).get('count')) + + if not video_url: + video_url = self._og_search_video_url(webpage, secure=False) + + if not uploader_id: + uploader_id = self._search_regex( + r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', + webpage, 
'uploader id', fatal=False) + + if not description: + description = self._search_regex( + r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None) + if description is not None: + description = lowercase_escape(description) + + if not thumbnail: + thumbnail = self._og_search_thumbnail(webpage) return { 'id': video_id, - 'url': self._og_search_video_url(webpage, secure=False), + 'url': video_url, 'ext': 'mp4', 'title': 'Video by %s' % uploader_id, - 'thumbnail': self._og_search_thumbnail(webpage), + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, 'uploader_id': uploader_id, - 'description': desc, + 'uploader': uploader, + 'like_count': like_count, + 'comment_count': comment_count, } From a936ac321c5c0cee8e9769334945e744cdc60ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:39:31 +0700 Subject: [PATCH 482/501] [README.md] Document using output template in batch files (Closes #9717) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 3ff33c156..43e5114ea 100644 --- a/README.md +++ b/README.md @@ -553,6 +553,10 @@ The current default template is `%(title)s-%(id)s.%(ext)s`. In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: +#### Output template and Windows batch files + +If you are using output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`. 
+ #### Output template examples Note on Windows you may need to use double quotes instead of single. From 4e790117292d060a3e449c9edfffe14d231aee96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:57:04 +0700 Subject: [PATCH 483/501] [nrktv] Fix tests --- youtube_dl/extractor/nrk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 486e086bb..4a790da7b 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -163,7 +163,7 @@ class NRKTVIE(NRKBaseIE): 'ext': 'mp4', 'title': '20 spørsmål 23.05.2014', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', - 'duration': 1741.52, + 'duration': 1741, }, }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', @@ -173,7 +173,7 @@ class NRKTVIE(NRKBaseIE): 'ext': 'mp4', 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db', - 'duration': 4605.08, + 'duration': 4605, }, }, { # single playlist video From 971e3b7520563936f6e6946f5c08d64f65ab6f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 07:20:37 +0700 Subject: [PATCH 484/501] [nrk:skole] Fix extraction --- youtube_dl/extractor/nrk.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 4a790da7b..6ded5bd45 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -260,30 +260,34 @@ class NRKPlaylistIE(InfoExtractor): class NRKSkoleIE(InfoExtractor): IE_DESC = 'NRK Skole' - _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/klippdetalj?.*\btopic=(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)' _TESTS = [{ - 'url': 'http://nrk.no/skole/klippdetalj?topic=nrk:klipp/616532', - 'md5': '04cd85877cc1913bce73c5d28a47e00f', + 'url': 
'https://www.nrk.no/skole/?page=search&q=&mediaId=14099', + 'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6', 'info_dict': { 'id': '6021', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Genetikk og eneggede tvillinger', 'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d', 'duration': 399, }, }, { - 'url': 'http://www.nrk.no/skole/klippdetalj?topic=nrk%3Aklipp%2F616532#embed', - 'only_matching': True, - }, { - 'url': 'http://www.nrk.no/skole/klippdetalj?topic=urn:x-mediadb:21379', + 'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355', 'only_matching': True, }] def _real_extract(self, url): - video_id = compat_urllib_parse_unquote(self._match_id(url)) + video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id, + video_id) + + nrk_id = self._parse_json( + self._search_regex( + r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>', + webpage, 'application json'), + video_id)['activeMedia']['psId'] - nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id') return self.url_result('nrk:%s' % nrk_id) From 84dcd1c4e47f2a5a84a4658f42c66f7546588001 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 11:08:39 +0700 Subject: [PATCH 485/501] [streamcloud] Detect removed videos (Closes #3768) --- youtube_dl/extractor/streamcloud.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index 712359885..58560ec64 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, sanitized_Request, urlencode_postdata, ) @@ -14,7 +15,7 @@ class StreamcloudIE(InfoExtractor): IE_NAME = 'streamcloud.eu' _VALID_URL = 
r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?' - _TEST = { + _TESTS = [{ 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', 'md5': '6bea4c7fa5daaacc2a946b7146286686', 'info_dict': { @@ -23,7 +24,10 @@ class StreamcloudIE(InfoExtractor): 'title': 'youtube-dl test video \'/\\ ä ↭', }, 'skip': 'Only available from the EU' - } + }, { + 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -31,6 +35,10 @@ class StreamcloudIE(InfoExtractor): orig_webpage = self._download_webpage(url, video_id) + if '>File Not Found<' in orig_webpage: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) + fields = re.findall(r'''(?x)<input\s+ type="(?:hidden|submit)"\s+ name="([^"]+)"\s+ From 77a9a9c295c753c4de4c96def6a9a15de1025f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 12:06:48 +0700 Subject: [PATCH 486/501] release 2016.06.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a46b75fd8..243f2de5d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.3*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.3** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.12*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11.3 +[debug] youtube-dl version 2016.06.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f89c2d1f2..e8c0a5d24 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek** - **ARD:mediathek**: Saarländischer Rundfunk + - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -647,6 +647,7 @@ - **Telegraaf** - **TeleMB** - **TeleTask** + - **Telewebion** - **TF1** - **TheIntercept** - **ThePlatform** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9932b1e62..5e9c14398 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11.3' +__version__ = '2016.06.12' From e69f9f5d68aed32cc27ca188b0f51925d949c365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 16:45:07 +0700 Subject: [PATCH 487/501] 
[downloader/external] Decode error string before writing to stderr --- youtube_dl/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 3ff1f9ed4..fae245024 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -85,7 +85,7 @@ class ExternalFD(FileDownloader): cmd, stderr=subprocess.PIPE) _, stderr = p.communicate() if p.returncode != 0: - self.to_stderr(stderr) + self.to_stderr(stderr.decode('utf-8', 'replace')) return p.returncode From bccdac68749e7a39a47dd0e1ad0ec9c177657de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 01:11:04 +0700 Subject: [PATCH 488/501] [xfileshare:xvidstage] Add support for videos with packed codes (Closes #4335) --- youtube_dl/extractor/xfileshare.py | 31 ++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index ee4d04c20..fe0ab6300 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -5,8 +5,10 @@ import re from .common import InfoExtractor from ..utils import ( + decode_packed_codes, ExtractorError, int_or_none, + NO_DEFAULT, sanitized_Request, urlencode_postdata, ) @@ -23,6 +25,7 @@ class XFileShareIE(InfoExtractor): ('thevideobee.to', 'TheVideoBee'), ('vidto.me', 'Vidto'), ('streamin.to', 'Streamin.To'), + ('xvidstage.com', 'XVIDSTAGE'), ) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) @@ -78,6 +81,13 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'Big Buck Bunny trailer', }, + }, { + 'url': 'http://xvidstage.com/e0qcnl03co6z', + 'info_dict': { + 'id': 'e0qcnl03co6z', + 'ext': 'mp4', + 'title': 'Chucky Prank 2015.mp4', + }, }] def _real_extract(self, url): @@ -113,10 +123,23 @@ class XFileShareIE(InfoExtractor): r'>Watch (.+) ', r'<h2 
class="video-page-head">([^<]+)</h2>'], webpage, 'title', default=None) or self._og_search_title(webpage)).strip() - video_url = self._search_regex( - [r'file\s*:\s*["\'](http[^"\']+)["\'],', - r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-z.\/\-_]+)'], - webpage, 'file url') + + def extract_video_url(default=NO_DEFAULT): + return self._search_regex( + (r'file\s*:\s*(["\'])(?P<url>http.+?)\1,', + r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1', + r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)', + r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'), + webpage, 'file url', default=default, group='url') + + video_url = extract_video_url(default=None) + + if not video_url: + webpage = decode_packed_codes(self._search_regex( + r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))", + webpage, 'packed code')) + video_url = extract_video_url() + thumbnail = self._search_regex( r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None) From cf2bf840bac1742cb422549a5491a30f70d1abb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 01:11:14 +0700 Subject: [PATCH 489/501] [xfileshare] Fix test --- youtube_dl/extractor/xfileshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index fe0ab6300..0f8ccf430 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -39,7 +39,7 @@ class XFileShareIE(InfoExtractor): 'md5': '5ae4a3580620380619678ee4875893ba', 'info_dict': { 'id': '06y9juieqpmi', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ', 'thumbnail': 're:http://.*\.jpg', }, From 33b72ce64e8705a71f8ab0e6a322e5f9f3b99276 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 01:19:54 +0700 Subject: [PATCH 490/501] [xfileshare] Improve removed videos 
detection --- youtube_dl/extractor/xfileshare.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 0f8ccf430..995aada0d 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -32,7 +32,10 @@ class XFileShareIE(InfoExtractor): _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0])) - _FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<' + _FILE_NOT_FOUND_REGEXES = ( + r'>(?:404 - )?File Not Found<', + r'>The file was removed by administrator<', + ) _TESTS = [{ 'url': 'http://gorillavid.in/06y9juieqpmi', @@ -88,6 +91,10 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'Chucky Prank 2015.mp4', }, + }, { + # removed by administrator + 'url': 'http://xvidstage.com/amfy7atlkx25', + 'only_matching': True, }] def _real_extract(self, url): @@ -97,7 +104,7 @@ class XFileShareIE(InfoExtractor): url = 'http://%s/%s' % (mobj.group('host'), video_id) webpage = self._download_webpage(url, video_id) - if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None: + if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): raise ExtractorError('Video %s does not exist' % video_id, expected=True) fields = self._hidden_inputs(webpage) From b50e02c1e4c9ea70e88ab115b17cfa109b0c9617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 07:05:32 +0700 Subject: [PATCH 491/501] [README.md] Update links to options available for YoutubeDL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 43e5114ea..b5cbaced7 100644 --- a/README.md +++ b/README.md @@ -964,7 +964,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. 
For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From b4663f12b1c872f4e731f1940831ec017bc86959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 07:16:35 +0700 Subject: [PATCH 492/501] [README.md] Update links to info dict metafields --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b5cbaced7..5a9768161 100644 --- a/README.md +++ b/README.md @@ -935,8 +935,8 @@ After you have ensured this site is distributing it's content legally, you can f ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. 
Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. -8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. +7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. +8. 
Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: From 79cd8b3d8acee7845260d5bd60698155a0d81d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 10:04:04 +0700 Subject: [PATCH 493/501] [README.md] Suggest checking extractor code under all Python versions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a9768161..f1e59542d 100644 --- a/README.md +++ b/README.md @@ -937,7 +937,7 @@ After you have ensured this site is distributing it's content legally, you can f 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. 
This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. -9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). +9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 
Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py From 778f96944785f814a97964be1d6fb3bb78bc13f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 00:06:31 +0700 Subject: [PATCH 494/501] [twitch:clips] Add extractor (Closes #9767) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/twitch.py | 43 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 36ddc1f73..d2db4d803 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -862,6 +862,7 @@ from .twitch import ( TwitchProfileIE, TwitchPastBroadcastsIE, TwitchStreamIE, + TwitchClipsIE, ) from .twitter import ( TwitterCardIE, diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index d898f14c3..20919774d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -16,6 +16,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + js_to_json, orderedSet, parse_duration, parse_iso8601, @@ -454,3 +455,45 @@ class TwitchStreamIE(TwitchBaseIE): 'formats': formats, 'is_live': True, } + + +class TwitchClipsIE(InfoExtractor): + IE_NAME = 'twitch:clips' + _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' + + _TEST = { + 'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound', + 'md5': '761769e1eafce0ffebfb4089cb3847cd', + 'info_dict': { + 'id': 'AggressiveCobraPoooound', + 'ext': 'mp4', + 'title': 'EA Play 2016 Live from the Novo Theatre', + 'thumbnail': 're:^https?://.*\.jpg', + 'creator': 'EA', 
+ 'uploader': 'stereotype_', + 'uploader_id': 'stereotype_', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + clip = self._parse_json( + self._search_regex( + r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'), + video_id, transform_source=js_to_json) + + video_url = clip['clip_video_url'] + title = clip['channel_title'] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'), + 'uploader': clip.get('curator_login'), + 'uploader_id': clip.get('curator_display_name'), + } From 14d0f4e0f3e1b6a467b6302eb60644535aff4292 Mon Sep 17 00:00:00 2001 From: Dracony <draconyster@gmail.com> Date: Thu, 9 Jun 2016 13:31:22 +0200 Subject: [PATCH 495/501] Added extractor for rockstargames.com --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rockstargames.py | 54 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/rockstargames.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d2db4d803..8a6c54b97 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -649,6 +649,7 @@ from .revision3 import ( from .rice import RICEIE from .ringtv import RingTVIE from .ro220 import Ro220IE +from .rockstargames import RockstarGamesIE from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtbf import RTBFIE diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py new file mode 100644 index 000000000..427ab153a --- /dev/null +++ b/youtube_dl/extractor/rockstargames.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + qualities, + parse_iso8601 +) + + +class 
RockstarGamesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.rockstargames.com/videos/video/11544/', + 'md5': '03b5caa6e357a4bd50e3143fc03e5733', + 'info_dict': { + 'id': '11544', + 'ext': 'mp4', + 'title': 'Further Adventures in Finance and Felony Trailer', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', + 'upload_date': '20160602', + 'timestamp': 1464876000 + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json( + 'https://www.rockstargames.com/videoplayer/videos/get-video.json?id=%s&locale=en_us' % video_id, + video_id + )['video'] + + formats = [] + + for video in json_data['files_processed']['video/mp4']: + if not video.get('src'): + continue + height = video.get('resolution', '').replace('p', '') + + formats.append({ + 'url': self._proto_relative_url(video['src']), + 'height': int(height) if height.isdigit() else -1, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': json_data['title'], + 'description': json_data.get('description'), + 'formats': formats, + 'thumbnail': self._proto_relative_url(json_data.get('screencap')), + 'timestamp': parse_iso8601(json_data.get('created')) + } From 16b6bd01d238c2c58e3ac7ba91c706261d5810e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 01:11:24 +0700 Subject: [PATCH 496/501] [rockstargames] Improve and add Youtube fallback (Closes #9737) --- youtube_dl/extractor/rockstargames.py | 55 +++++++++++++++++---------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py index 427ab153a..48128e219 100644 --- a/youtube_dl/extractor/rockstargames.py +++ b/youtube_dl/extractor/rockstargames.py @@ -3,52 +3,67 @@ from __future__ import unicode_literals from .common import 
InfoExtractor from ..utils import ( - qualities, - parse_iso8601 + int_or_none, + parse_iso8601, ) class RockstarGamesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos/video/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)' + _TESTS = [{ 'url': 'https://www.rockstargames.com/videos/video/11544/', 'md5': '03b5caa6e357a4bd50e3143fc03e5733', 'info_dict': { 'id': '11544', 'ext': 'mp4', 'title': 'Further Adventures in Finance and Felony Trailer', - 'thumbnail': 're:^https?://.*\.jpg$', 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1464876000, 'upload_date': '20160602', - 'timestamp': 1464876000 } - } + }, { + 'url': 'http://www.rockstargames.com/videos#/?video=48', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json( - 'https://www.rockstargames.com/videoplayer/videos/get-video.json?id=%s&locale=en_us' % video_id, - video_id - )['video'] + + video = self._download_json( + 'https://www.rockstargames.com/videoplayer/videos/get-video.json', + video_id, query={ + 'id': video_id, + 'locale': 'en_us', + })['video'] + + title = video['title'] formats = [] - - for video in json_data['files_processed']['video/mp4']: + for video in video['files_processed']['video/mp4']: if not video.get('src'): continue - height = video.get('resolution', '').replace('p', '') - + resolution = video.get('resolution') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', resolution or '', 'height', default=None)) formats.append({ 'url': self._proto_relative_url(video['src']), - 'height': int(height) if height.isdigit() else -1, + 'format_id': resolution, + 'height': height, }) + + if not formats: + youtube_id = video.get('youtube_id') + if youtube_id: + return self.url_result(youtube_id, 'Youtube') + self._sort_formats(formats) return { 'id': 
video_id, - 'title': json_data['title'], - 'description': json_data.get('description'), + 'title': title, + 'description': video.get('description'), + 'thumbnail': self._proto_relative_url(video.get('screencap')), + 'timestamp': parse_iso8601(video.get('created')), 'formats': formats, - 'thumbnail': self._proto_relative_url(json_data.get('screencap')), - 'timestamp': parse_iso8601(json_data.get('created')) } From fea55ef4a95d226668bd63742c4731832de93a79 Mon Sep 17 00:00:00 2001 From: venth <artur.krysiak.warszawa@gmail.com> Date: Sun, 8 May 2016 22:26:08 +0200 Subject: [PATCH 497/501] [wrzuta.pl:playlist] Added playlist extraction from wrzuta.pl --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/wrzuta.py | 74 ++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8a6c54b97..5a93fec6a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -981,6 +981,7 @@ from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE from .wrzuta import WrzutaIE +from .wrzuta import WrzutaPlaylistIE from .wsj import WSJIE from .xbef import XBefIE from .xboxclips import XboxClipsIE diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index c42764921..830649214 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -80,3 +80,77 @@ class WrzutaIE(InfoExtractor): 'description': self._og_search_description(webpage), 'age_limit': embedpage.get('minimalAge', 0), } + + +_ENTRY_PATTERN = r'<a href="(?P<playlist_entry_url>[^"]+)" target="_blank" class="playlist\-file\-page">' +_PLAYLIST_SIZE_PATTERN = r'<div class="playlist-counter">[0-9]+/([0-9]+)</div>' + + +class WrzutaPlaylistIE(InfoExtractor): + """ + this class covers extraction of wrzuta playlist entries + the extraction process bases on following steps: + * collect information of playlist size 
+ * download all entries provided on + the playlist webpage (the playlist is split + on two pages: first directly reached from webpage + second: downloaded on demand by ajax call and rendered + using the ajax call response) + * in case size of extracted entries not reached total number of entries + use the ajax call to collect the remaining entries + """ + + IE_NAME = 'wrzuta.pl:playlist' + + _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/' \ + '(?P<id>[0-9a-zA-Z]+)/.*' + + _TESTS = [{ + 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza', + 'playlist_mincount': 14, + 'info_dict': { + 'id': '7XfO4vE84iR', + 'title': 'Moja muza', + }, + }, { + 'url': 'http://heroesf70.wrzuta.pl/playlista/6Nj3wQHx756/lipiec_-_lato_2015_muzyka_swiata', + 'playlist_mincount': 144, + 'info_dict': { + 'id': '6Nj3wQHx756', + 'title': 'Lipiec - Lato 2015 Muzyka Świata', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + uploader = mobj.group('uploader') + + entries = [] + + webpage = self._download_webpage(url, playlist_id) + + playlist_size = self._html_search_regex(_PLAYLIST_SIZE_PATTERN, webpage, 'Size of the playlist') + playlist_size = int(playlist_size) if playlist_size else 0 + + playlist_title = self._og_search_title(webpage).replace('Playlista: ', '', 1) + + if playlist_size: + entries = list(map( + lambda entry_url: self.url_result(entry_url), + re.findall(_ENTRY_PATTERN, webpage) + )) + + if playlist_size > len(entries): + playlist_content = self._download_json( + 'http://{uploader_id}.wrzuta.pl/xhr/get_playlist_offset/{playlist_id}'.format( + uploader_id=uploader, + playlist_id=playlist_id, + ), + playlist_id, + 'Downloading playlist content as JSON metadata', + 'Unable to download playlist content as JSON metadata', + ) + entries += [self.url_result(entry['filelink']) for entry in playlist_content['files']] + + return self.playlist_result(entries, playlist_id, 
playlist_title) From 1759672eede27be0a3d473c4b2925a0b10dce547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 02:13:54 +0700 Subject: [PATCH 498/501] [wrzuta:playlist] Improve and simplify (Closes #9341) --- youtube_dl/extractor/extractors.py | 6 ++-- youtube_dl/extractor/wrzuta.py | 49 ++++++++++++++---------------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5a93fec6a..5fce9f47a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -980,8 +980,10 @@ from .weiqitv import WeiqiTVIE from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE -from .wrzuta import WrzutaIE -from .wrzuta import WrzutaPlaylistIE +from .wrzuta import ( + WrzutaIE, + WrzutaPlaylistIE, +) from .wsj import WSJIE from .xbef import XBefIE from .xboxclips import XboxClipsIE diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index 830649214..b811f57fb 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, qualities, + remove_start, ) @@ -82,10 +83,6 @@ class WrzutaIE(InfoExtractor): } -_ENTRY_PATTERN = r'<a href="(?P<playlist_entry_url>[^"]+)" target="_blank" class="playlist\-file\-page">' -_PLAYLIST_SIZE_PATTERN = r'<div class="playlist-counter">[0-9]+/([0-9]+)</div>' - - class WrzutaPlaylistIE(InfoExtractor): """ this class covers extraction of wrzuta playlist entries @@ -101,10 +98,7 @@ class WrzutaPlaylistIE(InfoExtractor): """ IE_NAME = 'wrzuta.pl:playlist' - - _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/' \ - '(?P<id>[0-9a-zA-Z]+)/.*' - + _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/(?P<id>[0-9a-zA-Z]+)' _TESTS = [{ 'url': 
'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza', 'playlist_mincount': 14, @@ -119,6 +113,9 @@ class WrzutaPlaylistIE(InfoExtractor): 'id': '6Nj3wQHx756', 'title': 'Lipiec - Lato 2015 Muzyka Świata', }, + }, { + 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR', + 'only_matching': True, }] def _real_extract(self, url): @@ -126,31 +123,31 @@ class WrzutaPlaylistIE(InfoExtractor): playlist_id = mobj.group('id') uploader = mobj.group('uploader') - entries = [] - webpage = self._download_webpage(url, playlist_id) - playlist_size = self._html_search_regex(_PLAYLIST_SIZE_PATTERN, webpage, 'Size of the playlist') - playlist_size = int(playlist_size) if playlist_size else 0 + playlist_size = int_or_none(self._html_search_regex( + (r'<div[^>]+class=["\']playlist-counter["\'][^>]*>\d+/(\d+)', + r'<div[^>]+class=["\']all-counter["\'][^>]*>(.+?)</div>'), + webpage, 'playlist size', default=None)) - playlist_title = self._og_search_title(webpage).replace('Playlista: ', '', 1) + playlist_title = remove_start( + self._og_search_title(webpage), 'Playlista: ') + entries = [] if playlist_size: - entries = list(map( - lambda entry_url: self.url_result(entry_url), - re.findall(_ENTRY_PATTERN, webpage) - )) - + entries = [ + self.url_result(entry_url) + for _, entry_url in re.findall( + r'<a[^>]+href=(["\'])(http.+?)\1[^>]+class=["\']playlist-file-page', + webpage)] if playlist_size > len(entries): playlist_content = self._download_json( - 'http://{uploader_id}.wrzuta.pl/xhr/get_playlist_offset/{playlist_id}'.format( - uploader_id=uploader, - playlist_id=playlist_id, - ), + 'http://%s.wrzuta.pl/xhr/get_playlist_offset/%s' % (uploader, playlist_id), playlist_id, - 'Downloading playlist content as JSON metadata', - 'Unable to download playlist content as JSON metadata', - ) - entries += [self.url_result(entry['filelink']) for entry in playlist_content['files']] + 'Downloading playlist JSON', + 'Unable to download playlist JSON') + entries.extend([ + 
self.url_result(entry['filelink']) + for entry in playlist_content.get('files', []) if entry.get('filelink')]) return self.playlist_result(entries, playlist_id, playlist_title) From bc2a871f3eb5f2fce7fc1097787e829106d11f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 02:15:09 +0700 Subject: [PATCH 499/501] Credit @dracony for rockstargames (#9737) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3272fc6ea..4f77de3c7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -173,3 +173,4 @@ Kevin Deldycke inondle Tomáš Čech Déstin Reed +Roman Tsiupa From a4ea28eee6c89756ac5bddfd0c6ef11dd490a191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 02:15:47 +0700 Subject: [PATCH 500/501] Credit @venth for wrzuta:playlist (#9341) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4f77de3c7..cdf655c39 100644 --- a/AUTHORS +++ b/AUTHORS @@ -174,3 +174,4 @@ inondle Tomáš Čech Déstin Reed Roman Tsiupa +Artur Krysiak From d01fb21d4c58650a3ccd2a6fe2877cc9a53dd942 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 02:19:42 +0700 Subject: [PATCH 501/501] release 2016.06.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 6 +++--- docs/supportedsites.md | 5 ++++- youtube_dl/version.py | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 243f2de5d..4c52c5933 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.12** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.12 +[debug] youtube-dl version 2016.06.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c83b8655a..a59fac9b2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -142,9 +142,9 @@ After you have ensured this site is distributing it's content legally, you can f ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. 
The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. -8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. -9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). +7. 
Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. +8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. +9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 10. 
When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e8c0a5d24..152552dee 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -535,6 +535,7 @@ - **revision3:embed** - **RICE** - **RingTV** + - **RockstarGames** - **RottenTomatoes** - **Roxwel** - **RTBF** @@ -699,6 +700,7 @@ - **TVPlay**: TV3Play and related services - **Tweakers** - **twitch:chapter** + - **twitch:clips** - **twitch:past_broadcasts** - **twitch:profile** - **twitch:stream** @@ -793,10 +795,11 @@ - **WNL** - **WorldStarHipHop** - **wrzuta.pl** + - **wrzuta.pl:playlist** - **WSJ**: Wall Street Journal - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5e9c14398..e441a5dc4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.12' +__version__ = '2016.06.14'