From 57cf9b7f0689ed7b643ce863427c3211e407e5bf Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 5 May 2016 03:11:04 +0900 Subject: [PATCH 01/92] [afreecatv] Add new extractor for afreecatv.com VODs --- youtube_dl/extractor/afreecatv.py | 84 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 85 insertions(+) create mode 100644 youtube_dl/extractor/afreecatv.py diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py new file mode 100644 index 000000000..d57546e49 --- /dev/null +++ b/youtube_dl/extractor/afreecatv.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlparse, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + int_or_none, +) + + +class AfreecaTVIE(InfoExtractor): + IE_DESC = 'afreecatv.com' + _VALID_URL = r'''(?x)^ + https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? + (?: + /app/(?:index|read_ucc_bbs)\.cgi| + /player/[Pp]layer\.(?:swf|html)) + \?.*?\bnTitleNo=(?P\d+)''' + _TEST = { + 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', + 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', + 'info_dict': { + 'id': '36164052', + 'ext': 'mp4', + 'title': '데일리 에이프릴 요정들의 시상식!', + 'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + parsed_url = compat_urllib_parse_urlparse(url) + info_url = compat_urlparse.urlunparse(parsed_url._replace( + netloc='afbbs.afreecatv.com:8080', + path='/api/video/get_video_info.php')) + video_xml = self._download_xml(info_url, video_id) + + track = video_xml.find('track') + if track.find('flag').text != 'SUCCEED': + raise ExtractorError('Specified AfreecaTV video does not exist', + expected=True) + title = track.find('title').text + uploader = track.find('nickname').text + uploader_id = track.find('bj_id').text + duration = int_or_none(track.find('duration').text) + thumbnail = track.find('titleImage').text + + entries = [] + for video in track.findall('video'): + for video_file in video.findall('file'): + entries.append({ + 'id': video_file.get('key'), + 'title': title, + 'duration': int_or_none(video_file.get('duration')), + 'formats': [{'url': video_file.text}] + }) + + info = { + 'id': video_id, + 'title': title, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'duration': duration, + 'thumbnail': thumbnail, + } + + if len(entries) > 1: + info['_type'] = 'multi_video' + info['entries'] = entries + elif len(entries) == 1: + info['formats'] = entries[0]['formats'] + else: + raise ExtractorError( + 'No files found for the specified AfreecaTV video, either' + ' the URL is incorrect or the video has been made private.', + expected=True) + + return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ef4431364..f85d75933 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -16,6 +16,7 @@ from .adobetv import ( AdobeTVVideoIE, ) from .adultswim import AdultSwimIE +from .afreecatv import AfreecaTVIE from .aenetworks import AENetworksIE from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE From 833b644fffae4d4cb0807591006e34ef963d9bc0 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Fri, 6 May 2016 01:24:02 +0900 Subject: [PATCH 02/92] use xpath_text --- youtube_dl/extractor/afreecatv.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index d57546e49..9f9399edc 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -9,6 +9,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + xpath_text, ) @@ -41,25 +42,24 @@ class AfreecaTVIE(InfoExtractor): path='/api/video/get_video_info.php')) video_xml = self._download_xml(info_url, video_id) - track = video_xml.find('track') - if track.find('flag').text != 'SUCCEED': + if xpath_text(video_xml, './track/flag', default='FAIL') != 'SUCCEED': raise ExtractorError('Specified AfreecaTV video does not exist', expected=True) - title = track.find('title').text - uploader = track.find('nickname').text - uploader_id = track.find('bj_id').text - duration = int_or_none(track.find('duration').text) - thumbnail = track.find('titleImage').text + title = xpath_text(video_xml, './track/title', 'title') + uploader = xpath_text(video_xml, './track/nickname', 'uploader') + uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') + duration = int_or_none(xpath_text(video_xml, './track/duration', + 'duration')) + thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for video in track.findall('video'): - for video_file in video.findall('file'): - entries.append({ - 'id': video_file.get('key'), - 'title': title, - 'duration': int_or_none(video_file.get('duration')), - 'formats': [{'url': video_file.text}] - }) + for video_file in video_xml.findall('./track/video/file'): + entries.append({ + 'id': video_file.get('key'), + 'title': title, + 'duration': int_or_none(video_file.get('duration')), + 'formats': [{'url': video_file.text}] + }) info = { 'id': video_id, From 22e35adefdcea7dccade852ad7dff0953060381d Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Fri, 6 May 2016 10:41:30 +0900 Subject: [PATCH 03/92] use url instead of single formats entry --- youtube_dl/extractor/afreecatv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 9f9399edc..c9a4b7311 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -58,7 +58,7 @@ class AfreecaTVIE(InfoExtractor): 'id': video_file.get('key'), 'title': title, 'duration': int_or_none(video_file.get('duration')), - 'formats': [{'url': video_file.text}] + 'url': video_file.text, }) info = { @@ -74,7 +74,7 @@ class AfreecaTVIE(InfoExtractor): info['_type'] = 'multi_video' info['entries'] = entries elif len(entries) == 1: - info['formats'] = entries[0]['formats'] + info['url'] = entries[0]['url'] else: raise ExtractorError( 'No files found for the specified AfreecaTV video, either' From 1dbfd7875497398e8c92c67b8307bafef20e8113 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Fri, 6 May 2016 12:07:29 +0900 Subject: [PATCH 04/92] fix multi_video part naming, add upload_date field --- youtube_dl/extractor/afreecatv.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index c9a4b7311..e927e4a48 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_urllib_parse_urlparse, @@ -34,6 +36,15 @@ class AfreecaTVIE(InfoExtractor): } } + @staticmethod + def parse_video_key(key): + video_key = {'upload_date': None, 'part': '0'} + m = re.match(r'^(?P\d{8})_\w+_(?P\d+)$', key) + if m: + video_key['upload_date'] = m.group('upload_date') + video_key['part'] = m.group('part') + return video_key + def _real_extract(self, url): video_id = self._match_id(url) parsed_url = compat_urllib_parse_urlparse(url) @@ -54,9 +65,11 @@ class AfreecaTVIE(InfoExtractor): entries = [] for video_file in video_xml.findall('./track/video/file'): + video_key = self.parse_video_key(video_file.get('key')) entries.append({ - 'id': video_file.get('key'), + 'id': '%s_%s' % (video_id, video_key['part']), 'title': title, + 'upload_date': video_key['upload_date'], 'duration': int_or_none(video_file.get('duration')), 'url': video_file.text, }) @@ -75,6 +88,7 @@ class AfreecaTVIE(InfoExtractor): info['entries'] = entries elif len(entries) == 1: info['url'] = entries[0]['url'] + info['upload_date'] = entries[0]['upload_date'] else: raise ExtractorError( 'No files found for the specified AfreecaTV video, either' From 8d93c214664e5442320a20e899123b8bfd51cd08 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Fri, 6 May 2016 12:08:43 +0900 Subject: [PATCH 05/92] add multi_video test case --- youtube_dl/extractor/afreecatv.py | 32 +++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index e927e4a48..aa5847677 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -23,7 +23,7 @@ class AfreecaTVIE(InfoExtractor): /app/(?:index|read_ucc_bbs)\.cgi| /player/[Pp]layer\.(?:swf|html)) \?.*?\bnTitleNo=(?P\d+)''' - _TEST = { + _TESTS = [{ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', 'info_dict': { @@ -33,8 +33,36 @@ class AfreecaTVIE(InfoExtractor): 'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$', 'uploader': 'dailyapril', 'uploader_id': 'dailyapril', + 'upload_date': '20160503', } - } + }, { + 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', + 'info_dict': { + 'id': '36153164', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", + 'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + }, + 'playlist_count': 2, + 'playlist': [{ + 'md5': 'd8b7c174568da61d774ef0203159bf97', + 'info_dict': { + 'id': '36153164_1', + 'ext': 'mp4', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", + 'upload_date': '20160502', + }, + }, { + 'md5': '58f2ce7f6044e34439ab2d50612ab02b', + 'info_dict': { + 'id': '36153164_2', + 'ext': 'mp4', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", + 'upload_date': '20160502', + }, + }], + }] @staticmethod def parse_video_key(key): From 0fdbe3146c2b3825cc26aca7e918df041b0f9adf Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 08:56:22 +0900 Subject: [PATCH 06/92] use dict.get in case upload_date does not exist --- youtube_dl/extractor/afreecatv.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index aa5847677..4ebc61bae 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -66,7 +66,7 @@ class AfreecaTVIE(InfoExtractor): @staticmethod def parse_video_key(key): - video_key = {'upload_date': None, 'part': '0'} + video_key = {} m = re.match(r'^(?P\d{8})_\w+_(?P\d+)$', key) if m: video_key['upload_date'] = m.group('upload_date') @@ -92,12 +92,12 @@ class AfreecaTVIE(InfoExtractor): thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for video_file in video_xml.findall('./track/video/file'): + for i, video_file in enumerate(video_xml.findall('./track/video/file')): video_key = self.parse_video_key(video_file.get('key')) entries.append({ - 'id': '%s_%s' % (video_id, video_key['part']), + 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), 'title': title, - 'upload_date': video_key['upload_date'], + 'upload_date': video_key.get('upload_date'), 'duration': int_or_none(video_file.get('duration')), 'url': video_file.text, }) From 81f35fee2fd2b58d909887aaa7667310a4d65759 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 08:56:44 +0900 Subject: [PATCH 07/92] fix extractors.py import order --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f85d75933..1f95530a5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -16,8 +16,8 @@ from .adobetv import ( AdobeTVVideoIE, ) from .adultswim import AdultSwimIE -from .afreecatv import AfreecaTVIE from .aenetworks import AENetworksIE +from .afreecatv import AfreecaTVIE from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE From 3452c3a27c2bfd278746314cda4247c2226a35f3 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 10:02:19 +0900 Subject: [PATCH 08/92] update tests --- youtube_dl/extractor/afreecatv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 4ebc61bae..b90095881 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -30,7 +30,7 @@ class AfreecaTVIE(InfoExtractor): 'id': '36164052', 'ext': 'mp4', 'title': '데일리 에이프릴 요정들의 시상식!', - 'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', 'uploader': 'dailyapril', 'uploader_id': 'dailyapril', 'upload_date': '20160503', @@ -40,7 +40,7 @@ class AfreecaTVIE(InfoExtractor): 'info_dict': { 'id': '36153164', 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', 'uploader': 'dailyapril', 'uploader_id': 'dailyapril', }, @@ -62,6 +62,9 @@ class AfreecaTVIE(InfoExtractor): 'upload_date': '20160502', }, }], + }, { + 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', + 'only_matching': True, }] @staticmethod From 370d4eb8ad3d9d092fc5eb116509eaf4a3e83177 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 10:02:48 +0900 Subject: [PATCH 09/92] use stricter file selector in case of empty in case of empty ./track/video/file entries --- youtube_dl/extractor/afreecatv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index b90095881..527386be3 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -95,7 +95,7 @@ class AfreecaTVIE(InfoExtractor): thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for i, video_file in enumerate(video_xml.findall('./track/video/file')): + for i, video_file in enumerate(video_xml.findall('./track/video/file[@key]')): video_key = self.parse_video_key(video_file.get('key')) entries.append({ 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), @@ -119,7 +119,7 @@ class AfreecaTVIE(InfoExtractor): info['entries'] = entries elif len(entries) == 1: info['url'] = entries[0]['url'] - info['upload_date'] = entries[0]['upload_date'] + info['upload_date'] = entries[0].get('upload_date') else: raise ExtractorError( 'No files found for the specified AfreecaTV video, either' From 93fdb1417766015ddadcd13a709cdfae4de5e246 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 10:33:17 +0900 Subject: [PATCH 10/92] don't use selection by attribute --- youtube_dl/extractor/afreecatv.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 527386be3..0fcbea0d1 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -95,8 +95,10 @@ class AfreecaTVIE(InfoExtractor): thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for i, video_file in enumerate(video_xml.findall('./track/video/file[@key]')): - video_key = self.parse_video_key(video_file.get('key')) + for i, video_file in enumerate(video_xml.findall('./track/video/file')): + video_key = self.parse_video_key(video_file.get('key', '')) + if not video_key: + continue entries.append({ 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), 'title': title, From e7d85c4ef7d2c74058d41ded1e2a6d6aa527dc9a Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Tue, 31 May 2016 17:28:49 +0900 Subject: [PATCH 11/92] use /track/video/file to determine if video exists --- youtube_dl/extractor/afreecatv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 0fcbea0d1..518c61f67 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -11,6 +11,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + xpath_element, xpath_text, ) @@ -84,9 +85,10 @@ class AfreecaTVIE(InfoExtractor): path='/api/video/get_video_info.php')) video_xml = self._download_xml(info_url, video_id) - if xpath_text(video_xml, './track/flag', default='FAIL') != 'SUCCEED': + if xpath_element(video_xml, './track/video/file') is None: raise ExtractorError('Specified AfreecaTV video does not exist', expected=True) + title = xpath_text(video_xml, './track/title', 'title') uploader = xpath_text(video_xml, './track/nickname', 'uploader') uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') From 7264e385912951167c27b40df5fd22010d594b12 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 14:29:53 +0800 Subject: [PATCH 12/92] [bilibili] Fix for videos without upload time (closes #9710) --- youtube_dl/extractor/bilibili.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 910e539e4..b17047b39 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -102,6 +102,22 @@ class BiliBiliIE(InfoExtractor): 'uploader_id': '151066', }, }], + }, { + # Missing upload time + 'url': 'http://www.bilibili.com/video/av1867637/', + 'info_dict': { + 'id': '2880301', + 'ext': 'flv', + 'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', + 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', + 'uploader': '黑夜为猫', + 'uploader_id': '610729', + }, + 'params': { + # Just to test metadata extraction + 'skip_download': True, + }, + 'expected_warnings': ['upload time'], }] # BiliBili blocks keys from time to time. The current key is extracted from @@ -172,6 +188,7 @@ class BiliBiliIE(InfoExtractor): description = self._html_search_meta('description', webpage) datetime_str = self._html_search_regex( r']+datetime="([^"]+)"', webpage, 'upload time', fatal=False) + timestamp = None if datetime_str: timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple()) From 50ce1c331c736d8219f3bf631ff069b9aecc48e3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 14:43:52 +0800 Subject: [PATCH 13/92] [downloader/external] Add another env for proxies in ffmpeg/avconv Related sources: https://git.libav.org/?p=libav.git;a=blob;f=libavformat/http.c;h=8fe8d11e1edfdbb04a8726db2c49cfef3f572aac;hb=HEAD#l152 https://git.libav.org/?p=libav.git;a=blob;f=libavformat/tls.c;h=fab243e93e20034e88e619188c13a44a5d8ccdb9;hb=HEAD#l63 https://github.com/FFmpeg/FFmpeg/blob/f8e89d8/libavformat/http.c#L191 https://github.com/FFmpeg/FFmpeg/blob/f8e89d8/libavformat/tls.c#L92 --- youtube_dl/downloader/external.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 3a73cee1c..3ff1f9ed4 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -210,6 +210,7 @@ class FFmpegFD(ExternalFD): # args += ['-http_proxy', proxy] env = os.environ.copy() compat_setenv('HTTP_PROXY', proxy, env=env) + compat_setenv('http_proxy', proxy, env=env) protocol = info_dict.get('protocol') From 22a0a95247c30b346592b6e3d464776bceb3b934 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 20:47:39 +0800 Subject: [PATCH 14/92] [theplatform] Some NBC videos require an additional cookie Related: #9578 --- youtube_dl/extractor/theplatform.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 02dbef913..5793ec6ef 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -14,11 +14,13 @@ from ..compat import ( compat_urllib_parse_urlparse, ) from ..utils import ( + determine_ext, ExtractorError, float_or_none, int_or_none, sanitized_Request, unsmuggle_url, + update_url_query, xpath_with_ns, mimetype2ext, find_xpath_attr, @@ -48,6 +50,12 @@ class ThePlatformBaseIE(OnceIE): if OnceIE.suitable(_format['url']): formats.extend(self._extract_once_formats(_format['url'])) else: + media_url = _format['url'] + if determine_ext(media_url) == 'm3u8': + hdnea2 = self._get_cookies(media_url).get('hdnea2') + if hdnea2: + _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value}) + formats.append(_format) subtitles = self._parse_smil_subtitles(meta, default_ns) From e6e90515db983ca447cf7a59bbc153907d4fff4a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 20:50:01 +0800 Subject: [PATCH 15/92] [nbc] Add the test case from #9578 Closes #9578 --- youtube_dl/extractor/nbc.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 46504cd5f..f27c7f139 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -67,6 +67,23 @@ class NBCIE(InfoExtractor): # This video has expired but with an escaped embedURL 'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515', 'only_matching': True, + }, + { + # HLS streams requires the 'hdnea3' cookie + 'url': 'http://www.nbc.com/Kings/video/goliath/n1806', + 'info_dict': { + 'id': 'n1806', + 'ext': 'mp4', + 'title': 'Goliath', + 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.', + 'timestamp': 1237100400, + 'upload_date': '20090315', + 'uploader': 'NBCU-COM', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from US', } ] From fc0a45fa416ad3e3ecf5936061efbb0328afa6b5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 21:12:14 +0800 Subject: [PATCH 16/92] [twitter] Detect suspended accounts and update _TESTS --- youtube_dl/extractor/twitter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index ea673054f..129103c64 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -207,6 +207,7 @@ class TwitterIE(InfoExtractor): 'uploader_id': 'giphz', }, 'expected_warnings': ['height', 'width'], + 'skip': 'Account suspended', }, { 'url': 'https://twitter.com/starwars/status/665052190608723968', 'md5': '39b7199856dee6cd4432e72c74bc69d4', @@ -278,7 +279,11 @@ class TwitterIE(InfoExtractor): user_id = mobj.group('user_id') twid = mobj.group('id') - webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid) + webpage, urlh = self._download_webpage_handle( + self._TEMPLATE_URL % (user_id, twid), twid) + + if 'twitter.com/account/suspended' in urlh.geturl(): + raise ExtractorError('Account suspended by Twitter.', expected=True) username = remove_end(self._og_search_title(webpage), ' on Twitter') From c6308b3153acc57300f750f0061c63ffcba4d150 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 21:28:10 +0800 Subject: [PATCH 17/92] [twitter] Fix extraction for videos with HLS streams Closes #9623 --- youtube_dl/extractor/twitter.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 129103c64..76421e533 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, float_or_none, xpath_text, remove_end, @@ -116,13 +117,16 @@ class TwitterCardIE(TwitterBaseIE): video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') if video_url: - f = { - 'url': video_url, - } + if determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) + else: + f = { + 'url': video_url, + } - _search_dimensions_in_video_url(f, video_url) + _search_dimensions_in_video_url(f, video_url) - formats.append(f) + formats.append(f) vmap_url = config.get('vmapUrl') or config.get('vmap_url') if vmap_url: @@ -263,7 +267,6 @@ class TwitterIE(InfoExtractor): 'add_ie': ['Vine'], }, { 'url': 'https://twitter.com/captainamerica/status/719944021058060289', - # md5 constantly changes 'info_dict': { 'id': '719944021058060289', 'ext': 'mp4', @@ -272,6 +275,9 @@ class TwitterIE(InfoExtractor): 'uploader_id': 'captainamerica', 'uploader': 'Captain America', }, + 'params': { + 'skip_download': True, # requires ffmpeg + }, }] def _real_extract(self, url): From 6da8d7de69af144a96e9e50168e66f66af54129f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 21:48:12 +0800 Subject: [PATCH 18/92] [twitter] Update _TESTS --- youtube_dl/extractor/twitter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 76421e533..b73842986 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -53,7 +53,7 @@ class TwitterCardIE(TwitterBaseIE): 'id': 'dq4Oj5quskI', 'ext': 'mp4', 'title': 'Ubuntu 11.10 Overview', - 'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/', + 'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...', 'upload_date': '20111013', 'uploader': 'OMG! Ubuntu!', 'uploader_id': 'omgubuntu', @@ -244,10 +244,10 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'jay - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'jay on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': 're:^https?://.*\.jpg', - 'uploader': 'jay', + 'uploader': 'Donte The Dumbass', 'uploader_id': 'jaydingeer', }, 'params': { From 411c590a1f997f9efd71be8f434821acbf33a35f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 23:45:46 +0800 Subject: [PATCH 19/92] [youku:show] Add new extractor --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/youku.py | 52 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d107080f5..676a0400c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1013,7 +1013,10 @@ from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE from .ynet import YnetIE from .youjizz import YouJizzIE -from .youku import YoukuIE +from .youku import ( + YoukuIE, + YoukuShowIE, +) from .youporn import YouPornIE from .yourupload import YourUploadIE from .youtube import ( diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index dbccbe228..147608ebe 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -2,7 +2,9 @@ from __future__ import unicode_literals import base64 +import itertools import random +import re import string import time @@ -13,6 +15,7 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + get_element_by_attribute, sanitized_Request, ) @@ -285,3 +288,52 @@ class YoukuIE(InfoExtractor): 'title': title, 'entries': entries, } + + +class YoukuShowIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P[0-9a-z]+)\.html' + IE_NAME = 'youku:show' + + _TEST = { + 'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html', + 'info_dict': { + 'id': 'zc7c670be07ff11e48b3f', + 'title': '花千骨 未删减版', + 'description': 'md5:578d4f2145ae3f9128d9d4d863312910', + }, + 'playlist_count': 50, + } + + _PAGE_SIZE = 40 + + def _find_videos_in_page(self, webpage): + videos = re.findall( + r'
  • ]+href="(?Phttps?://v\.youku\.com/[^"]+)"[^>]+title="(?P[^"]+)"', webpage) + return [ + self.url_result(video_url, YoukuIE.ie_key(), title) + for video_url, title in videos] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + + entries = self._find_videos_in_page(webpage) + + playlist_title = self._html_search_regex( + r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False) + detail_div = get_element_by_attribute('class', 'detail', webpage) or '' + playlist_description = self._html_search_regex( + r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>', + detail_div, 'playlist description', fatal=False) + + for idx in itertools.count(1): + episodes_page = self._download_webpage( + 'http://www.youku.com/show_episode/id_%s.html' % show_id, + show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)}, + note='Downloading episodes page %d' % idx) + new_entries = self._find_videos_in_page(episodes_page) + entries.extend(new_entries) + if len(new_entries) < self._PAGE_SIZE: + break + + return self.playlist_result(entries, show_id, playlist_title, playlist_description) From 11380753b5aa9d8128ef28a968ab325973276fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:00:47 +0700 Subject: [PATCH 20/92] [vessel] Add support for embed urls and improve extraction --- youtube_dl/extractor/vessel.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 1a0ff3395..e027c018b 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..utils import ( @@ -12,11 +13,11 @@ from ..utils import ( class VesselIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)' _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' _LOGIN_URL = 'https://www.vessel.com/api/account/login' _NETRC_MACHINE = 'vessel' - _TEST = { + _TESTS = [{ 'url': 'https://www.vessel.com/videos/HDN7G5UMs', 'md5': '455cdf8beb71c6dd797fd2f3818d05c4', 'info_dict': { @@ -28,7 +29,16 @@ class VesselIE(InfoExtractor): 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', 'timestamp': int, }, - } + }, { + 'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1', + webpage)] @staticmethod def make_json_request(url, data): @@ -98,16 +108,19 @@ class VesselIE(InfoExtractor): formats = [] for f in video_asset.get('sources', []): - if f['name'] == 'hls-index': + location = f.get('location') + if not location: + continue + if f.get('name') == 'hls-index': formats.extend(self._extract_m3u8_formats( - f['location'], video_id, ext='mp4', m3u8_id='m3u8')) + location, video_id, ext='mp4', m3u8_id='m3u8')) else: formats.append({ - 'format_id': f['name'], + 'format_id': f.get('name'), 'tbr': f.get('bitrate'), 'height': f.get('height'), 'width': f.get('width'), - 'url': f['location'], + 'url': location, }) self._sort_formats(formats) From 48a5eabc487058ccaa1076b74ad9106fc6019955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:02:27 +0700 Subject: [PATCH 21/92] [extractor/generic] Add support vessel embeds (Closes #7083) --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b4138381d..90575ab0e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -63,6 +63,7 @@ from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE +from .vessel import VesselIE class GenericIE(InfoExtractor): @@ -1533,6 +1534,11 @@ class GenericIE(InfoExtractor): if tp_urls: return _playlist_from_matches(tp_urls, ie='ThePlatform') + # Look for Vessel embeds + vessel_urls = VesselIE._extract_urls(webpage) + if vessel_urls: + return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key()) + # Look for embedded rtl.nl player matches = re.findall( r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', From a479b8f687245a9cb1b5c25ed9ece28c4710981f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:09:32 +0700 Subject: [PATCH 22/92] [vessel] Use native hls by default --- youtube_dl/extractor/vessel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index e027c018b..59f2b4ba4 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -113,7 +113,8 @@ class VesselIE(InfoExtractor): continue if f.get('name') == 'hls-index': formats.extend(self._extract_m3u8_formats( - location, video_id, ext='mp4', m3u8_id='m3u8')) + location, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8')) else: formats.append({ 'format_id': f.get('name'), From 39da509f6712b6b0e9d52a9c9e990a5b5cd6c2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:12:48 +0700 Subject: [PATCH 23/92] [vessel] Extract DASH formats --- youtube_dl/extractor/vessel.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 59f2b4ba4..c53f44584 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -111,13 +111,17 @@ class VesselIE(InfoExtractor): location = f.get('location') if not location: continue - if f.get('name') == 'hls-index': + name = f.get('name') + if name == 'hls-index': formats.extend(self._extract_m3u8_formats( location, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='m3u8')) + elif name == 'dash-index': + formats.extend(self._extract_mpd_formats( + location, video_id, mpd_id='dash', fatal=False)) else: formats.append({ - 'format_id': f.get('name'), + 'format_id': name, 'tbr': f.get('bitrate'), 'height': f.get('height'), 'width': f.get('width'), From 9d51a0a9a19f07997cfb3ff1bb9fc9c1669a455c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:13:38 +0700 Subject: [PATCH 24/92] [vessel] Make hls formats non fatal --- youtube_dl/extractor/vessel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index c53f44584..2cd617b91 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -115,7 +115,7 @@ class VesselIE(InfoExtractor): if name == 'hls-index': formats.extend(self._extract_m3u8_formats( location, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='m3u8')) + entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False)) elif name == 'dash-index': formats.extend(self._extract_mpd_formats( location, video_id, mpd_id='dash', fatal=False)) From be6217b26142491232fb697b125015d45437832d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 05:34:19 +0700 Subject: [PATCH 25/92] [YoutubeDL] Force string conversion on non string video ids --- youtube_dl/YoutubeDL.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3917ca9dc..5036289b0 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1223,6 +1223,10 @@ class YoutubeDL(object): if 'title' not in info_dict: raise ExtractorError('Missing "title" field in extractor result') + if not isinstance(info_dict['id'], compat_str): + self.report_warning('"id" field is not a string - forcing string conversion') + info_dict['id'] = compat_str(info_dict['id']) + if 'playlist' not in info_dict: # It isn't part of a playlist info_dict['playlist'] = None From 6c33d24b46ecfb1f2ce790e21f2410149fdfb095 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 12:58:24 +0800 Subject: [PATCH 26/92] [utils] Add audio/mpeg to mimetype2ext() Used in WDR live radios (#6147) --- youtube_dl/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 89234b39d..229de4b39 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2020,6 +2020,9 @@ def mimetype2ext(mt): ext = { 'audio/mp4': 'm4a', + # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as + # it's the most popular one + 'audio/mpeg': 'mp3', }.get(mt) if ext is not None: return ext From 50918c4ee01be6c1218a72bef35838216b2bf8d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 13:04:30 +0800 Subject: [PATCH 27/92] [wdr] Support radio players (closes #6147) --- youtube_dl/extractor/wdr.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 6174eb19f..059e2aa08 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,12 +10,13 @@ from ..utils import ( strip_jsonp, unified_strdate, ExtractorError, + urlhandle_detect_ext, ) class WDRIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' - _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' + _PAGE_REGEX = r'/(?:mediathek/)?(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _TESTS = [ @@ -97,6 +98,16 @@ class WDRIE(InfoExtractor): 'description': '- Die Sendung mit der Maus -', }, }, + { + 'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html', + 'info_dict': { + 'id': 'mdb-869971', + 'ext': 'mp3', + 'title': 'Funkhaus Europa Livestream', + 'description': 'md5:2309992a6716c347891c045be50992e4', + 'upload_date': '20160101', + }, + } ] def _real_extract(self, url): @@ -107,9 +118,10 @@ class WDRIE(InfoExtractor): webpage = self._download_webpage(url, display_id) # for wdr.de the data-extension is in a tag with the class "mediaLink" + # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" # for wdrmaus its in a link to the page in a multiline "videoLink"-tag json_metadata = self._html_search_regex( - r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', + r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', webpage, 'media link', default=None, flags=re.MULTILINE) if not json_metadata: @@ -143,15 +155,22 @@ class WDRIE(InfoExtractor): for tag_name in ['videoURL', 'audioURL']: if tag_name in metadata_media_alt: alt_url = metadata_media_alt[tag_name] - if determine_ext(alt_url) == 'm3u8': + ext = determine_ext(alt_url) + if ext == 'm3u8': m3u_fmt = self._extract_m3u8_formats( alt_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') formats.extend(m3u_fmt) else: - formats.append({ + a_format = { 'url': alt_url - }) + } + if ext == 'unknown_video': + urlh = self._request_webpage( + alt_url, display_id, note='Determining extension') + ext = urlhandle_detect_ext(urlh) + a_format['ext'] = ext + formats.append(a_format) # check if there are flash-streams for this video if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: From 6869d634c6d7482dd53034dec8a8f2f0b8e1f9b0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 13:41:12 +0800 Subject: [PATCH 28/92] [wdr] Simplify extraction --- youtube_dl/extractor/wdr.py | 64 ++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 059e2aa08..88369d3f2 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,6 +10,7 @@ from ..utils import ( strip_jsonp, unified_strdate, ExtractorError, + update_url_query, urlhandle_detect_ext, ) @@ -100,9 +101,10 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html', + # Live stream, MD5 unstable 'info_dict': { 'id': 'mdb-869971', - 'ext': 'mp3', + 'ext': 'flv', 'title': 'Funkhaus Europa Livestream', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20160101', @@ -150,36 +152,38 @@ class WDRIE(InfoExtractor): formats = [] # check if the metadata contains a direct URL to a file - metadata_media_alt = metadata_media_resource.get('alt') - if metadata_media_alt: - for tag_name in ['videoURL', 'audioURL']: - if tag_name in metadata_media_alt: - alt_url = metadata_media_alt[tag_name] - ext = determine_ext(alt_url) - if ext == 'm3u8': - m3u_fmt = self._extract_m3u8_formats( - alt_url, display_id, 'mp4', 'm3u8_native', - m3u8_id='hls') - formats.extend(m3u_fmt) - else: - a_format = { - 'url': alt_url - } - if ext == 'unknown_video': - urlh = self._request_webpage( - alt_url, display_id, note='Determining extension') - ext = urlhandle_detect_ext(urlh) - a_format['ext'] = ext - formats.append(a_format) + for kind, media_resource in metadata_media_resource.items(): + if kind not in ('dflt', 'alt'): + continue - # check if there are flash-streams for this video - if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: - video_url = metadata_media_resource['dflt']['videoURL'] - if video_url.endswith('.f4m'): - full_video_url = video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' - formats.extend(self._extract_f4m_formats(full_video_url, display_id, f4m_id='hds', fatal=False)) - elif video_url.endswith('.smil'): - formats.extend(self._extract_smil_formats(video_url, 'stream', fatal=False)) + for tag_name, medium_url in media_resource.items(): + if tag_name not in ('videoURL', 'audioURL'): + continue + + ext = determine_ext(medium_url) + if ext == 'm3u8': + m3u_fmt = self._extract_m3u8_formats( + medium_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + formats.extend(m3u_fmt) + elif ext == 'f4m': + manifest_url = update_url_query( + medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) + formats.extend(self._extract_f4m_formats( + manifest_url, display_id, f4m_id='hds', fatal=False)) + elif ext == 'smil': + formats.extend(self._extract_smil_formats( + medium_url, 'stream', fatal=False)) + else: + a_format = { + 'url': medium_url + } + if ext == 'unknown_video': + urlh = self._request_webpage( + medium_url, display_id, note='Determining extension') + ext = urlhandle_detect_ext(urlh) + a_format['ext'] = ext + formats.append(a_format) subtitles = {} caption_url = metadata_media_resource.get('captionURL') From 1594a4932f7e94287c32b5d4d63a60b57ffee96a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 13:49:35 +0800 Subject: [PATCH 29/92] [wdr] Misc changes --- youtube_dl/extractor/wdr.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 88369d3f2..a9238cbeb 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -6,10 +6,10 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, + ExtractorError, js_to_json, strip_jsonp, unified_strdate, - ExtractorError, update_url_query, urlhandle_detect_ext, ) @@ -17,7 +17,7 @@ from ..utils import ( class WDRIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' - _PAGE_REGEX = r'/(?:mediathek/)?(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' + _PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _TESTS = [ @@ -162,10 +162,9 @@ class WDRIE(InfoExtractor): ext = determine_ext(medium_url) if ext == 'm3u8': - m3u_fmt = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( medium_url, display_id, 'mp4', 'm3u8_native', - m3u8_id='hls') - formats.extend(m3u_fmt) + m3u8_id='hls')) elif ext == 'f4m': manifest_url = update_url_query( medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) @@ -185,6 +184,8 @@ class WDRIE(InfoExtractor): a_format['ext'] = ext formats.append(a_format) + self._sort_formats(formats) + subtitles = {} caption_url = metadata_media_resource.get('captionURL') if caption_url: @@ -206,8 +207,6 @@ class WDRIE(InfoExtractor): if upload_date: upload_date = unified_strdate(upload_date) - self._sort_formats(formats) - return { 'id': metadata_tracker_data.get('trackerClipId', display_id), 'display_id': display_id, From e2713d32f49f1bfa830cc755a96691c39da88290 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 19:00:13 +0800 Subject: [PATCH 30/92] [openload] Fix extraction. Thanks @perron375 for the solution Closes #9706 --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 5049b870e..1b57462b5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) code = self._search_regex( - r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>', + r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>', webpage, 'JS code') decoded = self.openload_decode(code) From 21efee5f8bc8daf0cbb5fc3408a1fc5b9d5eadcb Mon Sep 17 00:00:00 2001 From: N1k145 <N1k145@users.noreply.github.com> Date: Thu, 9 Jun 2016 12:13:15 +0200 Subject: [PATCH 31/92] [openload] Relax _VALID_URL [openload] added to _TESTS, removed escape --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 1b57462b5..6415b8fdc 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -14,7 +14,7 @@ from ..utils import ( class OpenloadIE(InfoExtractor): - _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)' + _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -31,6 +31,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://openload.io/f/ZAn6oz-VZGE/', 'only_matching': True, + }, { + 'url': 'https://openload.co/f/_-ztPaZtMhM/', + 'only_matching': True, }, { # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout # for title and ext From bb1e44cc8ee7937422fb5635f3431feb6d5fd918 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Fri, 27 May 2016 13:37:40 +0200 Subject: [PATCH 32/92] [godtv] Add extractor [GodTV] Improvements --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/godtv.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/godtv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index aa98782a5..40dcfcde3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,6 +292,7 @@ from .globo import ( GloboArticleIE, ) from .godtube import GodTubeIE +from .godtv import GodTVIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE from .googledrive import GoogleDriveIE diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py new file mode 100644 index 000000000..50f093ace --- /dev/null +++ b/youtube_dl/extractor/godtv.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class GodTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', + 'info_dict': { + 'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3', + 'ext': 'mp4', + 'title': 'Randy Needham', + 'duration': 3615.08, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + ooyala_id = self._search_regex(r'"content_id"\s*:\s*"([\w-]{32})"', webpage, display_id) + + return OoyalaIE._build_url_result(ooyala_id) From c0fed3bda50f77d063f3817cfbc3d8b81c18afa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 21:29:41 +0700 Subject: [PATCH 33/92] [godtv] Improve and add support for playlists (Closes #9608) --- youtube_dl/extractor/godtv.py | 36 +++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index 50f093ace..78d638cf0 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -1,13 +1,13 @@ -# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor from .ooyala import OoyalaIE +from ..utils import js_to_json class GodTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', 'info_dict': { 'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3', @@ -18,12 +18,40 @@ class GodTVIE(InfoExtractor): 'params': { 'skip_download': True, } - } + }, { + 'url': 'http://god.tv/playlist/bible-study', + 'info_dict': { + 'id': 'bible-study', + }, + 'playlist_mincount': 37, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - ooyala_id = self._search_regex(r'"content_id"\s*:\s*"([\w-]{32})"', webpage, display_id) + + settings = self._parse_json( + self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'settings', default='{}'), + display_id, transform_source=js_to_json, fatal=False) + + ooyala_id = None + + if settings: + playlist = settings.get('playlist') + if playlist and isinstance(playlist, list): + entries = [ + OoyalaIE._build_url_result(video['content_id']) + for video in playlist if video.get('content_id')] + if entries: + return self.playlist_result(entries, display_id) + ooyala_id = settings.get('ooyala', {}).get('content_id') + + if not ooyala_id: + ooyala_id = self._search_regex( + r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1', + webpage, 'ooyala id', group='id') return OoyalaIE._build_url_result(ooyala_id) From 416878f41f3b33cf1b10b0b30093dcd7a90bdbfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 21:33:51 +0700 Subject: [PATCH 34/92] [godtv] Add more tests --- youtube_dl/extractor/godtv.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index 78d638cf0..7deca00aa 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -24,6 +24,12 @@ class GodTVIE(InfoExtractor): 'id': 'bible-study', }, 'playlist_mincount': 37, + }, { + 'url': 'http://god.tv/node/15097', + 'only_matching': True, + }, { + 'url': 'http://god.tv/live/africa', + 'only_matching': True, }] def _real_extract(self, url): From b0aebe702c538010fd92cd0807963293f112adcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 21:34:47 +0700 Subject: [PATCH 35/92] [godtv] Relax _VALID_URL --- youtube_dl/extractor/godtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index 7deca00aa..c5d3b4e6a 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -6,7 +6,7 @@ from ..utils import js_to_json class GodTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', 'info_dict': { @@ -30,6 +30,9 @@ class GodTVIE(InfoExtractor): }, { 'url': 'http://god.tv/live/africa', 'only_matching': True, + }, { + 'url': 'http://god.tv/liveevents', + 'only_matching': True, }] def _real_extract(self, url): From bc7e7adf5154f15b74b2df3e2989f630667778ce Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 21:40:16 +0800 Subject: [PATCH 36/92] [wdr] Subtitles are TTML --- youtube_dl/extractor/wdr.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index a9238cbeb..6b83a2a04 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -34,7 +34,8 @@ class WDRIE(InfoExtractor): 'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318', 'is_live': False, 'subtitles': {'de': [{ - 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml' + 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml', + 'ext': 'ttml', }]}, }, }, @@ -190,7 +191,8 @@ class WDRIE(InfoExtractor): caption_url = metadata_media_resource.get('captionURL') if caption_url: subtitles['de'] = [{ - 'url': caption_url + 'url': caption_url, + 'ext': 'ttml', }] title = metadata_tracker_data.get('trackerClipTitle') From 55290788d352168844c8e64d64428a76baa63eea Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:28:09 +0800 Subject: [PATCH 37/92] [yahoo] Yahoo doesn't like region names in lower cases Fix test_Yahoo_7 --- youtube_dl/extractor/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b376f2b93..927a964a4 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -343,7 +343,7 @@ class YahooIE(InfoExtractor): webpage, 'region', fatal=False, default='US') data = compat_urllib_parse_urlencode({ 'protocol': 'http', - 'region': region, + 'region': region.upper(), }) query_url = ( 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' From 506d0e96936f84c2b21c7ed37f4a7fca2eec86a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:29:58 +0800 Subject: [PATCH 38/92] [xuite] Skip the invalid test --- youtube_dl/extractor/xuite.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 2466410fa..0be8932ad 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -66,6 +66,7 @@ class XuiteIE(InfoExtractor): 'uploader_id': '242127761', 'categories': ['電玩動漫'], }, + 'skip': 'Video removed', }, { 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9', 'only_matching': True, From 436214baf70c1a50fbaf1fbfca4b48f33695590c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:31:06 +0800 Subject: [PATCH 39/92] [xfileshare] Skip an invalid test --- youtube_dl/extractor/xfileshare.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 769003735..ee4d04c20 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -62,7 +62,8 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4', 'thumbnail': 're:http://.*\.jpg', - } + }, + 'skip': 'Video removed', }, { 'url': 'http://vidto.me/ku5glz52nqe1.html', 'info_dict': { From e1e0a10c567e8457bf83f6b54e65963447e17a8f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:33:31 +0800 Subject: [PATCH 40/92] [weibo] Remove the extractor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Weibo weishipin (微視頻, tiny videos) service is dead and now all videos are hosted on Sina videos, which is covered by sina.py --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/weibo.py | 49 ------------------------------ 2 files changed, 50 deletions(-) delete mode 100644 youtube_dl/extractor/weibo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 40dcfcde3..0789e4a6e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -974,7 +974,6 @@ from .webofstories import ( WebOfStoriesIE, WebOfStoriesPlaylistIE, ) -from .weibo import WeiboIE from .weiqitv import WeiqiTVIE from .wimp import WimpIE from .wistia import WistiaIE diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py deleted file mode 100644 index 20bb039d3..000000000 --- a/youtube_dl/extractor/weibo.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class WeiboIE(InfoExtractor): - """ - The videos in Weibo come from different sites, this IE just finds the link - to the external video and returns it. - """ - _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm' - - _TEST = { - 'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm', - 'info_dict': { - 'id': '98322879', - 'ext': 'flv', - 'title': '魔声耳机最新广告“All Eyes On Us”', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Sina'], - } - - # Additional example videos from different sites - # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm - # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) - video_id = mobj.group('id') - info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id - info = self._download_json(info_url, video_id) - - videos_urls = map(lambda v: v['play_page_url'], info['result']['data']) - # Prefer sina video since they have thumbnails - videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u) - player_url = videos_urls[-1] - m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html', - player_url) - if m_sina is not None: - self.to_screen('Sina video detected') - sina_id = m_sina.group(1) - player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id - return self.url_result(player_url) From 3e74b444e7324fdda956aa816240b938eabf9c93 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:13:59 +0800 Subject: [PATCH 41/92] [vulture] Remove the extractor The first 10 URLs in google search "site:http://video.vulture.com/video" is dead. I guess Vulture does not host videos on their own anymore. --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/vulture.py | 69 ------------------------------ 2 files changed, 70 deletions(-) delete mode 100644 youtube_dl/extractor/vulture.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0789e4a6e..38708294a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,7 +958,6 @@ from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE from .vuclip import VuClipIE -from .vulture import VultureIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py deleted file mode 100644 index faa167e65..000000000 --- a/youtube_dl/extractor/vulture.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import unicode_literals - -import json -import os.path -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class VultureIE(InfoExtractor): - IE_NAME = 'vulture.com' - _VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/' - _TEST = { - 'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1', - 'md5': '8d997845642a2b5152820f7257871bc8', - 'info_dict': { - 'id': '6GHRQL3RV7MSD1H4', - 'ext': 'mp4', - 'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED', - 'uploader_id': 'Sarah', - 'thumbnail': 're:^http://.*\.jpg$', - 'timestamp': 1401288564, - 'upload_date': '20140528', - 'description': 'Uplifting and witty, as predicted.', - 'duration': 1015, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - query_string = self._search_regex( - r"queryString\s*=\s*'([^']+)'", webpage, 'query string') - video_id = self._search_regex( - r'content=([^&]+)', query_string, 'video ID') - query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string - - query_webpage = self._download_webpage( - query_url, display_id, note='Downloading query page') - params_json = self._search_regex( - r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n', - query_webpage, - 'player params') - params = json.loads(params_json) - - upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T')) - uploader_id = params.get('user', {}).get('handle') - - media_item = params['media_item'] - title = os.path.splitext(media_item['title'])[0] - duration = int_or_none(media_item.get('duration_seconds')) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': media_item['pipeline_xid'], - 'title': title, - 'timestamp': upload_timestamp, - 'thumbnail': params.get('thumbnail_url'), - 'uploader_id': uploader_id, - 'description': params.get('description'), - 'duration': duration, - } From 5de008e8c3e4058c20956d19f69ac3347a2722e0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:31:55 +0800 Subject: [PATCH 42/92] [nbcnews] Support embed widgets Used in some Vulture videos --- youtube_dl/extractor/nbc.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f27c7f139..6b7da1149 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -266,6 +266,11 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952', 'only_matching': True, }, + { + # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html + 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682', + 'only_matching': True, + }, ] def _real_extract(self, url): @@ -289,18 +294,17 @@ class NBCNewsIE(ThePlatformIE): webpage = self._download_webpage(url, display_id) info = None bootstrap_json = self._search_regex( - r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$', + [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', + r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], webpage, 'bootstrap json', default=None) - if bootstrap_json: - bootstrap = self._parse_json(bootstrap_json, display_id) + bootstrap = self._parse_json( + bootstrap_json, display_id, transform_source=unescapeHTML) + if 'results' in bootstrap: info = bootstrap['results'][0]['video'] + elif 'video' in bootstrap: + info = bootstrap['video'] else: - player_instance_json = self._search_regex( - r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None) - if not player_instance_json: - player_instance_json = self._html_search_regex( - r'data-video="([^"]+)"', webpage, 'video json') - info = self._parse_json(player_instance_json, display_id) + info = bootstrap video_id = info['mpxId'] title = info['title'] From de3eb07ed64e3d50164a6db59385a94f2675b0b4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:32:59 +0800 Subject: [PATCH 43/92] [generic] Detect NBC News embeds --- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 90575ab0e..36a3d91fc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1032,6 +1032,17 @@ class GenericIE(InfoExtractor): 'timestamp': 1389118457, }, }, + # NBC News embed + { + 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html', + 'md5': '1aa589c675898ae6d37a17913cf68d66', + 'info_dict': { + 'id': '701714499682', + 'ext': 'mp4', + 'title': 'PREVIEW: On Assignment: David Letterman', + 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.', + }, + }, # UDN embed { 'url': 'https://video.udn.com/news/300346', @@ -1966,6 +1977,12 @@ class GenericIE(InfoExtractor): if nbc_sports_url: return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') + # Look for NBC News embeds + nbc_news_embed_url = re.search( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage) + if nbc_news_embed_url: + return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews') + # Look for Google Drive embeds google_drive_url = GoogleDriveIE._extract_url(webpage) if google_drive_url: From cc4444662c54c24f6f82efd3ba5e60e9556d88b8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:33:59 +0800 Subject: [PATCH 44/92] [generic] Remove Vulture embed detection Vulture.com videos now hosts on YouTube, Vimeo, MTV, NBC News or Hulu. Here's an example of Hulu: http://www.vulture.com/2016/06/kimmel-interviews-mariah-carey-in-a-bathtub.html --- youtube_dl/extractor/generic.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 36a3d91fc..798c109c6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1857,14 +1857,6 @@ class GenericIE(InfoExtractor): url = unescapeHTML(mobj.group('url')) return self.url_result(url) - # Look for embedded vulture.com player - mobj = re.search( - r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"', - webpage) - if mobj is not None: - url = unescapeHTML(mobj.group('url')) - return self.url_result(url, ie='Vulture') - # Look for embedded mtvservices player mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) if mtvservices_url: From 9631a94fb5e5ee9b92135f938df00866535fc6c6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 15:05:24 +0800 Subject: [PATCH 45/92] [compat] Add compat_html_entities_html5 Used in tset_Vporn_1. Also Related to #9270 --- youtube_dl/compat.py | 2240 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2239 insertions(+), 1 deletion(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index e3cab4dd0..0243949a4 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -63,6 +63,2244 @@ try: except ImportError: # Python 2 import htmlentitydefs as compat_html_entities +try: # Python >= 3.3 + from compat_html_entities import html as compat_html_entities_html5 +except ImportError: + # Copied from CPython 3.5.1 html/entities.py + compat_html_entities_html5 = { + 'Aacute': '\xc1', + 'aacute': '\xe1', + 'Aacute;': '\xc1', + 'aacute;': '\xe1', + 'Abreve;': '\u0102', + 'abreve;': '\u0103', + 'ac;': '\u223e', + 'acd;': '\u223f', + 'acE;': '\u223e\u0333', + 'Acirc': '\xc2', + 'acirc': '\xe2', + 'Acirc;': '\xc2', + 'acirc;': '\xe2', + 'acute': '\xb4', + 'acute;': '\xb4', + 'Acy;': '\u0410', + 'acy;': '\u0430', + 'AElig': '\xc6', + 'aelig': '\xe6', + 'AElig;': '\xc6', + 'aelig;': '\xe6', + 'af;': '\u2061', + 'Afr;': '\U0001d504', + 'afr;': '\U0001d51e', + 'Agrave': '\xc0', + 'agrave': '\xe0', + 'Agrave;': '\xc0', + 'agrave;': '\xe0', + 'alefsym;': '\u2135', + 'aleph;': '\u2135', + 'Alpha;': '\u0391', + 'alpha;': '\u03b1', + 'Amacr;': '\u0100', + 'amacr;': '\u0101', + 'amalg;': '\u2a3f', + 'AMP': '&', + 'amp': '&', + 'AMP;': '&', + 'amp;': '&', + 'And;': '\u2a53', + 'and;': '\u2227', + 'andand;': '\u2a55', + 'andd;': '\u2a5c', + 'andslope;': '\u2a58', + 'andv;': '\u2a5a', + 'ang;': '\u2220', + 'ange;': '\u29a4', + 'angle;': '\u2220', + 'angmsd;': '\u2221', + 'angmsdaa;': '\u29a8', + 'angmsdab;': '\u29a9', + 'angmsdac;': '\u29aa', + 'angmsdad;': '\u29ab', + 'angmsdae;': '\u29ac', + 'angmsdaf;': '\u29ad', + 'angmsdag;': '\u29ae', + 'angmsdah;': '\u29af', + 'angrt;': '\u221f', + 'angrtvb;': '\u22be', + 'angrtvbd;': '\u299d', + 'angsph;': '\u2222', + 'angst;': '\xc5', + 'angzarr;': '\u237c', + 'Aogon;': '\u0104', + 'aogon;': '\u0105', + 'Aopf;': '\U0001d538', + 'aopf;': '\U0001d552', + 'ap;': '\u2248', + 'apacir;': '\u2a6f', + 'apE;': '\u2a70', + 'ape;': '\u224a', + 'apid;': '\u224b', + 'apos;': "'", + 'ApplyFunction;': '\u2061', + 'approx;': '\u2248', + 'approxeq;': '\u224a', + 'Aring': '\xc5', + 'aring': '\xe5', + 'Aring;': '\xc5', + 'aring;': '\xe5', + 'Ascr;': '\U0001d49c', + 'ascr;': '\U0001d4b6', + 'Assign;': '\u2254', + 'ast;': '*', + 'asymp;': '\u2248', + 'asympeq;': '\u224d', + 'Atilde': '\xc3', + 'atilde': '\xe3', + 'Atilde;': '\xc3', + 'atilde;': '\xe3', + 'Auml': '\xc4', + 'auml': '\xe4', + 'Auml;': '\xc4', + 'auml;': '\xe4', + 'awconint;': '\u2233', + 'awint;': '\u2a11', + 'backcong;': '\u224c', + 'backepsilon;': '\u03f6', + 'backprime;': '\u2035', + 'backsim;': '\u223d', + 'backsimeq;': '\u22cd', + 'Backslash;': '\u2216', + 'Barv;': '\u2ae7', + 'barvee;': '\u22bd', + 'Barwed;': '\u2306', + 'barwed;': '\u2305', + 'barwedge;': '\u2305', + 'bbrk;': '\u23b5', + 'bbrktbrk;': '\u23b6', + 'bcong;': '\u224c', + 'Bcy;': '\u0411', + 'bcy;': '\u0431', + 'bdquo;': '\u201e', + 'becaus;': '\u2235', + 'Because;': '\u2235', + 'because;': '\u2235', + 'bemptyv;': '\u29b0', + 'bepsi;': '\u03f6', + 'bernou;': '\u212c', + 'Bernoullis;': '\u212c', + 'Beta;': '\u0392', + 'beta;': '\u03b2', + 'beth;': '\u2136', + 'between;': '\u226c', + 'Bfr;': '\U0001d505', + 'bfr;': '\U0001d51f', + 'bigcap;': '\u22c2', + 'bigcirc;': '\u25ef', + 'bigcup;': '\u22c3', + 'bigodot;': '\u2a00', + 'bigoplus;': '\u2a01', + 'bigotimes;': '\u2a02', + 'bigsqcup;': '\u2a06', + 'bigstar;': '\u2605', + 'bigtriangledown;': '\u25bd', + 'bigtriangleup;': '\u25b3', + 'biguplus;': '\u2a04', + 'bigvee;': '\u22c1', + 'bigwedge;': '\u22c0', + 'bkarow;': '\u290d', + 'blacklozenge;': '\u29eb', + 'blacksquare;': '\u25aa', + 'blacktriangle;': '\u25b4', + 'blacktriangledown;': '\u25be', + 'blacktriangleleft;': '\u25c2', + 'blacktriangleright;': '\u25b8', + 'blank;': '\u2423', + 'blk12;': '\u2592', + 'blk14;': '\u2591', + 'blk34;': '\u2593', + 'block;': '\u2588', + 'bne;': '=\u20e5', + 'bnequiv;': '\u2261\u20e5', + 'bNot;': '\u2aed', + 'bnot;': '\u2310', + 'Bopf;': '\U0001d539', + 'bopf;': '\U0001d553', + 'bot;': '\u22a5', + 'bottom;': '\u22a5', + 'bowtie;': '\u22c8', + 'boxbox;': '\u29c9', + 'boxDL;': '\u2557', + 'boxDl;': '\u2556', + 'boxdL;': '\u2555', + 'boxdl;': '\u2510', + 'boxDR;': '\u2554', + 'boxDr;': '\u2553', + 'boxdR;': '\u2552', + 'boxdr;': '\u250c', + 'boxH;': '\u2550', + 'boxh;': '\u2500', + 'boxHD;': '\u2566', + 'boxHd;': '\u2564', + 'boxhD;': '\u2565', + 'boxhd;': '\u252c', + 'boxHU;': '\u2569', + 'boxHu;': '\u2567', + 'boxhU;': '\u2568', + 'boxhu;': '\u2534', + 'boxminus;': '\u229f', + 'boxplus;': '\u229e', + 'boxtimes;': '\u22a0', + 'boxUL;': '\u255d', + 'boxUl;': '\u255c', + 'boxuL;': '\u255b', + 'boxul;': '\u2518', + 'boxUR;': '\u255a', + 'boxUr;': '\u2559', + 'boxuR;': '\u2558', + 'boxur;': '\u2514', + 'boxV;': '\u2551', + 'boxv;': '\u2502', + 'boxVH;': '\u256c', + 'boxVh;': '\u256b', + 'boxvH;': '\u256a', + 'boxvh;': '\u253c', + 'boxVL;': '\u2563', + 'boxVl;': '\u2562', + 'boxvL;': '\u2561', + 'boxvl;': '\u2524', + 'boxVR;': '\u2560', + 'boxVr;': '\u255f', + 'boxvR;': '\u255e', + 'boxvr;': '\u251c', + 'bprime;': '\u2035', + 'Breve;': '\u02d8', + 'breve;': '\u02d8', + 'brvbar': '\xa6', + 'brvbar;': '\xa6', + 'Bscr;': '\u212c', + 'bscr;': '\U0001d4b7', + 'bsemi;': '\u204f', + 'bsim;': '\u223d', + 'bsime;': '\u22cd', + 'bsol;': '\\', + 'bsolb;': '\u29c5', + 'bsolhsub;': '\u27c8', + 'bull;': '\u2022', + 'bullet;': '\u2022', + 'bump;': '\u224e', + 'bumpE;': '\u2aae', + 'bumpe;': '\u224f', + 'Bumpeq;': '\u224e', + 'bumpeq;': '\u224f', + 'Cacute;': '\u0106', + 'cacute;': '\u0107', + 'Cap;': '\u22d2', + 'cap;': '\u2229', + 'capand;': '\u2a44', + 'capbrcup;': '\u2a49', + 'capcap;': '\u2a4b', + 'capcup;': '\u2a47', + 'capdot;': '\u2a40', + 'CapitalDifferentialD;': '\u2145', + 'caps;': '\u2229\ufe00', + 'caret;': '\u2041', + 'caron;': '\u02c7', + 'Cayleys;': '\u212d', + 'ccaps;': '\u2a4d', + 'Ccaron;': '\u010c', + 'ccaron;': '\u010d', + 'Ccedil': '\xc7', + 'ccedil': '\xe7', + 'Ccedil;': '\xc7', + 'ccedil;': '\xe7', + 'Ccirc;': '\u0108', + 'ccirc;': '\u0109', + 'Cconint;': '\u2230', + 'ccups;': '\u2a4c', + 'ccupssm;': '\u2a50', + 'Cdot;': '\u010a', + 'cdot;': '\u010b', + 'cedil': '\xb8', + 'cedil;': '\xb8', + 'Cedilla;': '\xb8', + 'cemptyv;': '\u29b2', + 'cent': '\xa2', + 'cent;': '\xa2', + 'CenterDot;': '\xb7', + 'centerdot;': '\xb7', + 'Cfr;': '\u212d', + 'cfr;': '\U0001d520', + 'CHcy;': '\u0427', + 'chcy;': '\u0447', + 'check;': '\u2713', + 'checkmark;': '\u2713', + 'Chi;': '\u03a7', + 'chi;': '\u03c7', + 'cir;': '\u25cb', + 'circ;': '\u02c6', + 'circeq;': '\u2257', + 'circlearrowleft;': '\u21ba', + 'circlearrowright;': '\u21bb', + 'circledast;': '\u229b', + 'circledcirc;': '\u229a', + 'circleddash;': '\u229d', + 'CircleDot;': '\u2299', + 'circledR;': '\xae', + 'circledS;': '\u24c8', + 'CircleMinus;': '\u2296', + 'CirclePlus;': '\u2295', + 'CircleTimes;': '\u2297', + 'cirE;': '\u29c3', + 'cire;': '\u2257', + 'cirfnint;': '\u2a10', + 'cirmid;': '\u2aef', + 'cirscir;': '\u29c2', + 'ClockwiseContourIntegral;': '\u2232', + 'CloseCurlyDoubleQuote;': '\u201d', + 'CloseCurlyQuote;': '\u2019', + 'clubs;': '\u2663', + 'clubsuit;': '\u2663', + 'Colon;': '\u2237', + 'colon;': ':', + 'Colone;': '\u2a74', + 'colone;': '\u2254', + 'coloneq;': '\u2254', + 'comma;': ',', + 'commat;': '@', + 'comp;': '\u2201', + 'compfn;': '\u2218', + 'complement;': '\u2201', + 'complexes;': '\u2102', + 'cong;': '\u2245', + 'congdot;': '\u2a6d', + 'Congruent;': '\u2261', + 'Conint;': '\u222f', + 'conint;': '\u222e', + 'ContourIntegral;': '\u222e', + 'Copf;': '\u2102', + 'copf;': '\U0001d554', + 'coprod;': '\u2210', + 'Coproduct;': '\u2210', + 'COPY': '\xa9', + 'copy': '\xa9', + 'COPY;': '\xa9', + 'copy;': '\xa9', + 'copysr;': '\u2117', + 'CounterClockwiseContourIntegral;': '\u2233', + 'crarr;': '\u21b5', + 'Cross;': '\u2a2f', + 'cross;': '\u2717', + 'Cscr;': '\U0001d49e', + 'cscr;': '\U0001d4b8', + 'csub;': '\u2acf', + 'csube;': '\u2ad1', + 'csup;': '\u2ad0', + 'csupe;': '\u2ad2', + 'ctdot;': '\u22ef', + 'cudarrl;': '\u2938', + 'cudarrr;': '\u2935', + 'cuepr;': '\u22de', + 'cuesc;': '\u22df', + 'cularr;': '\u21b6', + 'cularrp;': '\u293d', + 'Cup;': '\u22d3', + 'cup;': '\u222a', + 'cupbrcap;': '\u2a48', + 'CupCap;': '\u224d', + 'cupcap;': '\u2a46', + 'cupcup;': '\u2a4a', + 'cupdot;': '\u228d', + 'cupor;': '\u2a45', + 'cups;': '\u222a\ufe00', + 'curarr;': '\u21b7', + 'curarrm;': '\u293c', + 'curlyeqprec;': '\u22de', + 'curlyeqsucc;': '\u22df', + 'curlyvee;': '\u22ce', + 'curlywedge;': '\u22cf', + 'curren': '\xa4', + 'curren;': '\xa4', + 'curvearrowleft;': '\u21b6', + 'curvearrowright;': '\u21b7', + 'cuvee;': '\u22ce', + 'cuwed;': '\u22cf', + 'cwconint;': '\u2232', + 'cwint;': '\u2231', + 'cylcty;': '\u232d', + 'Dagger;': '\u2021', + 'dagger;': '\u2020', + 'daleth;': '\u2138', + 'Darr;': '\u21a1', + 'dArr;': '\u21d3', + 'darr;': '\u2193', + 'dash;': '\u2010', + 'Dashv;': '\u2ae4', + 'dashv;': '\u22a3', + 'dbkarow;': '\u290f', + 'dblac;': '\u02dd', + 'Dcaron;': '\u010e', + 'dcaron;': '\u010f', + 'Dcy;': '\u0414', + 'dcy;': '\u0434', + 'DD;': '\u2145', + 'dd;': '\u2146', + 'ddagger;': '\u2021', + 'ddarr;': '\u21ca', + 'DDotrahd;': '\u2911', + 'ddotseq;': '\u2a77', + 'deg': '\xb0', + 'deg;': '\xb0', + 'Del;': '\u2207', + 'Delta;': '\u0394', + 'delta;': '\u03b4', + 'demptyv;': '\u29b1', + 'dfisht;': '\u297f', + 'Dfr;': '\U0001d507', + 'dfr;': '\U0001d521', + 'dHar;': '\u2965', + 'dharl;': '\u21c3', + 'dharr;': '\u21c2', + 'DiacriticalAcute;': '\xb4', + 'DiacriticalDot;': '\u02d9', + 'DiacriticalDoubleAcute;': '\u02dd', + 'DiacriticalGrave;': '`', + 'DiacriticalTilde;': '\u02dc', + 'diam;': '\u22c4', + 'Diamond;': '\u22c4', + 'diamond;': '\u22c4', + 'diamondsuit;': '\u2666', + 'diams;': '\u2666', + 'die;': '\xa8', + 'DifferentialD;': '\u2146', + 'digamma;': '\u03dd', + 'disin;': '\u22f2', + 'div;': '\xf7', + 'divide': '\xf7', + 'divide;': '\xf7', + 'divideontimes;': '\u22c7', + 'divonx;': '\u22c7', + 'DJcy;': '\u0402', + 'djcy;': '\u0452', + 'dlcorn;': '\u231e', + 'dlcrop;': '\u230d', + 'dollar;': '$', + 'Dopf;': '\U0001d53b', + 'dopf;': '\U0001d555', + 'Dot;': '\xa8', + 'dot;': '\u02d9', + 'DotDot;': '\u20dc', + 'doteq;': '\u2250', + 'doteqdot;': '\u2251', + 'DotEqual;': '\u2250', + 'dotminus;': '\u2238', + 'dotplus;': '\u2214', + 'dotsquare;': '\u22a1', + 'doublebarwedge;': '\u2306', + 'DoubleContourIntegral;': '\u222f', + 'DoubleDot;': '\xa8', + 'DoubleDownArrow;': '\u21d3', + 'DoubleLeftArrow;': '\u21d0', + 'DoubleLeftRightArrow;': '\u21d4', + 'DoubleLeftTee;': '\u2ae4', + 'DoubleLongLeftArrow;': '\u27f8', + 'DoubleLongLeftRightArrow;': '\u27fa', + 'DoubleLongRightArrow;': '\u27f9', + 'DoubleRightArrow;': '\u21d2', + 'DoubleRightTee;': '\u22a8', + 'DoubleUpArrow;': '\u21d1', + 'DoubleUpDownArrow;': '\u21d5', + 'DoubleVerticalBar;': '\u2225', + 'DownArrow;': '\u2193', + 'Downarrow;': '\u21d3', + 'downarrow;': '\u2193', + 'DownArrowBar;': '\u2913', + 'DownArrowUpArrow;': '\u21f5', + 'DownBreve;': '\u0311', + 'downdownarrows;': '\u21ca', + 'downharpoonleft;': '\u21c3', + 'downharpoonright;': '\u21c2', + 'DownLeftRightVector;': '\u2950', + 'DownLeftTeeVector;': '\u295e', + 'DownLeftVector;': '\u21bd', + 'DownLeftVectorBar;': '\u2956', + 'DownRightTeeVector;': '\u295f', + 'DownRightVector;': '\u21c1', + 'DownRightVectorBar;': '\u2957', + 'DownTee;': '\u22a4', + 'DownTeeArrow;': '\u21a7', + 'drbkarow;': '\u2910', + 'drcorn;': '\u231f', + 'drcrop;': '\u230c', + 'Dscr;': '\U0001d49f', + 'dscr;': '\U0001d4b9', + 'DScy;': '\u0405', + 'dscy;': '\u0455', + 'dsol;': '\u29f6', + 'Dstrok;': '\u0110', + 'dstrok;': '\u0111', + 'dtdot;': '\u22f1', + 'dtri;': '\u25bf', + 'dtrif;': '\u25be', + 'duarr;': '\u21f5', + 'duhar;': '\u296f', + 'dwangle;': '\u29a6', + 'DZcy;': '\u040f', + 'dzcy;': '\u045f', + 'dzigrarr;': '\u27ff', + 'Eacute': '\xc9', + 'eacute': '\xe9', + 'Eacute;': '\xc9', + 'eacute;': '\xe9', + 'easter;': '\u2a6e', + 'Ecaron;': '\u011a', + 'ecaron;': '\u011b', + 'ecir;': '\u2256', + 'Ecirc': '\xca', + 'ecirc': '\xea', + 'Ecirc;': '\xca', + 'ecirc;': '\xea', + 'ecolon;': '\u2255', + 'Ecy;': '\u042d', + 'ecy;': '\u044d', + 'eDDot;': '\u2a77', + 'Edot;': '\u0116', + 'eDot;': '\u2251', + 'edot;': '\u0117', + 'ee;': '\u2147', + 'efDot;': '\u2252', + 'Efr;': '\U0001d508', + 'efr;': '\U0001d522', + 'eg;': '\u2a9a', + 'Egrave': '\xc8', + 'egrave': '\xe8', + 'Egrave;': '\xc8', + 'egrave;': '\xe8', + 'egs;': '\u2a96', + 'egsdot;': '\u2a98', + 'el;': '\u2a99', + 'Element;': '\u2208', + 'elinters;': '\u23e7', + 'ell;': '\u2113', + 'els;': '\u2a95', + 'elsdot;': '\u2a97', + 'Emacr;': '\u0112', + 'emacr;': '\u0113', + 'empty;': '\u2205', + 'emptyset;': '\u2205', + 'EmptySmallSquare;': '\u25fb', + 'emptyv;': '\u2205', + 'EmptyVerySmallSquare;': '\u25ab', + 'emsp13;': '\u2004', + 'emsp14;': '\u2005', + 'emsp;': '\u2003', + 'ENG;': '\u014a', + 'eng;': '\u014b', + 'ensp;': '\u2002', + 'Eogon;': '\u0118', + 'eogon;': '\u0119', + 'Eopf;': '\U0001d53c', + 'eopf;': '\U0001d556', + 'epar;': '\u22d5', + 'eparsl;': '\u29e3', + 'eplus;': '\u2a71', + 'epsi;': '\u03b5', + 'Epsilon;': '\u0395', + 'epsilon;': '\u03b5', + 'epsiv;': '\u03f5', + 'eqcirc;': '\u2256', + 'eqcolon;': '\u2255', + 'eqsim;': '\u2242', + 'eqslantgtr;': '\u2a96', + 'eqslantless;': '\u2a95', + 'Equal;': '\u2a75', + 'equals;': '=', + 'EqualTilde;': '\u2242', + 'equest;': '\u225f', + 'Equilibrium;': '\u21cc', + 'equiv;': '\u2261', + 'equivDD;': '\u2a78', + 'eqvparsl;': '\u29e5', + 'erarr;': '\u2971', + 'erDot;': '\u2253', + 'Escr;': '\u2130', + 'escr;': '\u212f', + 'esdot;': '\u2250', + 'Esim;': '\u2a73', + 'esim;': '\u2242', + 'Eta;': '\u0397', + 'eta;': '\u03b7', + 'ETH': '\xd0', + 'eth': '\xf0', + 'ETH;': '\xd0', + 'eth;': '\xf0', + 'Euml': '\xcb', + 'euml': '\xeb', + 'Euml;': '\xcb', + 'euml;': '\xeb', + 'euro;': '\u20ac', + 'excl;': '!', + 'exist;': '\u2203', + 'Exists;': '\u2203', + 'expectation;': '\u2130', + 'ExponentialE;': '\u2147', + 'exponentiale;': '\u2147', + 'fallingdotseq;': '\u2252', + 'Fcy;': '\u0424', + 'fcy;': '\u0444', + 'female;': '\u2640', + 'ffilig;': '\ufb03', + 'fflig;': '\ufb00', + 'ffllig;': '\ufb04', + 'Ffr;': '\U0001d509', + 'ffr;': '\U0001d523', + 'filig;': '\ufb01', + 'FilledSmallSquare;': '\u25fc', + 'FilledVerySmallSquare;': '\u25aa', + 'fjlig;': 'fj', + 'flat;': '\u266d', + 'fllig;': '\ufb02', + 'fltns;': '\u25b1', + 'fnof;': '\u0192', + 'Fopf;': '\U0001d53d', + 'fopf;': '\U0001d557', + 'ForAll;': '\u2200', + 'forall;': '\u2200', + 'fork;': '\u22d4', + 'forkv;': '\u2ad9', + 'Fouriertrf;': '\u2131', + 'fpartint;': '\u2a0d', + 'frac12': '\xbd', + 'frac12;': '\xbd', + 'frac13;': '\u2153', + 'frac14': '\xbc', + 'frac14;': '\xbc', + 'frac15;': '\u2155', + 'frac16;': '\u2159', + 'frac18;': '\u215b', + 'frac23;': '\u2154', + 'frac25;': '\u2156', + 'frac34': '\xbe', + 'frac34;': '\xbe', + 'frac35;': '\u2157', + 'frac38;': '\u215c', + 'frac45;': '\u2158', + 'frac56;': '\u215a', + 'frac58;': '\u215d', + 'frac78;': '\u215e', + 'frasl;': '\u2044', + 'frown;': '\u2322', + 'Fscr;': '\u2131', + 'fscr;': '\U0001d4bb', + 'gacute;': '\u01f5', + 'Gamma;': '\u0393', + 'gamma;': '\u03b3', + 'Gammad;': '\u03dc', + 'gammad;': '\u03dd', + 'gap;': '\u2a86', + 'Gbreve;': '\u011e', + 'gbreve;': '\u011f', + 'Gcedil;': '\u0122', + 'Gcirc;': '\u011c', + 'gcirc;': '\u011d', + 'Gcy;': '\u0413', + 'gcy;': '\u0433', + 'Gdot;': '\u0120', + 'gdot;': '\u0121', + 'gE;': '\u2267', + 'ge;': '\u2265', + 'gEl;': '\u2a8c', + 'gel;': '\u22db', + 'geq;': '\u2265', + 'geqq;': '\u2267', + 'geqslant;': '\u2a7e', + 'ges;': '\u2a7e', + 'gescc;': '\u2aa9', + 'gesdot;': '\u2a80', + 'gesdoto;': '\u2a82', + 'gesdotol;': '\u2a84', + 'gesl;': '\u22db\ufe00', + 'gesles;': '\u2a94', + 'Gfr;': '\U0001d50a', + 'gfr;': '\U0001d524', + 'Gg;': '\u22d9', + 'gg;': '\u226b', + 'ggg;': '\u22d9', + 'gimel;': '\u2137', + 'GJcy;': '\u0403', + 'gjcy;': '\u0453', + 'gl;': '\u2277', + 'gla;': '\u2aa5', + 'glE;': '\u2a92', + 'glj;': '\u2aa4', + 'gnap;': '\u2a8a', + 'gnapprox;': '\u2a8a', + 'gnE;': '\u2269', + 'gne;': '\u2a88', + 'gneq;': '\u2a88', + 'gneqq;': '\u2269', + 'gnsim;': '\u22e7', + 'Gopf;': '\U0001d53e', + 'gopf;': '\U0001d558', + 'grave;': '`', + 'GreaterEqual;': '\u2265', + 'GreaterEqualLess;': '\u22db', + 'GreaterFullEqual;': '\u2267', + 'GreaterGreater;': '\u2aa2', + 'GreaterLess;': '\u2277', + 'GreaterSlantEqual;': '\u2a7e', + 'GreaterTilde;': '\u2273', + 'Gscr;': '\U0001d4a2', + 'gscr;': '\u210a', + 'gsim;': '\u2273', + 'gsime;': '\u2a8e', + 'gsiml;': '\u2a90', + 'GT': '>', + 'gt': '>', + 'GT;': '>', + 'Gt;': '\u226b', + 'gt;': '>', + 'gtcc;': '\u2aa7', + 'gtcir;': '\u2a7a', + 'gtdot;': '\u22d7', + 'gtlPar;': '\u2995', + 'gtquest;': '\u2a7c', + 'gtrapprox;': '\u2a86', + 'gtrarr;': '\u2978', + 'gtrdot;': '\u22d7', + 'gtreqless;': '\u22db', + 'gtreqqless;': '\u2a8c', + 'gtrless;': '\u2277', + 'gtrsim;': '\u2273', + 'gvertneqq;': '\u2269\ufe00', + 'gvnE;': '\u2269\ufe00', + 'Hacek;': '\u02c7', + 'hairsp;': '\u200a', + 'half;': '\xbd', + 'hamilt;': '\u210b', + 'HARDcy;': '\u042a', + 'hardcy;': '\u044a', + 'hArr;': '\u21d4', + 'harr;': '\u2194', + 'harrcir;': '\u2948', + 'harrw;': '\u21ad', + 'Hat;': '^', + 'hbar;': '\u210f', + 'Hcirc;': '\u0124', + 'hcirc;': '\u0125', + 'hearts;': '\u2665', + 'heartsuit;': '\u2665', + 'hellip;': '\u2026', + 'hercon;': '\u22b9', + 'Hfr;': '\u210c', + 'hfr;': '\U0001d525', + 'HilbertSpace;': '\u210b', + 'hksearow;': '\u2925', + 'hkswarow;': '\u2926', + 'hoarr;': '\u21ff', + 'homtht;': '\u223b', + 'hookleftarrow;': '\u21a9', + 'hookrightarrow;': '\u21aa', + 'Hopf;': '\u210d', + 'hopf;': '\U0001d559', + 'horbar;': '\u2015', + 'HorizontalLine;': '\u2500', + 'Hscr;': '\u210b', + 'hscr;': '\U0001d4bd', + 'hslash;': '\u210f', + 'Hstrok;': '\u0126', + 'hstrok;': '\u0127', + 'HumpDownHump;': '\u224e', + 'HumpEqual;': '\u224f', + 'hybull;': '\u2043', + 'hyphen;': '\u2010', + 'Iacute': '\xcd', + 'iacute': '\xed', + 'Iacute;': '\xcd', + 'iacute;': '\xed', + 'ic;': '\u2063', + 'Icirc': '\xce', + 'icirc': '\xee', + 'Icirc;': '\xce', + 'icirc;': '\xee', + 'Icy;': '\u0418', + 'icy;': '\u0438', + 'Idot;': '\u0130', + 'IEcy;': '\u0415', + 'iecy;': '\u0435', + 'iexcl': '\xa1', + 'iexcl;': '\xa1', + 'iff;': '\u21d4', + 'Ifr;': '\u2111', + 'ifr;': '\U0001d526', + 'Igrave': '\xcc', + 'igrave': '\xec', + 'Igrave;': '\xcc', + 'igrave;': '\xec', + 'ii;': '\u2148', + 'iiiint;': '\u2a0c', + 'iiint;': '\u222d', + 'iinfin;': '\u29dc', + 'iiota;': '\u2129', + 'IJlig;': '\u0132', + 'ijlig;': '\u0133', + 'Im;': '\u2111', + 'Imacr;': '\u012a', + 'imacr;': '\u012b', + 'image;': '\u2111', + 'ImaginaryI;': '\u2148', + 'imagline;': '\u2110', + 'imagpart;': '\u2111', + 'imath;': '\u0131', + 'imof;': '\u22b7', + 'imped;': '\u01b5', + 'Implies;': '\u21d2', + 'in;': '\u2208', + 'incare;': '\u2105', + 'infin;': '\u221e', + 'infintie;': '\u29dd', + 'inodot;': '\u0131', + 'Int;': '\u222c', + 'int;': '\u222b', + 'intcal;': '\u22ba', + 'integers;': '\u2124', + 'Integral;': '\u222b', + 'intercal;': '\u22ba', + 'Intersection;': '\u22c2', + 'intlarhk;': '\u2a17', + 'intprod;': '\u2a3c', + 'InvisibleComma;': '\u2063', + 'InvisibleTimes;': '\u2062', + 'IOcy;': '\u0401', + 'iocy;': '\u0451', + 'Iogon;': '\u012e', + 'iogon;': '\u012f', + 'Iopf;': '\U0001d540', + 'iopf;': '\U0001d55a', + 'Iota;': '\u0399', + 'iota;': '\u03b9', + 'iprod;': '\u2a3c', + 'iquest': '\xbf', + 'iquest;': '\xbf', + 'Iscr;': '\u2110', + 'iscr;': '\U0001d4be', + 'isin;': '\u2208', + 'isindot;': '\u22f5', + 'isinE;': '\u22f9', + 'isins;': '\u22f4', + 'isinsv;': '\u22f3', + 'isinv;': '\u2208', + 'it;': '\u2062', + 'Itilde;': '\u0128', + 'itilde;': '\u0129', + 'Iukcy;': '\u0406', + 'iukcy;': '\u0456', + 'Iuml': '\xcf', + 'iuml': '\xef', + 'Iuml;': '\xcf', + 'iuml;': '\xef', + 'Jcirc;': '\u0134', + 'jcirc;': '\u0135', + 'Jcy;': '\u0419', + 'jcy;': '\u0439', + 'Jfr;': '\U0001d50d', + 'jfr;': '\U0001d527', + 'jmath;': '\u0237', + 'Jopf;': '\U0001d541', + 'jopf;': '\U0001d55b', + 'Jscr;': '\U0001d4a5', + 'jscr;': '\U0001d4bf', + 'Jsercy;': '\u0408', + 'jsercy;': '\u0458', + 'Jukcy;': '\u0404', + 'jukcy;': '\u0454', + 'Kappa;': '\u039a', + 'kappa;': '\u03ba', + 'kappav;': '\u03f0', + 'Kcedil;': '\u0136', + 'kcedil;': '\u0137', + 'Kcy;': '\u041a', + 'kcy;': '\u043a', + 'Kfr;': '\U0001d50e', + 'kfr;': '\U0001d528', + 'kgreen;': '\u0138', + 'KHcy;': '\u0425', + 'khcy;': '\u0445', + 'KJcy;': '\u040c', + 'kjcy;': '\u045c', + 'Kopf;': '\U0001d542', + 'kopf;': '\U0001d55c', + 'Kscr;': '\U0001d4a6', + 'kscr;': '\U0001d4c0', + 'lAarr;': '\u21da', + 'Lacute;': '\u0139', + 'lacute;': '\u013a', + 'laemptyv;': '\u29b4', + 'lagran;': '\u2112', + 'Lambda;': '\u039b', + 'lambda;': '\u03bb', + 'Lang;': '\u27ea', + 'lang;': '\u27e8', + 'langd;': '\u2991', + 'langle;': '\u27e8', + 'lap;': '\u2a85', + 'Laplacetrf;': '\u2112', + 'laquo': '\xab', + 'laquo;': '\xab', + 'Larr;': '\u219e', + 'lArr;': '\u21d0', + 'larr;': '\u2190', + 'larrb;': '\u21e4', + 'larrbfs;': '\u291f', + 'larrfs;': '\u291d', + 'larrhk;': '\u21a9', + 'larrlp;': '\u21ab', + 'larrpl;': '\u2939', + 'larrsim;': '\u2973', + 'larrtl;': '\u21a2', + 'lat;': '\u2aab', + 'lAtail;': '\u291b', + 'latail;': '\u2919', + 'late;': '\u2aad', + 'lates;': '\u2aad\ufe00', + 'lBarr;': '\u290e', + 'lbarr;': '\u290c', + 'lbbrk;': '\u2772', + 'lbrace;': '{', + 'lbrack;': '[', + 'lbrke;': '\u298b', + 'lbrksld;': '\u298f', + 'lbrkslu;': '\u298d', + 'Lcaron;': '\u013d', + 'lcaron;': '\u013e', + 'Lcedil;': '\u013b', + 'lcedil;': '\u013c', + 'lceil;': '\u2308', + 'lcub;': '{', + 'Lcy;': '\u041b', + 'lcy;': '\u043b', + 'ldca;': '\u2936', + 'ldquo;': '\u201c', + 'ldquor;': '\u201e', + 'ldrdhar;': '\u2967', + 'ldrushar;': '\u294b', + 'ldsh;': '\u21b2', + 'lE;': '\u2266', + 'le;': '\u2264', + 'LeftAngleBracket;': '\u27e8', + 'LeftArrow;': '\u2190', + 'Leftarrow;': '\u21d0', + 'leftarrow;': '\u2190', + 'LeftArrowBar;': '\u21e4', + 'LeftArrowRightArrow;': '\u21c6', + 'leftarrowtail;': '\u21a2', + 'LeftCeiling;': '\u2308', + 'LeftDoubleBracket;': '\u27e6', + 'LeftDownTeeVector;': '\u2961', + 'LeftDownVector;': '\u21c3', + 'LeftDownVectorBar;': '\u2959', + 'LeftFloor;': '\u230a', + 'leftharpoondown;': '\u21bd', + 'leftharpoonup;': '\u21bc', + 'leftleftarrows;': '\u21c7', + 'LeftRightArrow;': '\u2194', + 'Leftrightarrow;': '\u21d4', + 'leftrightarrow;': '\u2194', + 'leftrightarrows;': '\u21c6', + 'leftrightharpoons;': '\u21cb', + 'leftrightsquigarrow;': '\u21ad', + 'LeftRightVector;': '\u294e', + 'LeftTee;': '\u22a3', + 'LeftTeeArrow;': '\u21a4', + 'LeftTeeVector;': '\u295a', + 'leftthreetimes;': '\u22cb', + 'LeftTriangle;': '\u22b2', + 'LeftTriangleBar;': '\u29cf', + 'LeftTriangleEqual;': '\u22b4', + 'LeftUpDownVector;': '\u2951', + 'LeftUpTeeVector;': '\u2960', + 'LeftUpVector;': '\u21bf', + 'LeftUpVectorBar;': '\u2958', + 'LeftVector;': '\u21bc', + 'LeftVectorBar;': '\u2952', + 'lEg;': '\u2a8b', + 'leg;': '\u22da', + 'leq;': '\u2264', + 'leqq;': '\u2266', + 'leqslant;': '\u2a7d', + 'les;': '\u2a7d', + 'lescc;': '\u2aa8', + 'lesdot;': '\u2a7f', + 'lesdoto;': '\u2a81', + 'lesdotor;': '\u2a83', + 'lesg;': '\u22da\ufe00', + 'lesges;': '\u2a93', + 'lessapprox;': '\u2a85', + 'lessdot;': '\u22d6', + 'lesseqgtr;': '\u22da', + 'lesseqqgtr;': '\u2a8b', + 'LessEqualGreater;': '\u22da', + 'LessFullEqual;': '\u2266', + 'LessGreater;': '\u2276', + 'lessgtr;': '\u2276', + 'LessLess;': '\u2aa1', + 'lesssim;': '\u2272', + 'LessSlantEqual;': '\u2a7d', + 'LessTilde;': '\u2272', + 'lfisht;': '\u297c', + 'lfloor;': '\u230a', + 'Lfr;': '\U0001d50f', + 'lfr;': '\U0001d529', + 'lg;': '\u2276', + 'lgE;': '\u2a91', + 'lHar;': '\u2962', + 'lhard;': '\u21bd', + 'lharu;': '\u21bc', + 'lharul;': '\u296a', + 'lhblk;': '\u2584', + 'LJcy;': '\u0409', + 'ljcy;': '\u0459', + 'Ll;': '\u22d8', + 'll;': '\u226a', + 'llarr;': '\u21c7', + 'llcorner;': '\u231e', + 'Lleftarrow;': '\u21da', + 'llhard;': '\u296b', + 'lltri;': '\u25fa', + 'Lmidot;': '\u013f', + 'lmidot;': '\u0140', + 'lmoust;': '\u23b0', + 'lmoustache;': '\u23b0', + 'lnap;': '\u2a89', + 'lnapprox;': '\u2a89', + 'lnE;': '\u2268', + 'lne;': '\u2a87', + 'lneq;': '\u2a87', + 'lneqq;': '\u2268', + 'lnsim;': '\u22e6', + 'loang;': '\u27ec', + 'loarr;': '\u21fd', + 'lobrk;': '\u27e6', + 'LongLeftArrow;': '\u27f5', + 'Longleftarrow;': '\u27f8', + 'longleftarrow;': '\u27f5', + 'LongLeftRightArrow;': '\u27f7', + 'Longleftrightarrow;': '\u27fa', + 'longleftrightarrow;': '\u27f7', + 'longmapsto;': '\u27fc', + 'LongRightArrow;': '\u27f6', + 'Longrightarrow;': '\u27f9', + 'longrightarrow;': '\u27f6', + 'looparrowleft;': '\u21ab', + 'looparrowright;': '\u21ac', + 'lopar;': '\u2985', + 'Lopf;': '\U0001d543', + 'lopf;': '\U0001d55d', + 'loplus;': '\u2a2d', + 'lotimes;': '\u2a34', + 'lowast;': '\u2217', + 'lowbar;': '_', + 'LowerLeftArrow;': '\u2199', + 'LowerRightArrow;': '\u2198', + 'loz;': '\u25ca', + 'lozenge;': '\u25ca', + 'lozf;': '\u29eb', + 'lpar;': '(', + 'lparlt;': '\u2993', + 'lrarr;': '\u21c6', + 'lrcorner;': '\u231f', + 'lrhar;': '\u21cb', + 'lrhard;': '\u296d', + 'lrm;': '\u200e', + 'lrtri;': '\u22bf', + 'lsaquo;': '\u2039', + 'Lscr;': '\u2112', + 'lscr;': '\U0001d4c1', + 'Lsh;': '\u21b0', + 'lsh;': '\u21b0', + 'lsim;': '\u2272', + 'lsime;': '\u2a8d', + 'lsimg;': '\u2a8f', + 'lsqb;': '[', + 'lsquo;': '\u2018', + 'lsquor;': '\u201a', + 'Lstrok;': '\u0141', + 'lstrok;': '\u0142', + 'LT': '<', + 'lt': '<', + 'LT;': '<', + 'Lt;': '\u226a', + 'lt;': '<', + 'ltcc;': '\u2aa6', + 'ltcir;': '\u2a79', + 'ltdot;': '\u22d6', + 'lthree;': '\u22cb', + 'ltimes;': '\u22c9', + 'ltlarr;': '\u2976', + 'ltquest;': '\u2a7b', + 'ltri;': '\u25c3', + 'ltrie;': '\u22b4', + 'ltrif;': '\u25c2', + 'ltrPar;': '\u2996', + 'lurdshar;': '\u294a', + 'luruhar;': '\u2966', + 'lvertneqq;': '\u2268\ufe00', + 'lvnE;': '\u2268\ufe00', + 'macr': '\xaf', + 'macr;': '\xaf', + 'male;': '\u2642', + 'malt;': '\u2720', + 'maltese;': '\u2720', + 'Map;': '\u2905', + 'map;': '\u21a6', + 'mapsto;': '\u21a6', + 'mapstodown;': '\u21a7', + 'mapstoleft;': '\u21a4', + 'mapstoup;': '\u21a5', + 'marker;': '\u25ae', + 'mcomma;': '\u2a29', + 'Mcy;': '\u041c', + 'mcy;': '\u043c', + 'mdash;': '\u2014', + 'mDDot;': '\u223a', + 'measuredangle;': '\u2221', + 'MediumSpace;': '\u205f', + 'Mellintrf;': '\u2133', + 'Mfr;': '\U0001d510', + 'mfr;': '\U0001d52a', + 'mho;': '\u2127', + 'micro': '\xb5', + 'micro;': '\xb5', + 'mid;': '\u2223', + 'midast;': '*', + 'midcir;': '\u2af0', + 'middot': '\xb7', + 'middot;': '\xb7', + 'minus;': '\u2212', + 'minusb;': '\u229f', + 'minusd;': '\u2238', + 'minusdu;': '\u2a2a', + 'MinusPlus;': '\u2213', + 'mlcp;': '\u2adb', + 'mldr;': '\u2026', + 'mnplus;': '\u2213', + 'models;': '\u22a7', + 'Mopf;': '\U0001d544', + 'mopf;': '\U0001d55e', + 'mp;': '\u2213', + 'Mscr;': '\u2133', + 'mscr;': '\U0001d4c2', + 'mstpos;': '\u223e', + 'Mu;': '\u039c', + 'mu;': '\u03bc', + 'multimap;': '\u22b8', + 'mumap;': '\u22b8', + 'nabla;': '\u2207', + 'Nacute;': '\u0143', + 'nacute;': '\u0144', + 'nang;': '\u2220\u20d2', + 'nap;': '\u2249', + 'napE;': '\u2a70\u0338', + 'napid;': '\u224b\u0338', + 'napos;': '\u0149', + 'napprox;': '\u2249', + 'natur;': '\u266e', + 'natural;': '\u266e', + 'naturals;': '\u2115', + 'nbsp': '\xa0', + 'nbsp;': '\xa0', + 'nbump;': '\u224e\u0338', + 'nbumpe;': '\u224f\u0338', + 'ncap;': '\u2a43', + 'Ncaron;': '\u0147', + 'ncaron;': '\u0148', + 'Ncedil;': '\u0145', + 'ncedil;': '\u0146', + 'ncong;': '\u2247', + 'ncongdot;': '\u2a6d\u0338', + 'ncup;': '\u2a42', + 'Ncy;': '\u041d', + 'ncy;': '\u043d', + 'ndash;': '\u2013', + 'ne;': '\u2260', + 'nearhk;': '\u2924', + 'neArr;': '\u21d7', + 'nearr;': '\u2197', + 'nearrow;': '\u2197', + 'nedot;': '\u2250\u0338', + 'NegativeMediumSpace;': '\u200b', + 'NegativeThickSpace;': '\u200b', + 'NegativeThinSpace;': '\u200b', + 'NegativeVeryThinSpace;': '\u200b', + 'nequiv;': '\u2262', + 'nesear;': '\u2928', + 'nesim;': '\u2242\u0338', + 'NestedGreaterGreater;': '\u226b', + 'NestedLessLess;': '\u226a', + 'NewLine;': '\n', + 'nexist;': '\u2204', + 'nexists;': '\u2204', + 'Nfr;': '\U0001d511', + 'nfr;': '\U0001d52b', + 'ngE;': '\u2267\u0338', + 'nge;': '\u2271', + 'ngeq;': '\u2271', + 'ngeqq;': '\u2267\u0338', + 'ngeqslant;': '\u2a7e\u0338', + 'nges;': '\u2a7e\u0338', + 'nGg;': '\u22d9\u0338', + 'ngsim;': '\u2275', + 'nGt;': '\u226b\u20d2', + 'ngt;': '\u226f', + 'ngtr;': '\u226f', + 'nGtv;': '\u226b\u0338', + 'nhArr;': '\u21ce', + 'nharr;': '\u21ae', + 'nhpar;': '\u2af2', + 'ni;': '\u220b', + 'nis;': '\u22fc', + 'nisd;': '\u22fa', + 'niv;': '\u220b', + 'NJcy;': '\u040a', + 'njcy;': '\u045a', + 'nlArr;': '\u21cd', + 'nlarr;': '\u219a', + 'nldr;': '\u2025', + 'nlE;': '\u2266\u0338', + 'nle;': '\u2270', + 'nLeftarrow;': '\u21cd', + 'nleftarrow;': '\u219a', + 'nLeftrightarrow;': '\u21ce', + 'nleftrightarrow;': '\u21ae', + 'nleq;': '\u2270', + 'nleqq;': '\u2266\u0338', + 'nleqslant;': '\u2a7d\u0338', + 'nles;': '\u2a7d\u0338', + 'nless;': '\u226e', + 'nLl;': '\u22d8\u0338', + 'nlsim;': '\u2274', + 'nLt;': '\u226a\u20d2', + 'nlt;': '\u226e', + 'nltri;': '\u22ea', + 'nltrie;': '\u22ec', + 'nLtv;': '\u226a\u0338', + 'nmid;': '\u2224', + 'NoBreak;': '\u2060', + 'NonBreakingSpace;': '\xa0', + 'Nopf;': '\u2115', + 'nopf;': '\U0001d55f', + 'not': '\xac', + 'Not;': '\u2aec', + 'not;': '\xac', + 'NotCongruent;': '\u2262', + 'NotCupCap;': '\u226d', + 'NotDoubleVerticalBar;': '\u2226', + 'NotElement;': '\u2209', + 'NotEqual;': '\u2260', + 'NotEqualTilde;': '\u2242\u0338', + 'NotExists;': '\u2204', + 'NotGreater;': '\u226f', + 'NotGreaterEqual;': '\u2271', + 'NotGreaterFullEqual;': '\u2267\u0338', + 'NotGreaterGreater;': '\u226b\u0338', + 'NotGreaterLess;': '\u2279', + 'NotGreaterSlantEqual;': '\u2a7e\u0338', + 'NotGreaterTilde;': '\u2275', + 'NotHumpDownHump;': '\u224e\u0338', + 'NotHumpEqual;': '\u224f\u0338', + 'notin;': '\u2209', + 'notindot;': '\u22f5\u0338', + 'notinE;': '\u22f9\u0338', + 'notinva;': '\u2209', + 'notinvb;': '\u22f7', + 'notinvc;': '\u22f6', + 'NotLeftTriangle;': '\u22ea', + 'NotLeftTriangleBar;': '\u29cf\u0338', + 'NotLeftTriangleEqual;': '\u22ec', + 'NotLess;': '\u226e', + 'NotLessEqual;': '\u2270', + 'NotLessGreater;': '\u2278', + 'NotLessLess;': '\u226a\u0338', + 'NotLessSlantEqual;': '\u2a7d\u0338', + 'NotLessTilde;': '\u2274', + 'NotNestedGreaterGreater;': '\u2aa2\u0338', + 'NotNestedLessLess;': '\u2aa1\u0338', + 'notni;': '\u220c', + 'notniva;': '\u220c', + 'notnivb;': '\u22fe', + 'notnivc;': '\u22fd', + 'NotPrecedes;': '\u2280', + 'NotPrecedesEqual;': '\u2aaf\u0338', + 'NotPrecedesSlantEqual;': '\u22e0', + 'NotReverseElement;': '\u220c', + 'NotRightTriangle;': '\u22eb', + 'NotRightTriangleBar;': '\u29d0\u0338', + 'NotRightTriangleEqual;': '\u22ed', + 'NotSquareSubset;': '\u228f\u0338', + 'NotSquareSubsetEqual;': '\u22e2', + 'NotSquareSuperset;': '\u2290\u0338', + 'NotSquareSupersetEqual;': '\u22e3', + 'NotSubset;': '\u2282\u20d2', + 'NotSubsetEqual;': '\u2288', + 'NotSucceeds;': '\u2281', + 'NotSucceedsEqual;': '\u2ab0\u0338', + 'NotSucceedsSlantEqual;': '\u22e1', + 'NotSucceedsTilde;': '\u227f\u0338', + 'NotSuperset;': '\u2283\u20d2', + 'NotSupersetEqual;': '\u2289', + 'NotTilde;': '\u2241', + 'NotTildeEqual;': '\u2244', + 'NotTildeFullEqual;': '\u2247', + 'NotTildeTilde;': '\u2249', + 'NotVerticalBar;': '\u2224', + 'npar;': '\u2226', + 'nparallel;': '\u2226', + 'nparsl;': '\u2afd\u20e5', + 'npart;': '\u2202\u0338', + 'npolint;': '\u2a14', + 'npr;': '\u2280', + 'nprcue;': '\u22e0', + 'npre;': '\u2aaf\u0338', + 'nprec;': '\u2280', + 'npreceq;': '\u2aaf\u0338', + 'nrArr;': '\u21cf', + 'nrarr;': '\u219b', + 'nrarrc;': '\u2933\u0338', + 'nrarrw;': '\u219d\u0338', + 'nRightarrow;': '\u21cf', + 'nrightarrow;': '\u219b', + 'nrtri;': '\u22eb', + 'nrtrie;': '\u22ed', + 'nsc;': '\u2281', + 'nsccue;': '\u22e1', + 'nsce;': '\u2ab0\u0338', + 'Nscr;': '\U0001d4a9', + 'nscr;': '\U0001d4c3', + 'nshortmid;': '\u2224', + 'nshortparallel;': '\u2226', + 'nsim;': '\u2241', + 'nsime;': '\u2244', + 'nsimeq;': '\u2244', + 'nsmid;': '\u2224', + 'nspar;': '\u2226', + 'nsqsube;': '\u22e2', + 'nsqsupe;': '\u22e3', + 'nsub;': '\u2284', + 'nsubE;': '\u2ac5\u0338', + 'nsube;': '\u2288', + 'nsubset;': '\u2282\u20d2', + 'nsubseteq;': '\u2288', + 'nsubseteqq;': '\u2ac5\u0338', + 'nsucc;': '\u2281', + 'nsucceq;': '\u2ab0\u0338', + 'nsup;': '\u2285', + 'nsupE;': '\u2ac6\u0338', + 'nsupe;': '\u2289', + 'nsupset;': '\u2283\u20d2', + 'nsupseteq;': '\u2289', + 'nsupseteqq;': '\u2ac6\u0338', + 'ntgl;': '\u2279', + 'Ntilde': '\xd1', + 'ntilde': '\xf1', + 'Ntilde;': '\xd1', + 'ntilde;': '\xf1', + 'ntlg;': '\u2278', + 'ntriangleleft;': '\u22ea', + 'ntrianglelefteq;': '\u22ec', + 'ntriangleright;': '\u22eb', + 'ntrianglerighteq;': '\u22ed', + 'Nu;': '\u039d', + 'nu;': '\u03bd', + 'num;': '#', + 'numero;': '\u2116', + 'numsp;': '\u2007', + 'nvap;': '\u224d\u20d2', + 'nVDash;': '\u22af', + 'nVdash;': '\u22ae', + 'nvDash;': '\u22ad', + 'nvdash;': '\u22ac', + 'nvge;': '\u2265\u20d2', + 'nvgt;': '>\u20d2', + 'nvHarr;': '\u2904', + 'nvinfin;': '\u29de', + 'nvlArr;': '\u2902', + 'nvle;': '\u2264\u20d2', + 'nvlt;': '<\u20d2', + 'nvltrie;': '\u22b4\u20d2', + 'nvrArr;': '\u2903', + 'nvrtrie;': '\u22b5\u20d2', + 'nvsim;': '\u223c\u20d2', + 'nwarhk;': '\u2923', + 'nwArr;': '\u21d6', + 'nwarr;': '\u2196', + 'nwarrow;': '\u2196', + 'nwnear;': '\u2927', + 'Oacute': '\xd3', + 'oacute': '\xf3', + 'Oacute;': '\xd3', + 'oacute;': '\xf3', + 'oast;': '\u229b', + 'ocir;': '\u229a', + 'Ocirc': '\xd4', + 'ocirc': '\xf4', + 'Ocirc;': '\xd4', + 'ocirc;': '\xf4', + 'Ocy;': '\u041e', + 'ocy;': '\u043e', + 'odash;': '\u229d', + 'Odblac;': '\u0150', + 'odblac;': '\u0151', + 'odiv;': '\u2a38', + 'odot;': '\u2299', + 'odsold;': '\u29bc', + 'OElig;': '\u0152', + 'oelig;': '\u0153', + 'ofcir;': '\u29bf', + 'Ofr;': '\U0001d512', + 'ofr;': '\U0001d52c', + 'ogon;': '\u02db', + 'Ograve': '\xd2', + 'ograve': '\xf2', + 'Ograve;': '\xd2', + 'ograve;': '\xf2', + 'ogt;': '\u29c1', + 'ohbar;': '\u29b5', + 'ohm;': '\u03a9', + 'oint;': '\u222e', + 'olarr;': '\u21ba', + 'olcir;': '\u29be', + 'olcross;': '\u29bb', + 'oline;': '\u203e', + 'olt;': '\u29c0', + 'Omacr;': '\u014c', + 'omacr;': '\u014d', + 'Omega;': '\u03a9', + 'omega;': '\u03c9', + 'Omicron;': '\u039f', + 'omicron;': '\u03bf', + 'omid;': '\u29b6', + 'ominus;': '\u2296', + 'Oopf;': '\U0001d546', + 'oopf;': '\U0001d560', + 'opar;': '\u29b7', + 'OpenCurlyDoubleQuote;': '\u201c', + 'OpenCurlyQuote;': '\u2018', + 'operp;': '\u29b9', + 'oplus;': '\u2295', + 'Or;': '\u2a54', + 'or;': '\u2228', + 'orarr;': '\u21bb', + 'ord;': '\u2a5d', + 'order;': '\u2134', + 'orderof;': '\u2134', + 'ordf': '\xaa', + 'ordf;': '\xaa', + 'ordm': '\xba', + 'ordm;': '\xba', + 'origof;': '\u22b6', + 'oror;': '\u2a56', + 'orslope;': '\u2a57', + 'orv;': '\u2a5b', + 'oS;': '\u24c8', + 'Oscr;': '\U0001d4aa', + 'oscr;': '\u2134', + 'Oslash': '\xd8', + 'oslash': '\xf8', + 'Oslash;': '\xd8', + 'oslash;': '\xf8', + 'osol;': '\u2298', + 'Otilde': '\xd5', + 'otilde': '\xf5', + 'Otilde;': '\xd5', + 'otilde;': '\xf5', + 'Otimes;': '\u2a37', + 'otimes;': '\u2297', + 'otimesas;': '\u2a36', + 'Ouml': '\xd6', + 'ouml': '\xf6', + 'Ouml;': '\xd6', + 'ouml;': '\xf6', + 'ovbar;': '\u233d', + 'OverBar;': '\u203e', + 'OverBrace;': '\u23de', + 'OverBracket;': '\u23b4', + 'OverParenthesis;': '\u23dc', + 'par;': '\u2225', + 'para': '\xb6', + 'para;': '\xb6', + 'parallel;': '\u2225', + 'parsim;': '\u2af3', + 'parsl;': '\u2afd', + 'part;': '\u2202', + 'PartialD;': '\u2202', + 'Pcy;': '\u041f', + 'pcy;': '\u043f', + 'percnt;': '%', + 'period;': '.', + 'permil;': '\u2030', + 'perp;': '\u22a5', + 'pertenk;': '\u2031', + 'Pfr;': '\U0001d513', + 'pfr;': '\U0001d52d', + 'Phi;': '\u03a6', + 'phi;': '\u03c6', + 'phiv;': '\u03d5', + 'phmmat;': '\u2133', + 'phone;': '\u260e', + 'Pi;': '\u03a0', + 'pi;': '\u03c0', + 'pitchfork;': '\u22d4', + 'piv;': '\u03d6', + 'planck;': '\u210f', + 'planckh;': '\u210e', + 'plankv;': '\u210f', + 'plus;': '+', + 'plusacir;': '\u2a23', + 'plusb;': '\u229e', + 'pluscir;': '\u2a22', + 'plusdo;': '\u2214', + 'plusdu;': '\u2a25', + 'pluse;': '\u2a72', + 'PlusMinus;': '\xb1', + 'plusmn': '\xb1', + 'plusmn;': '\xb1', + 'plussim;': '\u2a26', + 'plustwo;': '\u2a27', + 'pm;': '\xb1', + 'Poincareplane;': '\u210c', + 'pointint;': '\u2a15', + 'Popf;': '\u2119', + 'popf;': '\U0001d561', + 'pound': '\xa3', + 'pound;': '\xa3', + 'Pr;': '\u2abb', + 'pr;': '\u227a', + 'prap;': '\u2ab7', + 'prcue;': '\u227c', + 'prE;': '\u2ab3', + 'pre;': '\u2aaf', + 'prec;': '\u227a', + 'precapprox;': '\u2ab7', + 'preccurlyeq;': '\u227c', + 'Precedes;': '\u227a', + 'PrecedesEqual;': '\u2aaf', + 'PrecedesSlantEqual;': '\u227c', + 'PrecedesTilde;': '\u227e', + 'preceq;': '\u2aaf', + 'precnapprox;': '\u2ab9', + 'precneqq;': '\u2ab5', + 'precnsim;': '\u22e8', + 'precsim;': '\u227e', + 'Prime;': '\u2033', + 'prime;': '\u2032', + 'primes;': '\u2119', + 'prnap;': '\u2ab9', + 'prnE;': '\u2ab5', + 'prnsim;': '\u22e8', + 'prod;': '\u220f', + 'Product;': '\u220f', + 'profalar;': '\u232e', + 'profline;': '\u2312', + 'profsurf;': '\u2313', + 'prop;': '\u221d', + 'Proportion;': '\u2237', + 'Proportional;': '\u221d', + 'propto;': '\u221d', + 'prsim;': '\u227e', + 'prurel;': '\u22b0', + 'Pscr;': '\U0001d4ab', + 'pscr;': '\U0001d4c5', + 'Psi;': '\u03a8', + 'psi;': '\u03c8', + 'puncsp;': '\u2008', + 'Qfr;': '\U0001d514', + 'qfr;': '\U0001d52e', + 'qint;': '\u2a0c', + 'Qopf;': '\u211a', + 'qopf;': '\U0001d562', + 'qprime;': '\u2057', + 'Qscr;': '\U0001d4ac', + 'qscr;': '\U0001d4c6', + 'quaternions;': '\u210d', + 'quatint;': '\u2a16', + 'quest;': '?', + 'questeq;': '\u225f', + 'QUOT': '"', + 'quot': '"', + 'QUOT;': '"', + 'quot;': '"', + 'rAarr;': '\u21db', + 'race;': '\u223d\u0331', + 'Racute;': '\u0154', + 'racute;': '\u0155', + 'radic;': '\u221a', + 'raemptyv;': '\u29b3', + 'Rang;': '\u27eb', + 'rang;': '\u27e9', + 'rangd;': '\u2992', + 'range;': '\u29a5', + 'rangle;': '\u27e9', + 'raquo': '\xbb', + 'raquo;': '\xbb', + 'Rarr;': '\u21a0', + 'rArr;': '\u21d2', + 'rarr;': '\u2192', + 'rarrap;': '\u2975', + 'rarrb;': '\u21e5', + 'rarrbfs;': '\u2920', + 'rarrc;': '\u2933', + 'rarrfs;': '\u291e', + 'rarrhk;': '\u21aa', + 'rarrlp;': '\u21ac', + 'rarrpl;': '\u2945', + 'rarrsim;': '\u2974', + 'Rarrtl;': '\u2916', + 'rarrtl;': '\u21a3', + 'rarrw;': '\u219d', + 'rAtail;': '\u291c', + 'ratail;': '\u291a', + 'ratio;': '\u2236', + 'rationals;': '\u211a', + 'RBarr;': '\u2910', + 'rBarr;': '\u290f', + 'rbarr;': '\u290d', + 'rbbrk;': '\u2773', + 'rbrace;': '}', + 'rbrack;': ']', + 'rbrke;': '\u298c', + 'rbrksld;': '\u298e', + 'rbrkslu;': '\u2990', + 'Rcaron;': '\u0158', + 'rcaron;': '\u0159', + 'Rcedil;': '\u0156', + 'rcedil;': '\u0157', + 'rceil;': '\u2309', + 'rcub;': '}', + 'Rcy;': '\u0420', + 'rcy;': '\u0440', + 'rdca;': '\u2937', + 'rdldhar;': '\u2969', + 'rdquo;': '\u201d', + 'rdquor;': '\u201d', + 'rdsh;': '\u21b3', + 'Re;': '\u211c', + 'real;': '\u211c', + 'realine;': '\u211b', + 'realpart;': '\u211c', + 'reals;': '\u211d', + 'rect;': '\u25ad', + 'REG': '\xae', + 'reg': '\xae', + 'REG;': '\xae', + 'reg;': '\xae', + 'ReverseElement;': '\u220b', + 'ReverseEquilibrium;': '\u21cb', + 'ReverseUpEquilibrium;': '\u296f', + 'rfisht;': '\u297d', + 'rfloor;': '\u230b', + 'Rfr;': '\u211c', + 'rfr;': '\U0001d52f', + 'rHar;': '\u2964', + 'rhard;': '\u21c1', + 'rharu;': '\u21c0', + 'rharul;': '\u296c', + 'Rho;': '\u03a1', + 'rho;': '\u03c1', + 'rhov;': '\u03f1', + 'RightAngleBracket;': '\u27e9', + 'RightArrow;': '\u2192', + 'Rightarrow;': '\u21d2', + 'rightarrow;': '\u2192', + 'RightArrowBar;': '\u21e5', + 'RightArrowLeftArrow;': '\u21c4', + 'rightarrowtail;': '\u21a3', + 'RightCeiling;': '\u2309', + 'RightDoubleBracket;': '\u27e7', + 'RightDownTeeVector;': '\u295d', + 'RightDownVector;': '\u21c2', + 'RightDownVectorBar;': '\u2955', + 'RightFloor;': '\u230b', + 'rightharpoondown;': '\u21c1', + 'rightharpoonup;': '\u21c0', + 'rightleftarrows;': '\u21c4', + 'rightleftharpoons;': '\u21cc', + 'rightrightarrows;': '\u21c9', + 'rightsquigarrow;': '\u219d', + 'RightTee;': '\u22a2', + 'RightTeeArrow;': '\u21a6', + 'RightTeeVector;': '\u295b', + 'rightthreetimes;': '\u22cc', + 'RightTriangle;': '\u22b3', + 'RightTriangleBar;': '\u29d0', + 'RightTriangleEqual;': '\u22b5', + 'RightUpDownVector;': '\u294f', + 'RightUpTeeVector;': '\u295c', + 'RightUpVector;': '\u21be', + 'RightUpVectorBar;': '\u2954', + 'RightVector;': '\u21c0', + 'RightVectorBar;': '\u2953', + 'ring;': '\u02da', + 'risingdotseq;': '\u2253', + 'rlarr;': '\u21c4', + 'rlhar;': '\u21cc', + 'rlm;': '\u200f', + 'rmoust;': '\u23b1', + 'rmoustache;': '\u23b1', + 'rnmid;': '\u2aee', + 'roang;': '\u27ed', + 'roarr;': '\u21fe', + 'robrk;': '\u27e7', + 'ropar;': '\u2986', + 'Ropf;': '\u211d', + 'ropf;': '\U0001d563', + 'roplus;': '\u2a2e', + 'rotimes;': '\u2a35', + 'RoundImplies;': '\u2970', + 'rpar;': ')', + 'rpargt;': '\u2994', + 'rppolint;': '\u2a12', + 'rrarr;': '\u21c9', + 'Rrightarrow;': '\u21db', + 'rsaquo;': '\u203a', + 'Rscr;': '\u211b', + 'rscr;': '\U0001d4c7', + 'Rsh;': '\u21b1', + 'rsh;': '\u21b1', + 'rsqb;': ']', + 'rsquo;': '\u2019', + 'rsquor;': '\u2019', + 'rthree;': '\u22cc', + 'rtimes;': '\u22ca', + 'rtri;': '\u25b9', + 'rtrie;': '\u22b5', + 'rtrif;': '\u25b8', + 'rtriltri;': '\u29ce', + 'RuleDelayed;': '\u29f4', + 'ruluhar;': '\u2968', + 'rx;': '\u211e', + 'Sacute;': '\u015a', + 'sacute;': '\u015b', + 'sbquo;': '\u201a', + 'Sc;': '\u2abc', + 'sc;': '\u227b', + 'scap;': '\u2ab8', + 'Scaron;': '\u0160', + 'scaron;': '\u0161', + 'sccue;': '\u227d', + 'scE;': '\u2ab4', + 'sce;': '\u2ab0', + 'Scedil;': '\u015e', + 'scedil;': '\u015f', + 'Scirc;': '\u015c', + 'scirc;': '\u015d', + 'scnap;': '\u2aba', + 'scnE;': '\u2ab6', + 'scnsim;': '\u22e9', + 'scpolint;': '\u2a13', + 'scsim;': '\u227f', + 'Scy;': '\u0421', + 'scy;': '\u0441', + 'sdot;': '\u22c5', + 'sdotb;': '\u22a1', + 'sdote;': '\u2a66', + 'searhk;': '\u2925', + 'seArr;': '\u21d8', + 'searr;': '\u2198', + 'searrow;': '\u2198', + 'sect': '\xa7', + 'sect;': '\xa7', + 'semi;': ';', + 'seswar;': '\u2929', + 'setminus;': '\u2216', + 'setmn;': '\u2216', + 'sext;': '\u2736', + 'Sfr;': '\U0001d516', + 'sfr;': '\U0001d530', + 'sfrown;': '\u2322', + 'sharp;': '\u266f', + 'SHCHcy;': '\u0429', + 'shchcy;': '\u0449', + 'SHcy;': '\u0428', + 'shcy;': '\u0448', + 'ShortDownArrow;': '\u2193', + 'ShortLeftArrow;': '\u2190', + 'shortmid;': '\u2223', + 'shortparallel;': '\u2225', + 'ShortRightArrow;': '\u2192', + 'ShortUpArrow;': '\u2191', + 'shy': '\xad', + 'shy;': '\xad', + 'Sigma;': '\u03a3', + 'sigma;': '\u03c3', + 'sigmaf;': '\u03c2', + 'sigmav;': '\u03c2', + 'sim;': '\u223c', + 'simdot;': '\u2a6a', + 'sime;': '\u2243', + 'simeq;': '\u2243', + 'simg;': '\u2a9e', + 'simgE;': '\u2aa0', + 'siml;': '\u2a9d', + 'simlE;': '\u2a9f', + 'simne;': '\u2246', + 'simplus;': '\u2a24', + 'simrarr;': '\u2972', + 'slarr;': '\u2190', + 'SmallCircle;': '\u2218', + 'smallsetminus;': '\u2216', + 'smashp;': '\u2a33', + 'smeparsl;': '\u29e4', + 'smid;': '\u2223', + 'smile;': '\u2323', + 'smt;': '\u2aaa', + 'smte;': '\u2aac', + 'smtes;': '\u2aac\ufe00', + 'SOFTcy;': '\u042c', + 'softcy;': '\u044c', + 'sol;': '/', + 'solb;': '\u29c4', + 'solbar;': '\u233f', + 'Sopf;': '\U0001d54a', + 'sopf;': '\U0001d564', + 'spades;': '\u2660', + 'spadesuit;': '\u2660', + 'spar;': '\u2225', + 'sqcap;': '\u2293', + 'sqcaps;': '\u2293\ufe00', + 'sqcup;': '\u2294', + 'sqcups;': '\u2294\ufe00', + 'Sqrt;': '\u221a', + 'sqsub;': '\u228f', + 'sqsube;': '\u2291', + 'sqsubset;': '\u228f', + 'sqsubseteq;': '\u2291', + 'sqsup;': '\u2290', + 'sqsupe;': '\u2292', + 'sqsupset;': '\u2290', + 'sqsupseteq;': '\u2292', + 'squ;': '\u25a1', + 'Square;': '\u25a1', + 'square;': '\u25a1', + 'SquareIntersection;': '\u2293', + 'SquareSubset;': '\u228f', + 'SquareSubsetEqual;': '\u2291', + 'SquareSuperset;': '\u2290', + 'SquareSupersetEqual;': '\u2292', + 'SquareUnion;': '\u2294', + 'squarf;': '\u25aa', + 'squf;': '\u25aa', + 'srarr;': '\u2192', + 'Sscr;': '\U0001d4ae', + 'sscr;': '\U0001d4c8', + 'ssetmn;': '\u2216', + 'ssmile;': '\u2323', + 'sstarf;': '\u22c6', + 'Star;': '\u22c6', + 'star;': '\u2606', + 'starf;': '\u2605', + 'straightepsilon;': '\u03f5', + 'straightphi;': '\u03d5', + 'strns;': '\xaf', + 'Sub;': '\u22d0', + 'sub;': '\u2282', + 'subdot;': '\u2abd', + 'subE;': '\u2ac5', + 'sube;': '\u2286', + 'subedot;': '\u2ac3', + 'submult;': '\u2ac1', + 'subnE;': '\u2acb', + 'subne;': '\u228a', + 'subplus;': '\u2abf', + 'subrarr;': '\u2979', + 'Subset;': '\u22d0', + 'subset;': '\u2282', + 'subseteq;': '\u2286', + 'subseteqq;': '\u2ac5', + 'SubsetEqual;': '\u2286', + 'subsetneq;': '\u228a', + 'subsetneqq;': '\u2acb', + 'subsim;': '\u2ac7', + 'subsub;': '\u2ad5', + 'subsup;': '\u2ad3', + 'succ;': '\u227b', + 'succapprox;': '\u2ab8', + 'succcurlyeq;': '\u227d', + 'Succeeds;': '\u227b', + 'SucceedsEqual;': '\u2ab0', + 'SucceedsSlantEqual;': '\u227d', + 'SucceedsTilde;': '\u227f', + 'succeq;': '\u2ab0', + 'succnapprox;': '\u2aba', + 'succneqq;': '\u2ab6', + 'succnsim;': '\u22e9', + 'succsim;': '\u227f', + 'SuchThat;': '\u220b', + 'Sum;': '\u2211', + 'sum;': '\u2211', + 'sung;': '\u266a', + 'sup1': '\xb9', + 'sup1;': '\xb9', + 'sup2': '\xb2', + 'sup2;': '\xb2', + 'sup3': '\xb3', + 'sup3;': '\xb3', + 'Sup;': '\u22d1', + 'sup;': '\u2283', + 'supdot;': '\u2abe', + 'supdsub;': '\u2ad8', + 'supE;': '\u2ac6', + 'supe;': '\u2287', + 'supedot;': '\u2ac4', + 'Superset;': '\u2283', + 'SupersetEqual;': '\u2287', + 'suphsol;': '\u27c9', + 'suphsub;': '\u2ad7', + 'suplarr;': '\u297b', + 'supmult;': '\u2ac2', + 'supnE;': '\u2acc', + 'supne;': '\u228b', + 'supplus;': '\u2ac0', + 'Supset;': '\u22d1', + 'supset;': '\u2283', + 'supseteq;': '\u2287', + 'supseteqq;': '\u2ac6', + 'supsetneq;': '\u228b', + 'supsetneqq;': '\u2acc', + 'supsim;': '\u2ac8', + 'supsub;': '\u2ad4', + 'supsup;': '\u2ad6', + 'swarhk;': '\u2926', + 'swArr;': '\u21d9', + 'swarr;': '\u2199', + 'swarrow;': '\u2199', + 'swnwar;': '\u292a', + 'szlig': '\xdf', + 'szlig;': '\xdf', + 'Tab;': '\t', + 'target;': '\u2316', + 'Tau;': '\u03a4', + 'tau;': '\u03c4', + 'tbrk;': '\u23b4', + 'Tcaron;': '\u0164', + 'tcaron;': '\u0165', + 'Tcedil;': '\u0162', + 'tcedil;': '\u0163', + 'Tcy;': '\u0422', + 'tcy;': '\u0442', + 'tdot;': '\u20db', + 'telrec;': '\u2315', + 'Tfr;': '\U0001d517', + 'tfr;': '\U0001d531', + 'there4;': '\u2234', + 'Therefore;': '\u2234', + 'therefore;': '\u2234', + 'Theta;': '\u0398', + 'theta;': '\u03b8', + 'thetasym;': '\u03d1', + 'thetav;': '\u03d1', + 'thickapprox;': '\u2248', + 'thicksim;': '\u223c', + 'ThickSpace;': '\u205f\u200a', + 'thinsp;': '\u2009', + 'ThinSpace;': '\u2009', + 'thkap;': '\u2248', + 'thksim;': '\u223c', + 'THORN': '\xde', + 'thorn': '\xfe', + 'THORN;': '\xde', + 'thorn;': '\xfe', + 'Tilde;': '\u223c', + 'tilde;': '\u02dc', + 'TildeEqual;': '\u2243', + 'TildeFullEqual;': '\u2245', + 'TildeTilde;': '\u2248', + 'times': '\xd7', + 'times;': '\xd7', + 'timesb;': '\u22a0', + 'timesbar;': '\u2a31', + 'timesd;': '\u2a30', + 'tint;': '\u222d', + 'toea;': '\u2928', + 'top;': '\u22a4', + 'topbot;': '\u2336', + 'topcir;': '\u2af1', + 'Topf;': '\U0001d54b', + 'topf;': '\U0001d565', + 'topfork;': '\u2ada', + 'tosa;': '\u2929', + 'tprime;': '\u2034', + 'TRADE;': '\u2122', + 'trade;': '\u2122', + 'triangle;': '\u25b5', + 'triangledown;': '\u25bf', + 'triangleleft;': '\u25c3', + 'trianglelefteq;': '\u22b4', + 'triangleq;': '\u225c', + 'triangleright;': '\u25b9', + 'trianglerighteq;': '\u22b5', + 'tridot;': '\u25ec', + 'trie;': '\u225c', + 'triminus;': '\u2a3a', + 'TripleDot;': '\u20db', + 'triplus;': '\u2a39', + 'trisb;': '\u29cd', + 'tritime;': '\u2a3b', + 'trpezium;': '\u23e2', + 'Tscr;': '\U0001d4af', + 'tscr;': '\U0001d4c9', + 'TScy;': '\u0426', + 'tscy;': '\u0446', + 'TSHcy;': '\u040b', + 'tshcy;': '\u045b', + 'Tstrok;': '\u0166', + 'tstrok;': '\u0167', + 'twixt;': '\u226c', + 'twoheadleftarrow;': '\u219e', + 'twoheadrightarrow;': '\u21a0', + 'Uacute': '\xda', + 'uacute': '\xfa', + 'Uacute;': '\xda', + 'uacute;': '\xfa', + 'Uarr;': '\u219f', + 'uArr;': '\u21d1', + 'uarr;': '\u2191', + 'Uarrocir;': '\u2949', + 'Ubrcy;': '\u040e', + 'ubrcy;': '\u045e', + 'Ubreve;': '\u016c', + 'ubreve;': '\u016d', + 'Ucirc': '\xdb', + 'ucirc': '\xfb', + 'Ucirc;': '\xdb', + 'ucirc;': '\xfb', + 'Ucy;': '\u0423', + 'ucy;': '\u0443', + 'udarr;': '\u21c5', + 'Udblac;': '\u0170', + 'udblac;': '\u0171', + 'udhar;': '\u296e', + 'ufisht;': '\u297e', + 'Ufr;': '\U0001d518', + 'ufr;': '\U0001d532', + 'Ugrave': '\xd9', + 'ugrave': '\xf9', + 'Ugrave;': '\xd9', + 'ugrave;': '\xf9', + 'uHar;': '\u2963', + 'uharl;': '\u21bf', + 'uharr;': '\u21be', + 'uhblk;': '\u2580', + 'ulcorn;': '\u231c', + 'ulcorner;': '\u231c', + 'ulcrop;': '\u230f', + 'ultri;': '\u25f8', + 'Umacr;': '\u016a', + 'umacr;': '\u016b', + 'uml': '\xa8', + 'uml;': '\xa8', + 'UnderBar;': '_', + 'UnderBrace;': '\u23df', + 'UnderBracket;': '\u23b5', + 'UnderParenthesis;': '\u23dd', + 'Union;': '\u22c3', + 'UnionPlus;': '\u228e', + 'Uogon;': '\u0172', + 'uogon;': '\u0173', + 'Uopf;': '\U0001d54c', + 'uopf;': '\U0001d566', + 'UpArrow;': '\u2191', + 'Uparrow;': '\u21d1', + 'uparrow;': '\u2191', + 'UpArrowBar;': '\u2912', + 'UpArrowDownArrow;': '\u21c5', + 'UpDownArrow;': '\u2195', + 'Updownarrow;': '\u21d5', + 'updownarrow;': '\u2195', + 'UpEquilibrium;': '\u296e', + 'upharpoonleft;': '\u21bf', + 'upharpoonright;': '\u21be', + 'uplus;': '\u228e', + 'UpperLeftArrow;': '\u2196', + 'UpperRightArrow;': '\u2197', + 'Upsi;': '\u03d2', + 'upsi;': '\u03c5', + 'upsih;': '\u03d2', + 'Upsilon;': '\u03a5', + 'upsilon;': '\u03c5', + 'UpTee;': '\u22a5', + 'UpTeeArrow;': '\u21a5', + 'upuparrows;': '\u21c8', + 'urcorn;': '\u231d', + 'urcorner;': '\u231d', + 'urcrop;': '\u230e', + 'Uring;': '\u016e', + 'uring;': '\u016f', + 'urtri;': '\u25f9', + 'Uscr;': '\U0001d4b0', + 'uscr;': '\U0001d4ca', + 'utdot;': '\u22f0', + 'Utilde;': '\u0168', + 'utilde;': '\u0169', + 'utri;': '\u25b5', + 'utrif;': '\u25b4', + 'uuarr;': '\u21c8', + 'Uuml': '\xdc', + 'uuml': '\xfc', + 'Uuml;': '\xdc', + 'uuml;': '\xfc', + 'uwangle;': '\u29a7', + 'vangrt;': '\u299c', + 'varepsilon;': '\u03f5', + 'varkappa;': '\u03f0', + 'varnothing;': '\u2205', + 'varphi;': '\u03d5', + 'varpi;': '\u03d6', + 'varpropto;': '\u221d', + 'vArr;': '\u21d5', + 'varr;': '\u2195', + 'varrho;': '\u03f1', + 'varsigma;': '\u03c2', + 'varsubsetneq;': '\u228a\ufe00', + 'varsubsetneqq;': '\u2acb\ufe00', + 'varsupsetneq;': '\u228b\ufe00', + 'varsupsetneqq;': '\u2acc\ufe00', + 'vartheta;': '\u03d1', + 'vartriangleleft;': '\u22b2', + 'vartriangleright;': '\u22b3', + 'Vbar;': '\u2aeb', + 'vBar;': '\u2ae8', + 'vBarv;': '\u2ae9', + 'Vcy;': '\u0412', + 'vcy;': '\u0432', + 'VDash;': '\u22ab', + 'Vdash;': '\u22a9', + 'vDash;': '\u22a8', + 'vdash;': '\u22a2', + 'Vdashl;': '\u2ae6', + 'Vee;': '\u22c1', + 'vee;': '\u2228', + 'veebar;': '\u22bb', + 'veeeq;': '\u225a', + 'vellip;': '\u22ee', + 'Verbar;': '\u2016', + 'verbar;': '|', + 'Vert;': '\u2016', + 'vert;': '|', + 'VerticalBar;': '\u2223', + 'VerticalLine;': '|', + 'VerticalSeparator;': '\u2758', + 'VerticalTilde;': '\u2240', + 'VeryThinSpace;': '\u200a', + 'Vfr;': '\U0001d519', + 'vfr;': '\U0001d533', + 'vltri;': '\u22b2', + 'vnsub;': '\u2282\u20d2', + 'vnsup;': '\u2283\u20d2', + 'Vopf;': '\U0001d54d', + 'vopf;': '\U0001d567', + 'vprop;': '\u221d', + 'vrtri;': '\u22b3', + 'Vscr;': '\U0001d4b1', + 'vscr;': '\U0001d4cb', + 'vsubnE;': '\u2acb\ufe00', + 'vsubne;': '\u228a\ufe00', + 'vsupnE;': '\u2acc\ufe00', + 'vsupne;': '\u228b\ufe00', + 'Vvdash;': '\u22aa', + 'vzigzag;': '\u299a', + 'Wcirc;': '\u0174', + 'wcirc;': '\u0175', + 'wedbar;': '\u2a5f', + 'Wedge;': '\u22c0', + 'wedge;': '\u2227', + 'wedgeq;': '\u2259', + 'weierp;': '\u2118', + 'Wfr;': '\U0001d51a', + 'wfr;': '\U0001d534', + 'Wopf;': '\U0001d54e', + 'wopf;': '\U0001d568', + 'wp;': '\u2118', + 'wr;': '\u2240', + 'wreath;': '\u2240', + 'Wscr;': '\U0001d4b2', + 'wscr;': '\U0001d4cc', + 'xcap;': '\u22c2', + 'xcirc;': '\u25ef', + 'xcup;': '\u22c3', + 'xdtri;': '\u25bd', + 'Xfr;': '\U0001d51b', + 'xfr;': '\U0001d535', + 'xhArr;': '\u27fa', + 'xharr;': '\u27f7', + 'Xi;': '\u039e', + 'xi;': '\u03be', + 'xlArr;': '\u27f8', + 'xlarr;': '\u27f5', + 'xmap;': '\u27fc', + 'xnis;': '\u22fb', + 'xodot;': '\u2a00', + 'Xopf;': '\U0001d54f', + 'xopf;': '\U0001d569', + 'xoplus;': '\u2a01', + 'xotime;': '\u2a02', + 'xrArr;': '\u27f9', + 'xrarr;': '\u27f6', + 'Xscr;': '\U0001d4b3', + 'xscr;': '\U0001d4cd', + 'xsqcup;': '\u2a06', + 'xuplus;': '\u2a04', + 'xutri;': '\u25b3', + 'xvee;': '\u22c1', + 'xwedge;': '\u22c0', + 'Yacute': '\xdd', + 'yacute': '\xfd', + 'Yacute;': '\xdd', + 'yacute;': '\xfd', + 'YAcy;': '\u042f', + 'yacy;': '\u044f', + 'Ycirc;': '\u0176', + 'ycirc;': '\u0177', + 'Ycy;': '\u042b', + 'ycy;': '\u044b', + 'yen': '\xa5', + 'yen;': '\xa5', + 'Yfr;': '\U0001d51c', + 'yfr;': '\U0001d536', + 'YIcy;': '\u0407', + 'yicy;': '\u0457', + 'Yopf;': '\U0001d550', + 'yopf;': '\U0001d56a', + 'Yscr;': '\U0001d4b4', + 'yscr;': '\U0001d4ce', + 'YUcy;': '\u042e', + 'yucy;': '\u044e', + 'yuml': '\xff', + 'Yuml;': '\u0178', + 'yuml;': '\xff', + 'Zacute;': '\u0179', + 'zacute;': '\u017a', + 'Zcaron;': '\u017d', + 'zcaron;': '\u017e', + 'Zcy;': '\u0417', + 'zcy;': '\u0437', + 'Zdot;': '\u017b', + 'zdot;': '\u017c', + 'zeetrf;': '\u2128', + 'ZeroWidthSpace;': '\u200b', + 'Zeta;': '\u0396', + 'zeta;': '\u03b6', + 'Zfr;': '\u2128', + 'zfr;': '\U0001d537', + 'ZHcy;': '\u0416', + 'zhcy;': '\u0436', + 'zigrarr;': '\u21dd', + 'Zopf;': '\u2124', + 'zopf;': '\U0001d56b', + 'Zscr;': '\U0001d4b5', + 'zscr;': '\U0001d4cf', + 'zwj;': '\u200d', + 'zwnj;': '\u200c', + } + try: import http.client as compat_http_client except ImportError: # Python 2 @@ -83,7 +2321,6 @@ try: except ImportError: # Python 2 from HTMLParser import HTMLParser as compat_HTMLParser - try: from subprocess import DEVNULL compat_subprocess_get_DEVNULL = lambda: DEVNULL @@ -626,6 +2863,7 @@ __all__ = [ 'compat_getenv', 'compat_getpass', 'compat_html_entities', + 'compat_html_entities_html5', 'compat_http_client', 'compat_http_server', 'compat_input', From 55b2f099c0c820d6c4b46609b175a44a6d7f97bf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 15:11:55 +0800 Subject: [PATCH 46/92] [utils] Decode HTML5 entities Used in test_Vporn_1. Also related to #9270 --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index feef80465..0e25de6b7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -249,6 +249,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('/'), '/') self.assertEqual(unescapeHTML('é'), 'é') self.assertEqual(unescapeHTML('�'), '�') + # HTML5 entities + self.assertEqual(unescapeHTML('.''), '.\'') def test_date_from_str(self): self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day')) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 229de4b39..f77ab8650 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -39,6 +39,7 @@ from .compat import ( compat_chr, compat_etree_fromstring, compat_html_entities, + compat_html_entities_html5, compat_http_client, compat_kwargs, compat_parse_qs, @@ -456,12 +457,19 @@ def orderedSet(iterable): return res -def _htmlentity_transform(entity): +def _htmlentity_transform(entity_with_semicolon): """Transforms an HTML entity to a character.""" + entity = entity_with_semicolon[:-1] + # Known non-numeric HTML entity if entity in compat_html_entities.name2codepoint: return compat_chr(compat_html_entities.name2codepoint[entity]) + # TODO: HTML5 allows entities without a semicolon. For example, + # 'Éric' should be decoded as 'Éric'. + if entity_with_semicolon in compat_html_entities_html5: + return compat_html_entities_html5[entity_with_semicolon] + mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity) if mobj is not None: numstr = mobj.group(1) @@ -486,7 +494,7 @@ def unescapeHTML(s): assert type(s) == compat_str return re.sub( - r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s) + r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) def get_subprocess_encoding(): From a2252385308898074f5006ed737aeb98bb8b0402 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 15:12:53 +0800 Subject: [PATCH 47/92] [vporn] Improve error detection and update _TESTS --- youtube_dl/extractor/vporn.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 92c90e517..1557a0e04 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, parse_duration, str_to_int, ) @@ -27,7 +28,8 @@ class VpornIE(InfoExtractor): 'duration': 393, 'age_limit': 18, 'view_count': int, - } + }, + 'skip': 'video removed', }, { 'url': 'http://www.vporn.com/female/hana-shower/523564/', @@ -40,7 +42,7 @@ class VpornIE(InfoExtractor): 'description': 'Hana showers at the bathroom.', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'Hmmmmm', - 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'], + 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'], 'duration': 588, 'age_limit': 18, 'view_count': int, @@ -55,6 +57,10 @@ class VpornIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!' + if errmsg in webpage: + raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) + title = self._html_search_regex( r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() description = self._html_search_regex( From c16f8a4659566fd7421226b0d5ddb871425b392b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:04:28 +0800 Subject: [PATCH 48/92] [voicerepublic] Force video_id to be strings Related: be6217b26142491232fb697b125015d45437832d --- youtube_dl/extractor/voicerepublic.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index 93d15a556..4f1a99a89 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( ExtractorError, determine_ext, @@ -16,13 +19,13 @@ class VoiceRepublicIE(InfoExtractor): _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)' _TESTS = [{ 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', - 'md5': '0554a24d1657915aa8e8f84e15dc9353', + 'md5': 'b9174d651323f17783000876347116e3', 'info_dict': { 'id': '2296', 'display_id': 'watching-the-watchers-building-a-sousveillance-state', 'ext': 'm4a', 'title': 'Watching the Watchers: Building a Sousveillance State', - 'description': 'md5:715ba964958afa2398df615809cfecb1', + 'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.', 'thumbnail': 're:^https?://.*\.(?:png|jpg)$', 'duration': 1800, 'view_count': int, @@ -52,7 +55,7 @@ class VoiceRepublicIE(InfoExtractor): if data: title = data['title'] description = data.get('teaser') - talk_id = data.get('talk_id') or display_id + talk_id = compat_str(data.get('talk_id') or display_id) talk = data['talk'] duration = int_or_none(talk.get('duration')) formats = [{ From 09728d5fbc93c769b3f8971c06e9ed0bfb168b37 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:11:28 +0800 Subject: [PATCH 49/92] [audiomack:album] Force video_id to be strings Related: be6217b26142491232fb697b125015d45437832d --- youtube_dl/extractor/audiomack.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index a52d26cec..f3bd4d444 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -6,6 +6,7 @@ import time from .common import InfoExtractor from .soundcloud import SoundcloudIE +from ..compat import compat_str from ..utils import ( ExtractorError, url_basename, @@ -136,7 +137,7 @@ class AudiomackAlbumIE(InfoExtractor): result[resultkey] = api_response[apikey] song_id = url_basename(api_response['url']).rpartition('.')[0] result['entries'].append({ - 'id': api_response.get('id', song_id), + 'id': compat_str(api_response.get('id', song_id)), 'uploader': api_response.get('artist'), 'title': api_response.get('title', song_id), 'url': api_response['url'], From daa0df9e8beac1325e5fb55d828e7a3a38e74bf6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:37:12 +0800 Subject: [PATCH 50/92] [youtube:user] Support another URL form Such an URL comes from http://www.gametrailers.com/. This is originally a test case in GenericIE, but now seems all GameTrailers videos are on YouTube. --- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6c9f77d95..00dd602ff 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1988,7 +1988,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' IE_NAME = 'youtube:user' @@ -2001,6 +2001,9 @@ class YoutubeUserIE(YoutubeChannelIE): }, { 'url': 'ytuser:phihag', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/gametrailers', + 'only_matching': True, }] @classmethod From 1fa309da40bfc5e7e72639e80cf6556b3839fc81 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:39:31 +0800 Subject: [PATCH 51/92] [generic] Update test_Generic_40 The original link now redirects to an YouTube user channel. --- youtube_dl/extractor/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 798c109c6..ef18ce3dc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -627,13 +627,13 @@ class GenericIE(InfoExtractor): }, # MTVSercices embed { - 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too', - 'md5': '35727f82f58c76d996fc188f9755b0d5', + 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html', + 'md5': 'ca1aef97695ef2c1d6973256a57e5252', 'info_dict': { - 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9', + 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1', 'ext': 'mp4', - 'title': 'Review', - 'description': 'Mario\'s life in the fast lane has never looked so good.', + 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored', + 'description': 'Two valets share their love for movie star Liam Neesons.', }, }, # YouTube embed via <data-embed-url=""> From 6c0376fe4f16f53fd87f5e6a56531fc153922980 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:53:40 +0800 Subject: [PATCH 52/92] [dw] Skip an invalid test DW documentaries only last for one or two weeks. See #9475 --- youtube_dl/extractor/dw.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py index 0f0f0b8d3..d740652f1 100644 --- a/youtube_dl/extractor/dw.py +++ b/youtube_dl/extractor/dw.py @@ -35,6 +35,7 @@ class DWIE(InfoExtractor): 'upload_date': '20160311', } }, { + # DW documentaries, only last for one or two weeks 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798', 'md5': '56b6214ef463bfb9a3b71aeb886f3cf1', 'info_dict': { @@ -44,6 +45,7 @@ class DWIE(InfoExtractor): 'description': 'Welcome to the 90s - The Golden Decade of Hip Hop', 'upload_date': '20160521', }, + 'skip': 'Video removed', }] def _real_extract(self, url): From 836ab0c554f13751adff02d3987f6f3f79e2db09 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 18:12:57 +0800 Subject: [PATCH 53/92] [compat] Import html5 entities correctly --- youtube_dl/compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0243949a4..67db1c7c6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -64,8 +64,8 @@ except ImportError: # Python 2 import htmlentitydefs as compat_html_entities try: # Python >= 3.3 - from compat_html_entities import html as compat_html_entities_html5 -except ImportError: + compat_html_entities_html5 = compat_html_entities.html5 +except AttributeError: # Copied from CPython 3.5.1 html/entities.py compat_html_entities_html5 = { 'Aacute': '\xc1', From bdf16f81403c036a0f40d10a136a46aa7d2f6f0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Jun 2016 22:40:18 +0700 Subject: [PATCH 54/92] [lynda] Add support for new authentication (Closes #9740) --- youtube_dl/extractor/lynda.py | 115 ++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 86d47266f..c2678652e 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -1,84 +1,89 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_HTTPError, + compat_str, + compat_urlparse, +) from ..utils import ( ExtractorError, - clean_html, int_or_none, - sanitized_Request, urlencode_postdata, ) class LyndaBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' + _SIGNIN_URL = 'https://www.lynda.com/signin' + _PASSWORD_URL = 'https://www.lynda.com/signin/password' + _USER_URL = 'https://www.lynda.com/signin/user' _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' _NETRC_MACHINE = 'lynda' def _real_initialize(self): self._login() + @staticmethod + def _check_error(json_string, key_or_keys): + keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys + for key in keys: + error = json_string.get(key) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url): + action_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html, + 'post url', default=fallback_action_url, group='url') + + if not action_url.startswith('http'): + action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url) + + form_data = self._hidden_inputs(form_html) + form_data.update(extra_form_data) + + try: + response = self._download_json( + action_url, None, note, + data=urlencode_postdata(form_data), + headers={ + 'Referer': referrer_url, + 'X-Requested-With': 'XMLHttpRequest', + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: + response = self._parse_json(e.cause.read().decode('utf-8'), None) + self._check_error(response, ('email', 'password')) + raise + + self._check_error(response, 'ErrorMessage') + + return response, action_url + def _login(self): username, password = self._get_login_info() if username is None: return - login_form = { - 'username': username, - 'password': password, - 'remember': 'false', - 'stayPut': 'false' - } - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) - login_page = self._download_webpage( - request, None, 'Logging in as %s' % username) + # Step 1: download signin page + signin_page = self._download_webpage( + self._SIGNIN_URL, None, 'Downloading signin page') - # Not (yet) logged in - m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page) - if m is not None: - response = m.group('json') - response_json = json.loads(response) - state = response_json['state'] + # Step 2: submit email + signin_form = self._search_regex( + r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)', + signin_page, 'signin form') + signin_page, signin_url = self._login_step( + signin_form, self._PASSWORD_URL, {'email': username}, + 'Submitting email', self._SIGNIN_URL) - if state == 'notlogged': - raise ExtractorError( - 'Unable to login, incorrect username and/or password', - expected=True) - - # This is when we get popup: - # > You're already logged in to lynda.com on two devices. - # > If you log in here, we'll log you out of another device. - # So, we need to confirm this. - if state == 'conflicted': - confirm_form = { - 'username': '', - 'password': '', - 'resolve': 'true', - 'remember': 'false', - 'stayPut': 'false', - } - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(confirm_form)) - login_page = self._download_webpage( - request, None, - 'Confirming log in and log out from another device') - - if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): - if 'login error' in login_page: - mobj = re.search( - r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>', - login_page) - if mobj: - raise ExtractorError( - 'lynda returned error: %s - %s' - % (mobj.group('title'), clean_html(mobj.group('description'))), - expected=True) - raise ExtractorError('Unable to log in') + # Step 3: submit password + password_form = signin_page['body'] + self._login_step( + password_form, self._USER_URL, {'email': username, 'password': password}, + 'Submitting password', signin_url) def _logout(self): username, _ = self._get_login_info() From 3841256c2c5fd35229cd8f2c2c8a8e2401f7016b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Jun 2016 23:01:52 +0700 Subject: [PATCH 55/92] [lynda] Skip login if already logged in --- youtube_dl/extractor/lynda.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index c2678652e..7610985b4 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -71,6 +71,11 @@ class LyndaBaseIE(InfoExtractor): signin_page = self._download_webpage( self._SIGNIN_URL, None, 'Downloading signin page') + # Already logged in + if any(re.search(p, signin_page) for p in ( + 'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): + return + # Step 2: submit email signin_form = self._search_regex( r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)', @@ -85,15 +90,6 @@ class LyndaBaseIE(InfoExtractor): password_form, self._USER_URL, {'email': username, 'password': password}, 'Submitting password', signin_url) - def _logout(self): - username, _ = self._get_login_info() - if username is None: - return - - self._download_webpage( - 'http://www.lynda.com/ajax/logout.aspx', None, - 'Logging out', 'Unable to log out', fatal=False) - class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' @@ -217,8 +213,6 @@ class LyndaCourseIE(LyndaBaseIE): 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, course_id, 'Downloading course JSON') - self._logout() - if course.get('Status') == 'NotFound': raise ExtractorError( 'Course %s does not exist' % course_id, expected=True) From 0434358823a9b7da7656f3e6d8de28d1b42036f5 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Fri, 10 Jun 2016 19:17:58 +0200 Subject: [PATCH 56/92] [Lynda] Extract course description --- youtube_dl/extractor/lynda.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 86d47266f..c1bca5678 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -246,5 +246,6 @@ class LyndaCourseIE(LyndaBaseIE): % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT) course_title = course.get('Title') + course_description = course.get('Description') - return self.playlist_result(entries, course_id, course_title) + return self.playlist_result(entries, course_id, course_title, course_description) From d845622b2e09ebac28e21f76f6d5c2795aa9bb50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 02:41:48 +0700 Subject: [PATCH 57/92] release 2016.06.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- README.md | 2 +- docs/supportedsites.md | 9 +++++---- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e593ee78a..16ef23066 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.03 +[debug] youtube-dl version 2016.06.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index 205c485d0..2ea8acb30 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like. --write-info-json Write video metadata to a .info.json file --write-annotations Write video annotations to a .annotations.xml file - --load-info FILE JSON file containing the video information + --load-info-json FILE JSON file containing the video information (created with the "--write-info-json" option) --cookies FILE File to read cookies from and dump cookie diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 619bd0825..f89c2d1f2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **AdobeTVVideo** - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network + - **AfreecaTV**: afreecatv.com - **Aftonbladet** - **AirMozilla** - **AlJazeera** @@ -43,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** + - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -253,6 +254,7 @@ - **Globo** - **GloboArticle** - **GodTube** + - **GodTV** - **GoldenMoustache** - **Golem** - **GoogleDrive** @@ -738,6 +740,7 @@ - **VideoPremium** - **VideoTt**: video.tt - Your True Tube (Currently broken) - **videoweed**: VideoWeed + - **Vidio** - **vidme** - **vidme:user** - **vidme:user:likes** @@ -773,7 +776,6 @@ - **VRT** - **vube**: Vube.com - **VuClip** - - **vulture.com** - **Walla** - **washingtonpost** - **washingtonpost:article** @@ -781,10 +783,8 @@ - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile** - - **WDRMaus**: Sendung mit der Maus - **WebOfStories** - **WebOfStoriesPlaylist** - - **Weibo** - **WeiqiTV**: WQTV - **wholecloud**: WholeCloud - **Wimp** @@ -820,6 +820,7 @@ - **Ynet** - **YouJizz** - **youku**: 优酷 + - **youku:show** - **YouPorn** - **YourUpload** - **youtube**: YouTube.com diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d24d06f4a..dafb6513a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.03' +__version__ = '2016.06.11' From 6626c214e1e0fa422d68b875cbb69dfb5aad8745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 03:00:08 +0700 Subject: [PATCH 58/92] release 2016.06.11.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 16ef23066..564cffae7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11 +[debug] youtube-dl version 2016.06.11.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dafb6513a..5bcb6a7b3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11' +__version__ = '2016.06.11.1' From 9ddc289f88542f4b0bf7ad5e9c725caf8889f71b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 04:59:47 +0700 Subject: [PATCH 59/92] [README.md] Document missing playlist fields in output template --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 2ea8acb30..3ff33c156 100644 --- a/README.md +++ b/README.md @@ -511,6 +511,9 @@ The basic usage is not to set any template arguments when downloading a single f - `autonumber`: Five-digit number that will be increased with each download, starting at zero - `playlist`: Name or id of the playlist that contains the video - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist + - `playlist_id`: Playlist identifier + - `playlist_title`: Playlist title + Available for the video that belongs to some logical chapter or section: - `chapter`: Name or title of the chapter the video belongs to From 62666af99fb55e3ba535ce630e8ce0aed1b5b0e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:13:05 +0700 Subject: [PATCH 60/92] [indavideo] Fix formats' height (Closes #9744) --- youtube_dl/extractor/indavideo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 9622f198a..c6f080484 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -60,7 +60,8 @@ class IndavideoEmbedIE(InfoExtractor): formats = [{ 'url': video_url, - 'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None), + 'height': int_or_none(self._search_regex( + r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)), } for video_url in video_urls] self._sort_formats(formats) From 4cad2929cd7e90be174ae6b0ad0c7d9f47795374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:30:44 +0700 Subject: [PATCH 61/92] [limelight] Fix _VALID_URLs --- youtube_dl/extractor/limelight.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 2599d45c3..8dbc940a7 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -123,7 +123,7 @@ class LimelightBaseIE(InfoExtractor): class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' - _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bmediaId=)(?P<id>[a-z0-9]{32})' _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 'info_dict': { @@ -176,7 +176,7 @@ class LimelightMediaIE(LimelightBaseIE): class LimelightChannelIE(LimelightBaseIE): IE_NAME = 'limelight:channel' - _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelId=)(?P<id>[a-z0-9]{32})' _TEST = { 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', 'info_dict': { From 79027c0ea02d4f296aefe6ca6e5af393c2a4a209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:40:02 +0700 Subject: [PATCH 62/92] [limelight] Improve _VALID_URLs --- youtube_dl/extractor/limelight.py | 56 +++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 8dbc940a7..da5d198b9 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -123,7 +123,18 @@ class LimelightBaseIE(InfoExtractor): class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' - _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bmediaId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'''(?x) + (?: + limelight:media:| + https?:// + (?: + link\.videoplatform\.limelight\.com/media/| + assets\.delvenetworks\.com/player/loader\.swf + ) + \?.*?\bmediaId= + ) + (?P<id>[a-z0-9]{32}) + ''' _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 'info_dict': { @@ -158,6 +169,9 @@ class LimelightMediaIE(LimelightBaseIE): # rtmp download 'skip_download': True, }, + }, { + 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452', + 'only_matching': True, }] _PLAYLIST_SERVICE_PATH = 'media' _API_PATH = 'media' @@ -176,15 +190,29 @@ class LimelightMediaIE(LimelightBaseIE): class LimelightChannelIE(LimelightBaseIE): IE_NAME = 'limelight:channel' - _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelId=)(?P<id>[a-z0-9]{32})' - _TEST = { + _VALID_URL = r'''(?x) + (?: + limelight:channel:| + https?:// + (?: + link\.videoplatform\.limelight\.com/media/| + assets\.delvenetworks\.com/player/loader\.swf + ) + \?.*?\bchannelId= + ) + (?P<id>[a-z0-9]{32}) + ''' + _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', 'info_dict': { 'id': 'ab6a524c379342f9b23642917020c082', 'title': 'Javascript Sample Code', }, 'playlist_mincount': 3, - } + }, { + 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082', + 'only_matching': True, + }] _PLAYLIST_SERVICE_PATH = 'channel' _API_PATH = 'channels' @@ -207,15 +235,29 @@ class LimelightChannelIE(LimelightBaseIE): class LimelightChannelListIE(LimelightBaseIE): IE_NAME = 'limelight:channel_list' - _VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})' - _TEST = { + _VALID_URL = r'''(?x) + (?: + limelight:channel_list:| + https?:// + (?: + link\.videoplatform\.limelight\.com/media/| + assets\.delvenetworks\.com/player/loader\.swf + ) + \?.*?\bchannelListId= + ) + (?P<id>[a-z0-9]{32}) + ''' + _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', 'info_dict': { 'id': '301b117890c4465c8179ede21fd92e2b', 'title': 'Website - Hero Player', }, 'playlist_mincount': 2, - } + }, { + 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b', + 'only_matching': True, + }] _PLAYLIST_SERVICE_PATH = 'channel_list' def _real_extract(self, url): From 21ac1a8ac3f2a3c301ad8c08730166a8fd82c287 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:52:50 +0700 Subject: [PATCH 63/92] [limelight] Fix typo --- youtube_dl/extractor/limelight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index da5d198b9..a25fb8e2c 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -98,7 +98,7 @@ class LimelightBaseIE(InfoExtractor): } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')] subtitles = {} - for caption in properties.get('captions', {}): + for caption in properties.get('captions', []): lang = caption.get('language_code') subtitles_url = caption.get('url') if lang and subtitles_url: From fe458b65965e5a847a24d00138b723ce67b274e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:57:27 +0700 Subject: [PATCH 64/92] [limelight] Extract ttml subtitles (Closes #9739) --- youtube_dl/extractor/limelight.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index a25fb8e2c..5d2c3e256 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -102,9 +102,15 @@ class LimelightBaseIE(InfoExtractor): lang = caption.get('language_code') subtitles_url = caption.get('url') if lang and subtitles_url: - subtitles[lang] = [{ + subtitles.setdefault(lang, []).append({ 'url': subtitles_url, - }] + }) + closed_captions_url = properties.get('closed_captions_url') + if closed_captions_url: + subtitles.setdefault('en', []).append({ + 'url': closed_captions_url, + 'ext': 'ttml', + }) return { 'id': video_id, From 698f127c1a9dd460c8dede59df6a0e2ce69f913a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 06:14:22 +0700 Subject: [PATCH 65/92] [setup.py] Add python 3.5 classifier --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 9444d403d..c1e923f71 100644 --- a/setup.py +++ b/setup.py @@ -122,6 +122,7 @@ setup( "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", ], cmdclass={'build_lazy_extractors': build_lazy_extractors}, From 33751818d3e31270304db519849d85bec43e9c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 08:28:51 +0700 Subject: [PATCH 66/92] release 2016.06.11.2 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 564cffae7..8fa97ee87 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11.1 +[debug] youtube-dl version 2016.06.11.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5bcb6a7b3..f6cc8b79e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11.1' +__version__ = '2016.06.11.2' From 4a420119a6e0b7363f9d31e37d3e7af818bedfd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 08:34:30 +0700 Subject: [PATCH 67/92] release 2016.06.11.3 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8fa97ee87..a46b75fd8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.2** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.3*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.3** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11.2 +[debug] youtube-dl version 2016.06.11.3 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f6cc8b79e..9932b1e62 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11.2' +__version__ = '2016.06.11.3' From 47787efa2b6bd5dc1b6f6cb7027586bac2de4c6c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 13:13:16 +0800 Subject: [PATCH 68/92] [leeco] Recognize Le Sports URLs (fixes #9750) --- youtube_dl/extractor/leeco.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 375fdaed1..63f581cd9 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -28,7 +28,7 @@ from ..utils import ( class LeIE(InfoExtractor): IE_DESC = '乐视网' - _VALID_URL = r'https?://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html' _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' @@ -69,6 +69,9 @@ class LeIE(InfoExtractor): 'hls_prefer_native': True, }, 'skip': 'Only available in China', + }, { + 'url': 'http://sports.le.com/video/25737697.html', + 'only_matching': True, }] @staticmethod @@ -196,7 +199,7 @@ class LeIE(InfoExtractor): class LePlaylistIE(InfoExtractor): - _VALID_URL = r'https?://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)' + _VALID_URL = r'https?://[a-z]+\.le\.com/(?!video)[a-z]+/(?P<id>[a-z0-9_]+)' _TESTS = [{ 'url': 'http://www.le.com/tv/46177.html', From 7aab3696dd02ca45feba523b4194d6430939dd1c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 15:37:04 +0800 Subject: [PATCH 69/92] [kuwo] Update _TESTS --- youtube_dl/extractor/kuwo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 11b31a699..0221fb919 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -148,8 +148,8 @@ class KuwoAlbumIE(InfoExtractor): 'url': 'http://www.kuwo.cn/album/502294/', 'info_dict': { 'id': '502294', - 'title': 'M', - 'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c', + 'title': 'Made\xa0Series\xa0《M》', + 'description': 'md5:d463f0d8a0ff3c3ea3d6ed7452a9483f', }, 'playlist_count': 2, } @@ -209,7 +209,7 @@ class KuwoSingerIE(InfoExtractor): 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 'info_dict': { 'id': 'bruno+mars', - 'title': 'Bruno Mars', + 'title': 'Bruno\xa0Mars', }, 'playlist_mincount': 329, }, { @@ -306,7 +306,7 @@ class KuwoMvIE(KuwoBaseIE): 'id': '6480076', 'ext': 'mp4', 'title': 'My HouseMV', - 'creator': '2PM', + 'creator': 'PM02:00', }, # In this video, music URLs (anti.s) are blocked outside China and # USA, while the MV URL (mvurl) is available globally, so force the MV From 15d106787e8c21e4d4df95957062bd07c873d203 Mon Sep 17 00:00:00 2001 From: Paul Henning <vxbinaca@users.noreply.github.com> Date: Sat, 11 Jun 2016 05:36:31 -0400 Subject: [PATCH 70/92] [utils] Change Firefox 44 to 47 See commit title. --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f77ab8650..0acbd67de 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -76,7 +76,7 @@ def register_socks_protocols(): compiled_regex_type = type(re.compile('')) std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From 856150d05647904a5cf6c519c6e276ce3536bd20 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 18:22:26 +0800 Subject: [PATCH 71/92] [telewebion] Add new extractor (closes #5135) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/telewebion.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/telewebion.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 38708294a..36ddc1f73 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -777,6 +777,7 @@ from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telemb import TeleMBIE from .teletask import TeleTaskIE +from .telewebion import TelewebionIE from .testurl import TestURLIE from .tf1 import TF1IE from .theintercept import TheInterceptIE diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py new file mode 100644 index 000000000..77916c601 --- /dev/null +++ b/youtube_dl/extractor/telewebion.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TelewebionIE(InfoExtractor): + _VALID_URL = r'https?://www\.telewebion\.com/#!/episode/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.telewebion.com/#!/episode/1263668/', + 'info_dict': { + 'id': '1263668', + 'ext': 'mp4', + 'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا', + 'thumbnail': 're:^https?://.*\.jpg', + 'view_count': int, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + secure_token = self._download_webpage( + 'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id) + episode_details = self._download_json( + 'http://m.s2.telewebion.com/op/op', video_id, + query={'action': 'getEpisodeDetails', 'episode_id': video_id}) + + m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % ( + video_id, episode_details['file_path'], secure_token) + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', m3u8_id='hls') + + picture_paths = [ + episode_details.get('picture_path'), + episode_details.get('large_picture_path'), + ] + + thumbnails = [{ + 'url': picture_path, + 'preference': idx, + } for idx, picture_path in enumerate(picture_paths) if picture_path is not None] + + return { + 'id': video_id, + 'title': episode_details['title'], + 'formats': formats, + 'thumbnails': thumbnails, + 'view_count': episode_details.get('view_count'), + } From c5edd147d1d2cf0502f5ef48652c88a75ef62529 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 18:33:37 +0800 Subject: [PATCH 72/92] [generic] Remove an invalid test Now handled by telewebion.py --- youtube_dl/extractor/generic.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ef18ce3dc..4aa24061c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1073,20 +1073,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, - # Contains a SMIL manifest - { - 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html', - 'info_dict': { - 'id': 'file', - 'ext': 'flv', - 'title': '+ Football: Lottery Champions League Europe', - 'uploader': 'www.telewebion.com', - }, - 'params': { - # rtmpe downloads - 'skip_download': True, - } - }, # Brightcove URL in single quotes { 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/', From 531a74968c24416cb2e4a79c9bfbcc9d02368e44 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 21:35:08 +0800 Subject: [PATCH 73/92] [vimeo] Fix extraction for VimeoReview videos --- youtube_dl/extractor/vimeo.py | 147 +++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 59f9cb1ae..0fd2c18a0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -66,6 +66,69 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) + def _vimeo_sort_formats(self, formats): + # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps + # at the same time without actual units specified. This lead to wrong sorting. + self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) + + def _parse_config(self, config, video_id): + # Extract title + video_title = config['video']['title'] + + # Extract uploader, uploader_url and uploader_id + video_uploader = config['video'].get('owner', {}).get('name') + video_uploader_url = config['video'].get('owner', {}).get('url') + video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None + + # Extract video thumbnail + video_thumbnail = config['video'].get('thumbnail') + if video_thumbnail is None: + video_thumbs = config['video'].get('thumbs') + if video_thumbs and isinstance(video_thumbs, dict): + _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] + + # Extract video duration + video_duration = int_or_none(config['video'].get('duration')) + + formats = [] + config_files = config['video'].get('files') or config['request'].get('files', {}) + for f in config_files.get('progressive', []): + video_url = f.get('url') + if not video_url: + continue + formats.append({ + 'url': video_url, + 'format_id': 'http-%s' % f.get('quality'), + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'fps': int_or_none(f.get('fps')), + 'tbr': int_or_none(f.get('bitrate')), + }) + m3u8_url = config_files.get('hls', {}).get('url') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + subtitles = {} + text_tracks = config['request'].get('text_tracks') + if text_tracks: + for tt in text_tracks: + subtitles[tt['lang']] = [{ + 'ext': 'vtt', + 'url': 'https://vimeo.com' + tt['url'], + }] + + return { + 'title': video_title, + 'uploader': video_uploader, + 'uploader_id': video_uploader_id, + 'uploader_url': video_uploader_url, + 'thumbnail': video_thumbnail, + 'duration': video_duration, + 'formats': formats, + 'subtitles': subtitles, + } + class VimeoIE(VimeoBaseInfoExtractor): """Information extractor for vimeo.com.""" @@ -153,7 +216,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, - 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026', + 'description': 'This is "youtube-dl password protected test video" by on Vimeo, the home for high quality videos and the people who love them.', }, 'params': { 'videopassword': 'youtube-dl', @@ -389,21 +452,6 @@ class VimeoIE(VimeoBaseInfoExtractor): 'https://player.vimeo.com/player/%s' % feature_id, {'force_feature_id': True}), 'Vimeo') - # Extract title - video_title = config['video']['title'] - - # Extract uploader, uploader_url and uploader_id - video_uploader = config['video'].get('owner', {}).get('name') - video_uploader_url = config['video'].get('owner', {}).get('url') - video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None - - # Extract video thumbnail - video_thumbnail = config['video'].get('thumbnail') - if video_thumbnail is None: - video_thumbs = config['video'].get('thumbs') - if video_thumbs and isinstance(video_thumbs, dict): - _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] - # Extract video description video_description = self._html_search_regex( @@ -423,9 +471,6 @@ class VimeoIE(VimeoBaseInfoExtractor): if not video_description and not mobj.group('player'): self._downloader.report_warning('Cannot find video description') - # Extract video duration - video_duration = int_or_none(config['video'].get('duration')) - # Extract upload date video_upload_date = None mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage) @@ -463,53 +508,22 @@ class VimeoIE(VimeoBaseInfoExtractor): 'format_id': source_name, 'preference': 1, }) - config_files = config['video'].get('files') or config['request'].get('files', {}) - for f in config_files.get('progressive', []): - video_url = f.get('url') - if not video_url: - continue - formats.append({ - 'url': video_url, - 'format_id': 'http-%s' % f.get('quality'), - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')), - 'fps': int_or_none(f.get('fps')), - 'tbr': int_or_none(f.get('bitrate')), - }) - m3u8_url = config_files.get('hls', {}).get('url') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps - # at the same time without actual units specified. This lead to wrong sorting. - self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) - subtitles = {} - text_tracks = config['request'].get('text_tracks') - if text_tracks: - for tt in text_tracks: - subtitles[tt['lang']] = [{ - 'ext': 'vtt', - 'url': 'https://vimeo.com' + tt['url'], - }] - - return { + info_dict = self._parse_config(config, video_id) + formats.extend(info_dict['formats']) + self._vimeo_sort_formats(formats) + info_dict.update({ 'id': video_id, - 'uploader': video_uploader, - 'uploader_url': video_uploader_url, - 'uploader_id': video_uploader_id, - 'upload_date': video_upload_date, - 'title': video_title, - 'thumbnail': video_thumbnail, - 'description': video_description, - 'duration': video_duration, 'formats': formats, + 'upload_date': video_upload_date, + 'description': video_description, 'webpage_url': url, 'view_count': view_count, 'like_count': like_count, 'comment_count': comment_count, - 'subtitles': subtitles, - } + }) + + return info_dict class VimeoOndemandIE(VimeoBaseInfoExtractor): @@ -692,7 +706,7 @@ class VimeoGroupsIE(VimeoAlbumIE): return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name) -class VimeoReviewIE(InfoExtractor): +class VimeoReviewIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)' @@ -704,6 +718,7 @@ class VimeoReviewIE(InfoExtractor): 'ext': 'mp4', 'title': "DICK HARDWICK 'Comedian'", 'uploader': 'Richard Hardwick', + 'uploader_id': 'user21297594', } }, { 'note': 'video player needs Referer', @@ -716,14 +731,18 @@ class VimeoReviewIE(InfoExtractor): 'uploader': 'DevWeek Events', 'duration': 2773, 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader_id': 'user22258446', } }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - player_url = 'https://player.vimeo.com/player/' + video_id - return self.url_result(player_url, 'Vimeo', video_id) + video_id = self._match_id(url) + config = self._download_json( + 'https://player.vimeo.com/video/%s/config' % video_id, video_id) + info_dict = self._parse_config(config, video_id) + self._vimeo_sort_formats(info_dict['formats']) + info_dict['id'] = video_id + return info_dict class VimeoWatchLaterIE(VimeoChannelIE): From 94e5d6aedb5b509601d29dd8ea352afa925d3b22 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 21:49:01 +0800 Subject: [PATCH 74/92] [viki] Skip a geo-restricted test --- youtube_dl/extractor/viki.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index e04b814c8..0c0cd622a 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -156,7 +156,8 @@ class VikiIE(VikiBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, + 'skip': 'Blocked in the US', }, { # episode 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', From c83b35d4aa4cec98ac171cca94ec515500076926 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 22:39:13 +0800 Subject: [PATCH 75/92] [viki] Update _TESTS --- youtube_dl/extractor/viki.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 0c0cd622a..70ce5de0e 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -127,7 +127,7 @@ class VikiIE(VikiBaseIE): }, { # clip 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', - 'md5': '86c0b5dbd4d83a6611a79987cc7a1989', + 'md5': 'feea2b1d7b3957f70886e6dfd8b8be84', 'info_dict': { 'id': '1067139v', 'ext': 'mp4', @@ -161,13 +161,13 @@ class VikiIE(VikiBaseIE): }, { # episode 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', - 'md5': '190f3ef426005ba3a080a63325955bc3', + 'md5': '1f54697dabc8f13f31bf06bb2e4de6db', 'info_dict': { 'id': '44699v', 'ext': 'mp4', 'title': 'Boys Over Flowers - Episode 1', - 'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2', - 'duration': 4155, + 'description': 'md5:b89cf50038b480b88b5b3c93589a9076', + 'duration': 4204, 'timestamp': 1270496524, 'upload_date': '20100405', 'uploader': 'group8', @@ -197,7 +197,7 @@ class VikiIE(VikiBaseIE): }, { # non-English description 'url': 'http://www.viki.com/videos/158036v-love-in-magic', - 'md5': '1713ae35df5a521b31f6dc40730e7c9c', + 'md5': '013dc282714e22acf9447cad14ff1208', 'info_dict': { 'id': '158036v', 'ext': 'mp4', @@ -303,7 +303,7 @@ class VikiChannelIE(VikiBaseIE): 'title': 'Boys Over Flowers', 'description': 'md5:ecd3cff47967fe193cff37c0bec52790', }, - 'playlist_count': 70, + 'playlist_mincount': 71, }, { 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete', 'info_dict': { From 6d28c408cfb0ce42f591cc6e2bb67522c0812c72 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 23:00:44 +0800 Subject: [PATCH 76/92] [viki] Do not use a fallback language for title in the first try In test_Viki_3, 'titles' gives a Hebrew title. --- youtube_dl/extractor/viki.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 70ce5de0e..efa15e0b6 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -101,10 +101,13 @@ class VikiBaseIE(InfoExtractor): self.report_warning('Unable to get session token, login has probably failed') @staticmethod - def dict_selection(dict_obj, preferred_key): + def dict_selection(dict_obj, preferred_key, allow_fallback=True): if preferred_key in dict_obj: return dict_obj.get(preferred_key) + if not allow_fallback: + return + filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()])) return filtered_dict[0] if filtered_dict else None @@ -218,7 +221,7 @@ class VikiIE(VikiBaseIE): self._check_errors(video) - title = self.dict_selection(video.get('titles', {}), 'en') + title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False) if not title: title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id container_titles = video.get('container', {}).get('titles', {}) From 80ae228b344ce36a07fb91c7e968fc5249c03161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 01:57:23 +0700 Subject: [PATCH 77/92] [matchtv] Modernize --- youtube_dl/extractor/matchtv.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py index 80a0d7013..33b0b539f 100644 --- a/youtube_dl/extractor/matchtv.py +++ b/youtube_dl/extractor/matchtv.py @@ -4,16 +4,12 @@ from __future__ import unicode_literals import random from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import ( - sanitized_Request, - xpath_text, -) +from ..utils import xpath_text class MatchTVIE(InfoExtractor): - _VALID_URL = r'https?://matchtv\.ru/?#live-player' - _TEST = { + _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)' + _TESTS = [{ 'url': 'http://matchtv.ru/#live-player', 'info_dict': { 'id': 'matchtv-live', @@ -24,12 +20,16 @@ class MatchTVIE(InfoExtractor): 'params': { 'skip_download': True, }, - } + }, { + 'url': 'http://matchtv.ru/on-air/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = 'matchtv-live' - request = sanitized_Request( - 'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse_urlencode({ + video_url = self._download_json( + 'http://player.matchtv.ntvplus.tv/player/smil', video_id, + query={ 'ts': '', 'quality': 'SD', 'contentId': '561d2c0df7159b37178b4567', @@ -40,11 +40,10 @@ class MatchTVIE(InfoExtractor): 'contentType': 'channel', 'timeShift': '0', 'platform': 'portal', - }), + }, headers={ 'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf', - }) - video_url = self._download_json(request, video_id)['data']['videoUrl'] + })['data']['videoUrl'] f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') formats = self._extract_f4m_formats(f4m_url, video_id) self._sort_formats(formats) From 2c3322e36ef23eb0566b820dd8e8711de20ed963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 04:49:37 +0700 Subject: [PATCH 78/92] [youporn] Fix metadata extraction --- youtube_dl/extractor/youporn.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 1124fe6c2..0df2d76ee 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -17,7 +17,7 @@ class YouPornIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', - 'md5': '71ec5fcfddacf80f495efa8b6a8d9a89', + 'md5': '3744d24c50438cf5b6f6d59feb5055c2', 'info_dict': { 'id': '505835', 'display_id': 'sex-ed-is-it-safe-to-masturbate-daily', @@ -121,21 +121,21 @@ class YouPornIE(InfoExtractor): webpage, 'thumbnail', fatal=False, group='thumbnail') uploader = self._html_search_regex( - r'(?s)<div[^>]+class=["\']videoInfoBy(?:\s+[^"\']+)?["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>', + r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', webpage, 'uploader', fatal=False) upload_date = unified_strdate(self._html_search_regex( - r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>', + r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>', webpage, 'upload date', fatal=False)) age_limit = self._rta_search(webpage) average_rating = int_or_none(self._search_regex( - r'<div[^>]+class=["\']videoInfoRating["\'][^>]*>\s*<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>', + r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>', webpage, 'average rating', fatal=False)) view_count = str_to_int(self._search_regex( - r'(?s)<div[^>]+class=["\']videoInfoViews["\'][^>]*>.*?([\d,.]+)\s*</div>', - webpage, 'view count', fatal=False)) + r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<', + webpage, 'view count', fatal=False, group='count')) comment_count = str_to_int(self._search_regex( r'>All [Cc]omments? \(([\d,.]+)\)', webpage, 'comment count', fatal=False)) From 329ca3bef695bff011ed9b2d5f03e1331bf5bf0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:05:34 +0700 Subject: [PATCH 79/92] [utils] Add try_get To reduce boilerplate when accessing JSON --- youtube_dl/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0acbd67de..c8308ba3a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1901,6 +1901,16 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True): return d.get(key_or_keys, default) +def try_get(src, getter, expected_type=None): + try: + v = getter(src) + except (AttributeError, KeyError, TypeError, IndexError): + pass + else: + if expected_type is None or isinstance(v, expected_type): + return v + + def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): return string if isinstance(string, compat_str) else compat_str(string, encoding, errors) From 98960c911c9bacc0c366dd11b194963a82606850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:06:04 +0700 Subject: [PATCH 80/92] [instagram] Extract metadata from JSON --- youtube_dl/extractor/instagram.py | 72 ++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 3cbe77ad8..fc0197ae1 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -8,6 +8,7 @@ from ..utils import ( int_or_none, limit_length, lowercase_escape, + try_get, ) @@ -19,10 +20,16 @@ class InstagramIE(InfoExtractor): 'info_dict': { 'id': 'aye83DjauH', 'ext': 'mp4', - 'uploader_id': 'naomipq', 'title': 'Video by naomipq', 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', - } + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1371748545, + 'upload_date': '20130620', + 'uploader_id': 'naomipq', + 'uploader': 'Naomi Leonor Phan-Quang', + 'like_count': int, + 'comment_count': int, + }, }, { # missing description 'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears', @@ -31,6 +38,13 @@ class InstagramIE(InfoExtractor): 'ext': 'mp4', 'uploader_id': 'britneyspears', 'title': 'Video by britneyspears', + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1453760977, + 'upload_date': '20160125', + 'uploader_id': 'britneyspears', + 'uploader': 'Britney Spears', + 'like_count': int, + 'comment_count': int, }, 'params': { 'skip_download': True, @@ -67,21 +81,57 @@ class InstagramIE(InfoExtractor): url = mobj.group('url') webpage = self._download_webpage(url, video_id) - uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', - webpage, 'uploader id', fatal=False) - desc = self._search_regex( - r'"caption":"(.+?)"', webpage, 'description', default=None) - if desc is not None: - desc = lowercase_escape(desc) + + (video_url, description, thumbnail, timestamp, uploader, + uploader_id, like_count, comment_count) = [None] * 8 + + shared_data = self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + webpage, 'shared data', default='{}'), + video_id, fatal=False) + if shared_data: + media = try_get( + shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) + if media: + video_url = media.get('video_url') + description = media.get('caption') + thumbnail = media.get('display_src') + timestamp = int_or_none(media.get('date')) + uploader = media.get('owner', {}).get('full_name') + uploader_id = media.get('owner', {}).get('username') + like_count = int_or_none(media.get('likes', {}).get('count')) + comment_count = int_or_none(media.get('comments', {}).get('count')) + + if not video_url: + video_url = self._og_search_video_url(webpage, secure=False) + + if not uploader_id: + uploader_id = self._search_regex( + r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', + webpage, 'uploader id', fatal=False) + + if not description: + description = self._search_regex( + r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None) + if description is not None: + description = lowercase_escape(description) + + if not thumbnail: + thumbnail = self._og_search_thumbnail(webpage) return { 'id': video_id, - 'url': self._og_search_video_url(webpage, secure=False), + 'url': video_url, 'ext': 'mp4', 'title': 'Video by %s' % uploader_id, - 'thumbnail': self._og_search_thumbnail(webpage), + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, 'uploader_id': uploader_id, - 'description': desc, + 'uploader': uploader, + 'like_count': like_count, + 'comment_count': comment_count, } From a936ac321c5c0cee8e9769334945e744cdc60ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:39:31 +0700 Subject: [PATCH 81/92] [README.md] Document using output template in batch files (Closes #9717) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 3ff33c156..43e5114ea 100644 --- a/README.md +++ b/README.md @@ -553,6 +553,10 @@ The current default template is `%(title)s-%(id)s.%(ext)s`. In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: +#### Output template and Windows batch files + +If you are using output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`. + #### Output template examples Note on Windows you may need to use double quotes instead of single. From 4e790117292d060a3e449c9edfffe14d231aee96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:57:04 +0700 Subject: [PATCH 82/92] [nrktv] Fix tests --- youtube_dl/extractor/nrk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 486e086bb..4a790da7b 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -163,7 +163,7 @@ class NRKTVIE(NRKBaseIE): 'ext': 'mp4', 'title': '20 spørsmål 23.05.2014', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', - 'duration': 1741.52, + 'duration': 1741, }, }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', @@ -173,7 +173,7 @@ class NRKTVIE(NRKBaseIE): 'ext': 'mp4', 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db', - 'duration': 4605.08, + 'duration': 4605, }, }, { # single playlist video From 971e3b7520563936f6e6946f5c08d64f65ab6f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 07:20:37 +0700 Subject: [PATCH 83/92] [nrk:skole] Fix extraction --- youtube_dl/extractor/nrk.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 4a790da7b..6ded5bd45 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -260,30 +260,34 @@ class NRKPlaylistIE(InfoExtractor): class NRKSkoleIE(InfoExtractor): IE_DESC = 'NRK Skole' - _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/klippdetalj?.*\btopic=(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)' _TESTS = [{ - 'url': 'http://nrk.no/skole/klippdetalj?topic=nrk:klipp/616532', - 'md5': '04cd85877cc1913bce73c5d28a47e00f', + 'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099', + 'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6', 'info_dict': { 'id': '6021', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Genetikk og eneggede tvillinger', 'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d', 'duration': 399, }, }, { - 'url': 'http://www.nrk.no/skole/klippdetalj?topic=nrk%3Aklipp%2F616532#embed', - 'only_matching': True, - }, { - 'url': 'http://www.nrk.no/skole/klippdetalj?topic=urn:x-mediadb:21379', + 'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355', 'only_matching': True, }] def _real_extract(self, url): - video_id = compat_urllib_parse_unquote(self._match_id(url)) + video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id, + video_id) + + nrk_id = self._parse_json( + self._search_regex( + r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>', + webpage, 'application json'), + video_id)['activeMedia']['psId'] - nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id') return self.url_result('nrk:%s' % nrk_id) From 84dcd1c4e47f2a5a84a4658f42c66f7546588001 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 11:08:39 +0700 Subject: [PATCH 84/92] [streamcloud] Detect removed videos (Closes #3768) --- youtube_dl/extractor/streamcloud.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index 712359885..58560ec64 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, sanitized_Request, urlencode_postdata, ) @@ -14,7 +15,7 @@ class StreamcloudIE(InfoExtractor): IE_NAME = 'streamcloud.eu' _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?' - _TEST = { + _TESTS = [{ 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', 'md5': '6bea4c7fa5daaacc2a946b7146286686', 'info_dict': { @@ -23,7 +24,10 @@ class StreamcloudIE(InfoExtractor): 'title': 'youtube-dl test video \'/\\ ä ↭', }, 'skip': 'Only available from the EU' - } + }, { + 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -31,6 +35,10 @@ class StreamcloudIE(InfoExtractor): orig_webpage = self._download_webpage(url, video_id) + if '>File Not Found<' in orig_webpage: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) + fields = re.findall(r'''(?x)<input\s+ type="(?:hidden|submit)"\s+ name="([^"]+)"\s+ From 77a9a9c295c753c4de4c96def6a9a15de1025f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 12:06:48 +0700 Subject: [PATCH 85/92] release 2016.06.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a46b75fd8..243f2de5d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.3*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.3** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11.3 +[debug] youtube-dl version 2016.06.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f89c2d1f2..e8c0a5d24 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek** - **ARD:mediathek**: Saarländischer Rundfunk + - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -647,6 +647,7 @@ - **Telegraaf** - **TeleMB** - **TeleTask** + - **Telewebion** - **TF1** - **TheIntercept** - **ThePlatform** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9932b1e62..5e9c14398 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11.3' +__version__ = '2016.06.12' From e69f9f5d68aed32cc27ca188b0f51925d949c365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 16:45:07 +0700 Subject: [PATCH 86/92] [downloader/external] Decode error string before writing to stderr --- youtube_dl/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 3ff1f9ed4..fae245024 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -85,7 +85,7 @@ class ExternalFD(FileDownloader): cmd, stderr=subprocess.PIPE) _, stderr = p.communicate() if p.returncode != 0: - self.to_stderr(stderr) + self.to_stderr(stderr.decode('utf-8', 'replace')) return p.returncode From bccdac68749e7a39a47dd0e1ad0ec9c177657de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 01:11:04 +0700 Subject: [PATCH 87/92] [xfileshare:xvidstage] Add support for videos with packed codes (Closes #4335) --- youtube_dl/extractor/xfileshare.py | 31 ++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index ee4d04c20..fe0ab6300 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -5,8 +5,10 @@ import re from .common import InfoExtractor from ..utils import ( + decode_packed_codes, ExtractorError, int_or_none, + NO_DEFAULT, sanitized_Request, urlencode_postdata, ) @@ -23,6 +25,7 @@ class XFileShareIE(InfoExtractor): ('thevideobee.to', 'TheVideoBee'), ('vidto.me', 'Vidto'), ('streamin.to', 'Streamin.To'), + ('xvidstage.com', 'XVIDSTAGE'), ) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) @@ -78,6 +81,13 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'Big Buck Bunny trailer', }, + }, { + 'url': 'http://xvidstage.com/e0qcnl03co6z', + 'info_dict': { + 'id': 'e0qcnl03co6z', + 'ext': 'mp4', + 'title': 'Chucky Prank 2015.mp4', + }, }] def _real_extract(self, url): @@ -113,10 +123,23 @@ class XFileShareIE(InfoExtractor): r'>Watch (.+) ', r'<h2 class="video-page-head">([^<]+)</h2>'], webpage, 'title', default=None) or self._og_search_title(webpage)).strip() - video_url = self._search_regex( - [r'file\s*:\s*["\'](http[^"\']+)["\'],', - r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-z.\/\-_]+)'], - webpage, 'file url') + + def extract_video_url(default=NO_DEFAULT): + return self._search_regex( + (r'file\s*:\s*(["\'])(?P<url>http.+?)\1,', + r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1', + r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)', + r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'), + webpage, 'file url', default=default, group='url') + + video_url = extract_video_url(default=None) + + if not video_url: + webpage = decode_packed_codes(self._search_regex( + r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))", + webpage, 'packed code')) + video_url = extract_video_url() + thumbnail = self._search_regex( r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None) From cf2bf840bac1742cb422549a5491a30f70d1abb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 01:11:14 +0700 Subject: [PATCH 88/92] [xfileshare] Fix test --- youtube_dl/extractor/xfileshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index fe0ab6300..0f8ccf430 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -39,7 +39,7 @@ class XFileShareIE(InfoExtractor): 'md5': '5ae4a3580620380619678ee4875893ba', 'info_dict': { 'id': '06y9juieqpmi', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ', 'thumbnail': 're:http://.*\.jpg', }, From 33b72ce64e8705a71f8ab0e6a322e5f9f3b99276 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 01:19:54 +0700 Subject: [PATCH 89/92] [xfileshare] Improve removed videos detection --- youtube_dl/extractor/xfileshare.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 0f8ccf430..995aada0d 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -32,7 +32,10 @@ class XFileShareIE(InfoExtractor): _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0])) - _FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<' + _FILE_NOT_FOUND_REGEXES = ( + r'>(?:404 - )?File Not Found<', + r'>The file was removed by administrator<', + ) _TESTS = [{ 'url': 'http://gorillavid.in/06y9juieqpmi', @@ -88,6 +91,10 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'Chucky Prank 2015.mp4', }, + }, { + # removed by administrator + 'url': 'http://xvidstage.com/amfy7atlkx25', + 'only_matching': True, }] def _real_extract(self, url): @@ -97,7 +104,7 @@ class XFileShareIE(InfoExtractor): url = 'http://%s/%s' % (mobj.group('host'), video_id) webpage = self._download_webpage(url, video_id) - if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None: + if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): raise ExtractorError('Video %s does not exist' % video_id, expected=True) fields = self._hidden_inputs(webpage) From b50e02c1e4c9ea70e88ab115b17cfa109b0c9617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 07:05:32 +0700 Subject: [PATCH 90/92] [README.md] Update links to options available for YoutubeDL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 43e5114ea..b5cbaced7 100644 --- a/README.md +++ b/README.md @@ -964,7 +964,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From b4663f12b1c872f4e731f1940831ec017bc86959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 07:16:35 +0700 Subject: [PATCH 91/92] [README.md] Update links to info dict metafields --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b5cbaced7..5a9768161 100644 --- a/README.md +++ b/README.md @@ -935,8 +935,8 @@ After you have ensured this site is distributing it's content legally, you can f ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. -8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. +7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. +8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: From 79cd8b3d8acee7845260d5bd60698155a0d81d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 10:04:04 +0700 Subject: [PATCH 92/92] [README.md] Suggest checking extractor code under all Python versions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a9768161..f1e59542d 100644 --- a/README.md +++ b/README.md @@ -937,7 +937,7 @@ After you have ensured this site is distributing it's content legally, you can f 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. -9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). +9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py