From 043dc9d36fea85a964bad3ec13f77d32c462115b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 May 2016 18:39:54 +0800 Subject: [PATCH 01/67] [cbc] Fix for old-styled URLs The URL http://www.cbc.ca/player/News/ID/2672225049/ (#6342) redirects to http://www.cbc.ca/player/play/2672224672, while youtube-dl wasn't able to handle it correctly. --- youtube_dl/extractor/cbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 581928f7d..daf237ca8 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -11,7 +11,7 @@ from ..utils import ( class CBCIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P[^/?#]+)' _TESTS = [{ # with mediaId 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', From ad96b4c8f56ba9873c62a2ce9916253f9b8a49ee Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 May 2016 19:02:53 +0800 Subject: [PATCH 02/67] [common] Extract audio formats in SMIL Found in http://www.cbc.ca/player/play/2657631896 Closes #5156 --- youtube_dl/extractor/common.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8a8c07226..9f22ee930 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1278,21 +1278,21 @@ class InfoExtractor(object): m3u8_count = 0 srcs = [] - videos = smil.findall(self._xpath_ns('.//video', namespace)) - for video in videos: - src = video.get('src') + media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace)) + for medium in media: + src = medium.get('src') if not src or src in srcs: continue srcs.append(src) - bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) - filesize = 
int_or_none(video.get('size') or video.get('fileSize')) - width = int_or_none(video.get('width')) - height = int_or_none(video.get('height')) - proto = video.get('proto') - ext = video.get('ext') + bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000) + filesize = int_or_none(medium.get('size') or medium.get('fileSize')) + width = int_or_none(medium.get('width')) + height = int_or_none(medium.get('height')) + proto = medium.get('proto') + ext = medium.get('ext') src_ext = determine_ext(src) - streamer = video.get('streamer') or base + streamer = medium.get('streamer') or base if proto == 'rtmp' or streamer.startswith('rtmp'): rtmp_count += 1 From 31a70191e730a2a963c8b2e4d19921cad573ad8a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 May 2016 19:04:50 +0800 Subject: [PATCH 03/67] [cbc] Add the test case from #5156 --- youtube_dl/extractor/cbc.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index daf237ca8..22d5e72d5 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -91,7 +91,7 @@ class CBCIE(InfoExtractor): class CBCPlayerIE(InfoExtractor): _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', 'info_dict': { 'id': '2683190193', @@ -102,7 +102,20 @@ class CBCPlayerIE(InfoExtractor): 'upload_date': '20160210', 'uploader': 'CBCC-NEW', }, - } + }, { + # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ + 'url': 'http://www.cbc.ca/player/play/2657631896', + 'md5': 'e5e708c34ae6fca156aafe17c43e8b75', + 'info_dict': { + 'id': '2657631896', + 'ext': 'mp3', + 'title': 'CBC Montreal is organizing its first ever community hackathon!', + 'description': 'The modern technology we tend to depend on so heavily, is never 
without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.', + 'timestamp': 1425704400, + 'upload_date': '20150307', + 'uploader': 'CBCC-NEW', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) From f0c96af9cb0edc69f9ba73d39e6e191994e31256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 May 2016 20:55:10 +0600 Subject: [PATCH 04/67] [wistia] Add alias and modernize --- youtube_dl/extractor/wistia.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 8b14840a2..478c42833 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -3,16 +3,16 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( ExtractorError, - sanitized_Request, int_or_none, ) class WistiaIE(InfoExtractor): - _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P[a-z0-9]+)' - _API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P[a-z0-9]+)' + _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' + _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' - _TEST = { + _TESTS = [{ 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 'md5': 'cafeb56ec0c53c18c97405eecb3133df', 'info_dict': { @@ -24,17 +24,25 @@ class WistiaIE(InfoExtractor): 'timestamp': 1386185018, 'duration': 117, }, - } + }, { + 'url': 'wistia:sh7fpupwlt', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - request = sanitized_Request(self._API_URL.format(video_id)) - request.add_header('Referer', url) # Some videos require this. - data_json = self._download_json(request, video_id) + data_json = self._download_json( + self._API_URL % video_id, video_id, + # Some videos require this. 
+ headers={ + 'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id, + }) + if data_json.get('error'): - raise ExtractorError('Error while getting the playlist', - expected=True) + raise ExtractorError( + 'Error while getting the playlist', expected=True) + data = data_json['media'] title = data['name'] From 36ca2c55db7939aff2dc700523843a9a0f82ae2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 May 2016 21:04:01 +0600 Subject: [PATCH 05/67] [wistia] Skip storyboard and improve extraction --- youtube_dl/extractor/wistia.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 478c42833..6eb94fcab 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -49,19 +49,23 @@ class WistiaIE(InfoExtractor): formats = [] thumbnails = [] for a in data['assets']: + aurl = a.get('url') + if not aurl: + continue astatus = a.get('status') atype = a.get('type') - if (astatus is not None and astatus != 2) or atype == 'preview': + if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'): continue elif atype in ('still', 'still_image'): thumbnails.append({ - 'url': a['url'], - 'resolution': '%dx%d' % (a['width'], a['height']), + 'url': aurl, + 'width': int_or_none(a.get('width')), + 'height': int_or_none(a.get('height')), }) else: formats.append({ 'format_id': atype, - 'url': a['url'], + 'url': aurl, 'tbr': int_or_none(a.get('bitrate')), 'vbr': int_or_none(a.get('opt_vbitrate')), 'width': int_or_none(a.get('width')), From 45f160a43c5f103af7a843f1159a1f6e8f498f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 May 2016 21:16:08 +0600 Subject: [PATCH 06/67] [wistia] Improve hls support --- youtube_dl/extractor/wistia.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 
6eb94fcab..97139a35a 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -27,6 +27,10 @@ class WistiaIE(InfoExtractor): }, { 'url': 'wistia:sh7fpupwlt', 'only_matching': True, + }, { + # with hls video + 'url': 'wistia:807fafadvk', + 'only_matching': True, }] def _real_extract(self, url): @@ -63,6 +67,8 @@ class WistiaIE(InfoExtractor): 'height': int_or_none(a.get('height')), }) else: + aext = a.get('ext') + is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' formats.append({ 'format_id': atype, 'url': aurl, @@ -73,7 +79,8 @@ class WistiaIE(InfoExtractor): 'filesize': int_or_none(a.get('size')), 'vcodec': a.get('codec'), 'container': a.get('container'), - 'ext': a.get('ext'), + 'ext': 'mp4' if is_m3u8 else aext, + 'protocol': 'm3u8' if is_m3u8 else None, 'preference': 1 if atype == 'original' else None, }) From 64413f7563eb7a89e06ede91fc135de73bc57db4 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 20 May 2016 16:20:05 +0100 Subject: [PATCH 07/67] [cbc] fix extraction for flv only videos(fixes #5309) --- youtube_dl/extractor/cbc.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 22d5e72d5..ff663d079 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -28,6 +28,7 @@ class CBCIE(InfoExtractor): }, { # with clipId 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', + 'md5': '0274a90b51a9b4971fe005c63f592f12', 'info_dict': { 'id': '2487345465', 'ext': 'mp4', @@ -93,6 +94,7 @@ class CBCPlayerIE(InfoExtractor): _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P\d+)' _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', + 'md5': '64d25f841ddf4ddb28a235338af32e2c', 'info_dict': { 'id': '2683190193', 'ext': 'mp4', @@ -115,6 +117,19 @@ class CBCPlayerIE(InfoExtractor): 'upload_date': '20150307', 
'uploader': 'CBCC-NEW', }, + }, { + # available only when we add `formats=MPEG4,FLV,MP3` to theplatform url + 'url': 'http://www.cbc.ca/player/play/2164402062', + 'md5': '17a61eb813539abea40618d6323a7f82', + 'info_dict': { + 'id': '2164402062', + 'ext': 'flv', + 'title': 'Cancer survivor four times over', + 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', + 'timestamp': 1320410746, + 'upload_date': '20111104', + 'uploader': 'CBCC-NEW', + }, }] def _real_extract(self, url): @@ -123,7 +138,7 @@ class CBCPlayerIE(InfoExtractor): '_type': 'url_transparent', 'ie_key': 'ThePlatform', 'url': smuggle_url( - 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, { + 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, { 'force_smil_url': True }), 'id': video_id, From aa5957ac49aad5165ce9ab5b9403539d61a09dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 May 2016 21:33:31 +0600 Subject: [PATCH 08/67] [extractor/generic] Add support for async wistia embeds (Closes #9549) --- youtube_dl/extractor/generic.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a6b1e23e3..632d7b5f0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1563,6 +1563,15 @@ class GenericIE(InfoExtractor): 'id': match.group('id') } + match = re.search( + r'''(?sx) + ]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? 
+ ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]+)\b.*?\2 + ''', webpage) + if match: + return self.url_result(self._proto_relative_url( + 'wistia:%s' % match.group('id')), 'Wistia') + # Look for SVT player svt_url = SVTIE._extract_url(webpage) if svt_url: From 7ded6545edb18bb008e8277b42a21d60fb6cd653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 May 2016 21:43:36 +0600 Subject: [PATCH 09/67] [extractor/generic] Add test for wistia standard embed --- youtube_dl/extractor/generic.py | 16 ++++++++++++++++ youtube_dl/extractor/wistia.py | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 632d7b5f0..9883cde61 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -742,6 +742,22 @@ class GenericIE(InfoExtractor): 'timestamp': 1401832161, }, }, + # Wistia standard embed (async) + { + 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/', + 'info_dict': { + 'id': '807fafadvk', + 'ext': 'mp4', + 'title': 'Drip Brennan Dunn Workshop', + 'description': 'a JV Webinars video from getdrip-1', + 'duration': 4986.95, + 'upload_date': '20160518', + 'timestamp': 1463607249, + }, + 'params': { + 'skip_download': True, + } + }, # Soundcloud embed { 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/', diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 97139a35a..c634b8dec 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + float_or_none, ) @@ -92,6 +93,6 @@ class WistiaIE(InfoExtractor): 'description': data.get('seoDescription'), 'formats': formats, 'thumbnails': thumbnails, - 'duration': int_or_none(data.get('duration')), + 'duration': float_or_none(data.get('duration')), 'timestamp': 
int_or_none(data.get('createdAt')), } From 6c114b12104e8c9d0713d1cb2cd6c4ddc7872b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 May 2016 21:55:35 +0600 Subject: [PATCH 10/67] [extractor/generic] Remove generic id and title from wistia extractionand update tests --- youtube_dl/extractor/generic.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9883cde61..c368f08e1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -717,15 +717,18 @@ class GenericIE(InfoExtractor): }, # Wistia embed { - 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', - 'md5': '8788b683c777a5cf25621eaf286d0c23', + 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', + 'md5': '1953f3a698ab51cfc948ed3992a0b7ff', 'info_dict': { - 'id': '1cfaf6b7ea', + 'id': '6e2wtrbdaf', 'ext': 'mov', - 'title': 'md5:51364a8d3d009997ba99656004b5e20d', - 'duration': 643.0, - 'filesize': 182808282, - 'uploader': 'education-portal.com', + 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england', + 'description': 'a Paywall Videos video from Remilon', + 'duration': 644.072, + 'uploader': 'study.com', + 'timestamp': 1459678540, + 'upload_date': '20160403', + 'filesize': 24687186, }, }, { @@ -734,12 +737,12 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'uxjb0lwrcz', 'ext': 'mp4', - 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', + 'title': 'Conversation about Hexagonal Rails Part 1', 'description': 'a Martin Fowler video from ThoughtWorks', 'duration': 1715.0, 'uploader': 'thoughtworks.wistia.com', - 'upload_date': '20140603', 'timestamp': 1401832161, + 'upload_date': '20140603', }, }, # Wistia standard embed (async) @@ -751,8 
+754,8 @@ class GenericIE(InfoExtractor): 'title': 'Drip Brennan Dunn Workshop', 'description': 'a JV Webinars video from getdrip-1', 'duration': 4986.95, - 'upload_date': '20160518', 'timestamp': 1463607249, + 'upload_date': '20160518', }, 'params': { 'skip_download': True, @@ -1564,19 +1567,15 @@ class GenericIE(InfoExtractor): 'url': embed_url, 'ie_key': 'Wistia', 'uploader': video_uploader, - 'title': video_title, - 'id': video_id, } match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) if match: return { '_type': 'url_transparent', - 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')), + 'url': 'wistia:%s' % match.group('id'), 'ie_key': 'Wistia', 'uploader': video_uploader, - 'title': video_title, - 'id': match.group('id') } match = re.search( From 6756602be6b59c7bff57ccaeb33844cdc5636910 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Thu, 19 May 2016 03:42:09 +0200 Subject: [PATCH 11/67] [LocalNews8] add extractor (Closes #9200) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/localnews8.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/localnews8.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 74aba2d5c..5b96a086d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -396,6 +396,7 @@ from .livestream import ( LivestreamShortenerIE, ) from .lnkgo import LnkGoIE +from .localnews8 import LocalNews8IE from .lovehomeporn import LoveHomePornIE from .lrt import LRTIE from .lynda import ( diff --git a/youtube_dl/extractor/localnews8.py b/youtube_dl/extractor/localnews8.py new file mode 100644 index 000000000..b38d1d58a --- /dev/null +++ b/youtube_dl/extractor/localnews8.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class LocalNews8IE(InfoExtractor): + 
_VALID_URL = r'https?://(?:www\.)?localnews8\.com/.+?/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304', + 'md5': '477bdb188f177788c65db27ecb56649b', + 'info_dict': { + 'id': '35183304', + 'ext': 'mp4', + 'title': 'Rexburg business turns carbon fiber scraps into wedding ring', + 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.', + 'duration': '153', + 'timestamp': '1441844822', + 'uploader_id': 'api', + }} + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + partner_id = self._search_regex(r'partnerId\s*:\s*"(\d+)"', webpage, video_id) + kaltura_id = self._search_regex(r'var\s+videoIdString\s*=\s*"kaltura:(.+)";', webpage, video_id) + + return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') From 1846e9ade0fb9508459282a992539c700aa26f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 May 2016 22:31:08 +0600 Subject: [PATCH 12/67] [localnews8] Fix extractor (Closes #9539) --- youtube_dl/extractor/localnews8.py | 38 ++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/localnews8.py b/youtube_dl/extractor/localnews8.py index b38d1d58a..aad396135 100644 --- a/youtube_dl/extractor/localnews8.py +++ b/youtube_dl/extractor/localnews8.py @@ -1,29 +1,47 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class LocalNews8IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?localnews8\.com/.+?/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P[^/]+)/(?P[0-9]+)' _TEST = { 'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304', - 'md5': '477bdb188f177788c65db27ecb56649b', + 'md5': 
'be4d48aea61aa2bde7be2ee47691ad20', 'info_dict': { 'id': '35183304', + 'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings', 'ext': 'mp4', 'title': 'Rexburg business turns carbon fiber scraps into wedding ring', 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.', - 'duration': '153', - 'timestamp': '1441844822', + 'duration': 153, + 'timestamp': 1441844822, + 'upload_date': '20150910', 'uploader_id': 'api', - }} + } + } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - partner_id = self._search_regex(r'partnerId\s*:\s*"(\d+)"', webpage, video_id) - kaltura_id = self._search_regex(r'var\s+videoIdString\s*=\s*"kaltura:(.+)";', webpage, video_id) + webpage = self._download_webpage(url, display_id) - return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') + partner_id = self._search_regex( + r'partnerId\s*[:=]\s*(["\'])(?P\d+)\1', + webpage, 'partner id', group='id') + kaltura_id = self._search_regex( + r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P[0-9a-z_]+)\1', + webpage, 'videl id', group='id') + + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), + 'ie_key': 'Kaltura', + 'id': video_id, + 'display_id': display_id, + } From b219f5e51be520b2e23acd1ec08735fc733f9619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 May 2016 00:59:06 +0600 Subject: [PATCH 13/67] [brightcove:new] Improve error reporting --- youtube_dl/extractor/brightcove.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index fc7fc5b16..ef560b592 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -444,6 +444,10 
@@ class BrightcoveNewIE(InfoExtractor): # non numeric ref: prefixed video id 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356', 'only_matching': True, + }, { + # unavailable video without message but with error_code + 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001', + 'only_matching': True, }] @staticmethod @@ -514,8 +518,9 @@ class BrightcoveNewIE(InfoExtractor): }) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id) - raise ExtractorError(json_data[0]['message'], expected=True) + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + raise ExtractorError( + json_data.get('message') or json_data['error_code'], expected=True) raise title = json_data['name'].strip() From c8602b2f9bcdda00398b2c54db4c1be85b75ce39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 May 2016 05:09:16 +0600 Subject: [PATCH 14/67] [nrk] Unquote subtitles' URLs --- youtube_dl/extractor/nrk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 7532f40c1..486e086bb 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -55,7 +55,9 @@ class NRKBaseIE(InfoExtractor): for subtitle in ('webVtt', 'timedText'): subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) if subtitle_url: - subtitles.setdefault('no', []).append({'url': subtitle_url}) + subtitles.setdefault('no', []).append({ + 'url': compat_urllib_parse_unquote(subtitle_url) + }) entries.append({ 'id': asset.get('carrierId') or entry_id, 'title': entry_title, From 16da9bbc29b76b6e6e1a6134a17e9f25d91296c8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 21 May 2016 13:15:28 +0800 Subject: [PATCH 15/67] [common] Add _m3u8_meta_format() template For 
extractors who handle m3u8 manifests by themselves. (eg., AnvatoIE) Part of #9522 --- youtube_dl/extractor/common.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9f22ee930..17e866f91 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1058,12 +1058,8 @@ class InfoExtractor(object): }) return formats - def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, - entry_protocol='m3u8', preference=None, - m3u8_id=None, note=None, errnote=None, - fatal=True, live=False): - - formats = [{ + def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None): + return { 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), 'url': m3u8_url, 'ext': ext, @@ -1071,7 +1067,14 @@ class InfoExtractor(object): 'preference': preference - 1 if preference else -1, 'resolution': 'multiple', 'format_note': 'Quality selection URL', - }] + } + + def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, + entry_protocol='m3u8', preference=None, + m3u8_id=None, note=None, errnote=None, + fatal=True, live=False): + + formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] format_url = lambda u: ( u From 7b2fcbfd4ea34e6d29484f5987a36665117aefaa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 21 May 2016 13:16:28 +0800 Subject: [PATCH 16/67] [common] Skip TYPE=CLOSED-CAPTIONS lines in m3u8 manifests According to [1], valid values for TYPE are AUDIO, VIDEO, SUBTITLES and CLOSED-CAPTIONS. Such a value is found in Anvato master playlists, though I don't use _extract_m3u8_formats() in the end. Part of #9522. 
[1] https://tools.ietf.org/html/draft-pantos-http-live-streaming-19#section-4.3.4.1 --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 17e866f91..4bfa610c1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1141,7 +1141,7 @@ class InfoExtractor(object): format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None + last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None # Despite specification does not mention NAME attribute for # EXT-X-STREAM-INF it still sometimes may be present stream_name = last_info.get('NAME') or last_media_name From 9f54e692d2de2d52f147f2d714d0312dbe21a5ed Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 21 May 2016 13:18:29 +0800 Subject: [PATCH 17/67] [anvato] Add new extractor Used in CBSLocal (#9522) --- youtube_dl/extractor/anvato.py | 224 +++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 youtube_dl/extractor/anvato.py diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py new file mode 100644 index 000000000..cb29cf111 --- /dev/null +++ b/youtube_dl/extractor/anvato.py @@ -0,0 +1,224 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import hashlib +import json +import random +import time + +from .common import InfoExtractor +from ..aes import aes_encrypt +from ..compat import compat_str +from ..utils import ( + bytes_to_intlist, + determine_ext, + intlist_to_bytes, + int_or_none, + strip_jsonp, +) + + +def md5_text(s): + if not isinstance(s, compat_str): + s = compat_str(s) + return hashlib.md5(s.encode('utf-8')).hexdigest() + + +class AnvatoIE(InfoExtractor): + # Copied from anvplayer.min.js + _ANVACK_TABLE = { + 
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', + 'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA', + 'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP', + 'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv', + 'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7', + 'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR', + 'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg', + 'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto', + 'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY', + 'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh', + 'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK', + 'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D', + 'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad', + 'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp', + 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih', + 'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR', + 'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 
'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW', + 'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su', + 'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q', + 'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5', + 'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3', + 'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI', + 'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s', + 'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz', + 'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg', + 'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x', + 'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH', + 'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX', + 'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc', + 'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK', + 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7', + 'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C', + 'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e', + 'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 
'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1', + 'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re', + 'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51', + 'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho', + 'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9', + 'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH', + 'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F', + 'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo', + 'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR', + 'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa', + 'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk', + 'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ', + 'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ', + 'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m', + 'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b', + 'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3', + 'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK', + 
'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', + 'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', + 'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F', + 'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx', + 'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ', + 'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH', + 'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm', + 'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt', + 'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl', + 'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b', + 'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV', + 'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg', + 'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk', + 'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT', + 'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa', + 'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv', + 'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k', + 
'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI', + 'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr', + 'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw', + 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K', + 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH', + 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK', + 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu', + 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', + 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', + 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK', + 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n', + 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD', + 'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk', + 'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn', + 'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W', + 'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ', + 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ' + } + + _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' + + def __init__(self, *args, 
**kwargs): + super(AnvatoIE, self).__init__(*args, **kwargs) + self.__server_time = None + + def _server_time(self, access_key, video_id): + if self.__server_time is not None: + return self.__server_time + + self.__server_time = int(self._download_json( + self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id, + note='Fetching server time')['server_time']) + + return self.__server_time + + def _api_prefix(self, access_key): + return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage') + + def _get_video_json(self, access_key, video_id): + # See et() in anvplayer.min.js, which is an alias of getVideoJSON() + video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key) + server_time = self._server_time(access_key, video_id) + input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time)) + + auth_secret = intlist_to_bytes(aes_encrypt( + bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) + + video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii') + anvrid = md5_text(time.time() * 1000 * random.random())[:30] + payload = { + 'api': { + 'anvrid': anvrid, + 'anvstk': md5_text('%s|%s|%d|%s' % ( + access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])), + 'anvts': server_time, + }, + } + + return self._download_json( + video_data_url, video_id, transform_source=strip_jsonp, + data=json.dumps(payload).encode('utf-8')) + + def _extract_anvato_videos(self, webpage, video_id): + anvplayer_data = self._parse_json(self._html_search_regex( + r']+data-anvp=\'([^\']+)\'', webpage, + 'Anvato player data'), video_id) + + video_id = anvplayer_data['video'] + access_key = anvplayer_data['accessKey'] + + video_data = self._get_video_json(access_key, video_id) + + formats = [] + for published_url in video_data['published_urls']: + video_url = published_url['embed_url'] + ext = determine_ext(video_url) + + if ext 
== 'smil': + formats.extend(self._extract_smil_formats(video_url, video_id)) + continue + + tbr = int_or_none(published_url.get('kbps')) + a_format = { + 'url': video_url, + 'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(), + 'tbr': tbr if tbr != 0 else None, + } + + if ext == 'm3u8': + # Not using _extract_m3u8_formats here as individual media + # playlists are also included in published_urls. + if tbr is None: + formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls')) + continue + else: + a_format.update({ + 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), + 'ext': 'mp4', + }) + elif ext == 'mp3': + a_format['vcodec'] = 'none' + else: + a_format.update({ + 'width': int_or_none(published_url.get('width')), + 'height': int_or_none(published_url.get('height')), + }) + formats.append(a_format) + + self._sort_formats(formats) + + subtitles = {} + for caption in video_data.get('captions', []): + a_caption = { + 'url': caption['url'], + 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None + } + subtitles.setdefault(caption['language'], []).append(a_caption) + + return { + 'id': video_id, + 'formats': formats, + 'title': video_data.get('def_title'), + 'description': video_data.get('def_description'), + 'categories': video_data.get('categories'), + 'thumbnail': video_data.get('thumbnail'), + 'subtitles': subtitles, + } From 612b5f403e33d5c164b5c0bbad9f01ef6d38d050 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 21 May 2016 13:38:01 +0800 Subject: [PATCH 18/67] [jwplatform] Improved m3u8 and rtmp support Changes made for SendtoNewsIE. 
Part of #9522 --- youtube_dl/extractor/jwplatform.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 8a5e562db..0aa6fc750 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -5,33 +5,47 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, float_or_none, int_or_none, ) class JWPlatformBaseIE(InfoExtractor): - def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True): + def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None): video_data = jwplayer_data['playlist'][0] formats = [] for source in video_data['sources']: source_url = self._proto_relative_url(source['file']) source_type = source.get('type') or '' - if source_type in ('application/vnd.apple.mpegurl', 'hls'): + if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', fatal=False)) + source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) elif source_type.startswith('audio'): formats.append({ 'url': source_url, 'vcodec': 'none', }) else: - formats.append({ + a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), 'height': int_or_none(source.get('height')), - }) + } + if source_url.startswith('rtmp'): + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as + # of jwplayer.flash.swf + rtmp_url, prefix, play_path = re.split( + r'((?:mp4|mp3|flv):)', source_url, 1) + a_format.update({ + 'url': rtmp_url, + 'ext': 'flv', + 'play_path': prefix + play_path, + }) + if rtmp_params: + a_format.update(rtmp_params) + formats.append(a_format) self._sort_formats(formats) subtitles = {} From 5ce3d5bd1b0933a26a4224643cf8d3ad14330e17 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 21 
May 2016 13:39:42 +0800 Subject: [PATCH 19/67] [sendtonews] Add new extractor Used in CBSLocal. Part of #9522 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/sendtonews.py | 86 ++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 youtube_dl/extractor/sendtonews.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5b96a086d..8352b3c3a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -670,6 +670,7 @@ from .screencastomatic import ScreencastOMaticIE from .screenjunkies import ScreenJunkiesIE from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .senateisvp import SenateISVPIE +from .sendtonews import SendtoNewsIE from .servingsys import ServingSysIE from .sexu import SexuIE from .shahid import ShahidIE diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py new file mode 100644 index 000000000..1c636f672 --- /dev/null +++ b/youtube_dl/extractor/sendtonews.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .jwplatform import JWPlatformBaseIE +from ..compat import compat_parse_qs +from ..utils import ( + ExtractorError, + parse_duration, +) + + +class SendtoNewsIE(JWPlatformBaseIE): + _VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P[^#]+)' + + _TEST = { + # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/ + 'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes', + 'info_dict': { + 'id': 'GxfCe0Zo7D-175909-5588', + 'ext': 'mp4', + 'title': 'Recap: CLE 15, CIN 6', + 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', + 'duration': 49, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + _URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s' + + 
@classmethod + def _extract_url(cls, webpage): + mobj = re.search(r'''(?x)]+src=([\'"]) + (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\? + .*\bSC=(?P[0-9a-zA-Z-]+).* + \1>''', webpage) + if mobj: + sk, mk, pk = mobj.group('SC').split('-') + return cls._URL_TEMPLATE % (sk, mk, pk) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + params = compat_parse_qs(mobj.group('query')) + + if 'SK' not in params or 'MK' not in params or 'PK' not in params: + raise ExtractorError('Invalid URL', expected=True) + + video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]]) + + webpage = self._download_webpage(url, video_id) + + jwplayer_data_str = self._search_regex( + r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data') + js_vars = { + 'w': 1024, + 'h': 768, + 'modeVar': 'html5', + } + for name, val in js_vars.items(): + js_val = '%d' % val if isinstance(val, int) else '"%s"' % val + jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val) + + info_dict = self._parse_jwplayer_data( + self._parse_json(jwplayer_data_str, video_id), + video_id, require_title=False, rtmp_params={'no_resume': True}) + + title = self._html_search_regex( + r']+class="embedTitle">([^<]+)', webpage, 'title') + description = self._html_search_regex( + r']+class="embedSubTitle">([^<]+)', webpage, + 'description', fatal=False) + duration = parse_duration(self._html_search_regex( + r']+class="embedDetails">([0-9:]+)', webpage, + 'duration', fatal=False)) + + info_dict.update({ + 'title': title, + 'description': description, + 'duration': duration, + }) + + return info_dict From 661d46b28f6de2772fc642c36b505a3c7b9a3b10 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 21 May 2016 13:40:45 +0800 Subject: [PATCH 20/67] [cbslocal] Add new extractor (closes #9522) --- youtube_dl/extractor/cbslocal.py | 84 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 85 insertions(+) 
create mode 100644 youtube_dl/extractor/cbslocal.py diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py new file mode 100644 index 000000000..74adb38a6 --- /dev/null +++ b/youtube_dl/extractor/cbslocal.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import calendar +import datetime + +from .anvato import AnvatoIE +from .sendtonews import SendtoNewsIE +from ..compat import compat_urlparse + + +class CBSLocalIE(AnvatoIE): + _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P[0-9a-z-]+)' + + _TESTS = [{ + # Anvato backend + 'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis', + 'md5': 'f0ee3081e3843f575fccef901199b212', + 'info_dict': { + 'id': '3401037', + 'ext': 'mp4', + 'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'', + 'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. 
Randy Paige reports.', + 'thumbnail': 're:^https?://.*', + 'timestamp': 1463440500, + 'upload_date': '20160516', + 'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\KCBSTV', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\AOL', + 'Syndication\\Yahoo', + 'Syndication\\Tribune', + 'Syndication\\Curb.tv', + 'Content\\News' + ], + }, + }, { + # SendtoNews embed + 'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/', + 'info_dict': { + 'id': 'GxfCe0Zo7D-175909-5588', + 'ext': 'mp4', + 'title': 'Recap: CLE 15, CIN 6', + 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', + 'upload_date': '20160516', + 'timestamp': 1463433840, + 'duration': 49, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + sendtonews_url = SendtoNewsIE._extract_url(webpage) + if sendtonews_url: + info_dict = { + '_type': 'url_transparent', + 'url': compat_urlparse.urljoin(url, sendtonews_url), + } + else: + info_dict = self._extract_anvato_videos(webpage, display_id) + + time_str = self._html_search_regex( + r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) + timestamp = None + if time_str: + timestamp = calendar.timegm(datetime.datetime.strptime( + time_str, '%b %d, %Y %I:%M %p').timetuple()) + + info_dict.update({ + 'display_id': display_id, + 'timestamp': timestamp, + }) + + return info_dict diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8352b3c3a..c93cd2765 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -111,6 +111,7 @@ from .cbc import ( CBCPlayerIE, ) from .cbs import CBSIE +from .cbslocal import CBSLocalIE from .cbsinteractive import CBSInteractiveIE from .cbsnews import ( CBSNewsIE, From 
115c65793af4c56c8f1986d2640105fc7e760c13 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 21 May 2016 13:50:38 +0800 Subject: [PATCH 21/67] [jwplatform] Don't fail with RTMP URLs without mp4:, mp3: or flv: --- youtube_dl/extractor/jwplatform.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 0aa6fc750..fa6f335e1 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -34,15 +34,18 @@ class JWPlatformBaseIE(InfoExtractor): 'height': int_or_none(source.get('height')), } if source_url.startswith('rtmp'): + a_format['ext'] = 'flv', + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf - rtmp_url, prefix, play_path = re.split( + rtmp_url_parts = re.split( r'((?:mp4|mp3|flv):)', source_url, 1) - a_format.update({ - 'url': rtmp_url, - 'ext': 'flv', - 'play_path': prefix + play_path, - }) + if len(rtmp_url_parts) == 3: + rtmp_url, prefix, play_path = rtmp_url_parts + a_format.update({ + 'url': rtmp_url, + 'play_path': prefix + play_path, + }) if rtmp_params: a_format.update(rtmp_params) formats.append(a_format) From 4c718d3c50b8d80bf07e44c73a5bdcd98544388f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 May 2016 17:37:35 +0200 Subject: [PATCH 22/67] [rtve] Recognize 'filmoteca' URLs --- youtube_dl/extractor/rtve.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index edd0d108e..f11e3588b 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -64,7 +64,7 @@ def _decrypt_url(png): class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' - _VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P\d+)' + _VALID_URL = 
r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -87,6 +87,9 @@ class RTVEALaCartaIE(InfoExtractor): }, { 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 'only_matching': True, + }, { + 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', + 'only_matching': True, }] def _real_initialize(self): From c8cc3745fbb34d39f4dfb0c3facb6fa9278af93c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 May 2016 21:18:59 +0200 Subject: [PATCH 23/67] release 2016.05.21 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++++++ youtube_dl/version.py | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7024fc729..00cc634e3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.16 +[debug] youtube-dl version 2016.05.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 29db13883..cd6bfa51c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -16,6 +16,8 @@ - **9gag** - **abc.net.au** - **Abc7News** + - **abcnews** + - **abcnews:video** - **AcademicEarth:Course** - **acast** - **acast:channel** @@ -104,6 +106,7 @@ - **CBCPlayer** - **CBS** - **CBSInteractive** + - **CBSLocal** - **CBSNews**: CBS News - **CBSNewsLiveVideo**: CBS News Live Videos - **CBSSports** @@ -213,6 +216,7 @@ - **Flickr** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FootyRoom** + - **Formula1** - **FOX** - **Foxgay** - **FoxNews**: Fox News and Fox Business Video @@ -316,6 +320,7 @@ - **la7.tv** - **Laola1Tv** - **Le**: 乐视网 + - **Learnr** - **Lecture2Go** - **Lemonde** - **LePlaylist** @@ -331,6 +336,7 @@ - **livestream** - **livestream:original** - **LnkGo** + - **LocalNews8** - **LoveHomePorn** - **lrt.lt** - **lynda**: lynda.com videos @@ -556,6 +562,7 @@ - **ScreenJunkies** - **ScreenwaveMedia** - **SenateISVP** + - **SendtoNews** - **ServingSys** - **Sexu** - **Shahid** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5a0fdd6ce..4bdb5f352 
100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.16' +__version__ = '2016.05.21' From 7e642e4fd68f9418ecdcb852aa34a4e49c41e58b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 May 2016 21:24:53 +0200 Subject: [PATCH 24/67] release: check for pandoc Abort releasing if pandoc is missing. (pandoc was not included in my essential app database, and thus missing on my new machine.) --- devscripts/release.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/devscripts/release.sh b/devscripts/release.sh index 8dea55dbb..8b37152a7 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -33,6 +33,7 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th useless_files=$(find youtube_dl -type f -not -name '*.py') if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi +if ! type pandoc 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From 598869afb139707c7064a6c8397bbcf09b2b43f5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 May 2016 21:27:00 +0200 Subject: [PATCH 25/67] release 2016.05.21.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 00cc634e3..7f8650553 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.21 +[debug] youtube-dl version 2016.05.21.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4bdb5f352..0a2e43d05 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.21' +__version__ = '2016.05.21.1' From d2fee3c99e9d1c8eba5bd55aa3a9dd5702b23b34 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 May 2016 21:46:42 +0200 Subject: [PATCH 26/67] release.sh: also check for python3 rsa module --- devscripts/release.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 8b37152a7..7dd391b38 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -33,7 +33,8 @@ if [ ! 
-z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th useless_files=$(find youtube_dl -type f -not -name '*.py') if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi -if ! type pandoc 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi +if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi +if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From e03b35b8f998692aa853c6dbd498655fc831f9e7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 May 2016 21:47:39 +0200 Subject: [PATCH 27/67] release 2016.05.21.2 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7f8650553..2d80d45b6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.21.1 +[debug] youtube-dl version 2016.05.21.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0a2e43d05..522a56669 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.21.1' +__version__ = '2016.05.21.2' From 0db9a05f88cbbe6709da3875b798634dc536536b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 May 2016 02:15:56 +0600 Subject: [PATCH 28/67] [periscope:user] Adapt to layout changes (Closes #9563) --- youtube_dl/extractor/periscope.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 0a4bc761d..b2008decc 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + parse_iso8601, + unescapeHTML, +) class PeriscopeIE(InfoExtractor): @@ -92,6 +95,7 @@ class PeriscopeUserIE(InfoExtractor): 'info_dict': { 'id': 'LularoeHusbandMike', 'title': 'LULAROE HUSBAND MIKE', + 'description': 
'md5:6cf4ec8047768098da58e446e82c82f0', }, # Periscope only shows videos in the last 24 hours, so it's possible to # get 0 videos @@ -103,16 +107,19 @@ class PeriscopeUserIE(InfoExtractor): webpage = self._download_webpage(url, user_id) - broadcast_data = self._parse_json(self._html_search_meta( - 'broadcast-data', webpage, default='{}'), user_id) - username = broadcast_data.get('user', {}).get('display_name') - user_broadcasts = self._parse_json( - self._html_search_meta('user-broadcasts', webpage, default='{}'), + data_store = self._parse_json( + unescapeHTML(self._search_regex( + r'data-store=(["\'])(?P.+?)\1', + webpage, 'data store', default='{}', group='data')), user_id) + user = data_store.get('User', {}).get('user', {}) + title = user.get('display_name') or user.get('username') + description = user.get('description') + entries = [ self.url_result( 'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id'])) - for broadcast in user_broadcasts.get('broadcasts', [])] + for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])] - return self.playlist_result(entries, user_id, username) + return self.playlist_result(entries, user_id, title, description) From 92d221ad4858a62143ce5645c56261b26023308e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 May 2016 02:39:15 +0600 Subject: [PATCH 29/67] [periscope] Update uploader_id (Closes #9565) --- youtube_dl/extractor/periscope.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index b2008decc..c23b314e7 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -45,8 +45,11 @@ class PeriscopeIE(InfoExtractor): broadcast = broadcast_data['broadcast'] status = broadcast['status'] - uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name') - uploader_id = broadcast.get('user_id') or 
broadcast_data.get('user', {}).get('id') + user = broadcast_data.get('user', {}) + + uploader = broadcast.get('user_display_name') or user.get('display_name') + uploader_id = (broadcast.get('username') or user.get('username') or + broadcast.get('user_id') or user.get('id')) title = '%s - %s' % (uploader, status) if uploader else status state = broadcast.get('state').lower() From c8831015f41879e0d8788c228acf52579e6cf12b Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Sat, 21 May 2016 18:51:34 +0200 Subject: [PATCH 30/67] [ComedyCentral] Add support for tosh.cc.com and cc.com/video-clips --- youtube_dl/extractor/comedycentral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 0c59102e0..830073834 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 _VALID_URL = r'''(?x)^(:(?Ptds|thedailyshow) |https?://(:www\.)? 
- (?Pthedailyshow|thecolbertreport)\.(?:cc\.)?com/ + (?Pthedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/ ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P.*)| (?P - (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P[^/?#]+)) + (?:(?:guests/[^/]+|videos|video-clips|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P[0-9]+)/[^/]*/(?P.*?)) |(watch/(?P[^/]*)/(?P.*)) )| From 0150a00f333371b366ff10871458e0b071f20ee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 May 2016 02:58:41 +0600 Subject: [PATCH 31/67] [cc] Add test for tosh.cc (Closes #9566) --- youtube_dl/extractor/comedycentral.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 830073834..2b6aaa3aa 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -47,7 +47,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): (?Pthedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/ ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P.*)| (?P - (?:(?:guests/[^/]+|videos|video-clips|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P[^/?#]+)) + (?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P[0-9]+)/[^/]*/(?P.*?)) |(watch/(?P[^/]*)/(?P.*)) )| @@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): }, { 'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel', 'only_matching': True, + }, { + 'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans', + 'only_matching': True, }] _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] From 898f4b49ccc828f86a075d656aa9a1e1428e538c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 May 2016 06:47:22 +0600 
Subject: [PATCH 32/67] [theplatform] Add _extract_urls --- youtube_dl/extractor/theplatform.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index a25417f94..02dbef913 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -151,6 +151,22 @@ class ThePlatformIE(ThePlatformBaseIE): 'only_matching': True, }] + @classmethod + def _extract_urls(cls, webpage): + m = re.search( + r'''(?x) + https?://player\.theplatform\.com/p/.+?)\2 + ''', webpage) + if m: + return [m.group('url')] + + matches = re.findall( + r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) + if matches: + return list(zip(*matches))[1] + @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): flags = '10' if include_qs else '00' From 4d8819d2492e10f10bd09490f8f203d2f5e2cac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 May 2016 06:52:39 +0600 Subject: [PATCH 33/67] [extractor/generic] Add support for theplatform embeds (Closes #8636, closes #9476) --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c368f08e1..ad6a40730 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -62,6 +62,7 @@ from .digiteka import DigitekaIE from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE +from .theplatform import ThePlatformIE class GenericIE(InfoExtractor): @@ -1499,6 +1500,11 @@ class GenericIE(InfoExtractor): if bc_urls: return _playlist_from_matches(bc_urls, ie='BrightcoveNew') + # Look for ThePlatform embeds + tp_urls = ThePlatformIE._extract_urls(webpage) + if tp_urls: + return _playlist_from_matches(tp_urls, ie='ThePlatform') + # Look for embedded rtl.nl player matches = re.findall( 
r']+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', From c6b9cf05e1dbd5e2534607fd3319ac73791d1c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 May 2016 08:28:41 +0600 Subject: [PATCH 34/67] [utils] Do not fail on unknown date formats in unified_strdate --- youtube_dl/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5301d0740..d65f5e833 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1055,7 +1055,10 @@ def unified_strdate(date_str, day_first=True): if upload_date is None: timetuple = email.utils.parsedate_tz(date_str) if timetuple: - upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') + try: + upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') + except ValueError: + pass if upload_date is not None: return compat_str(upload_date) From 21a19aa94d7a650d90ab258bd277a8648378c135 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 22 May 2016 08:59:28 +0600 Subject: [PATCH 35/67] [README.md] Clarify location for youtube-dl.exe --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2febab2c..96cefb548 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl -Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). 
+Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory (`%USERPROFILE%`, for example `C:\Users\\` or `C:\Documents and Settings\\`) or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\system32` (e.g. **do not** put in `C:\Windows\System32`). OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). From 4f3a25c2b413977bf0ea6f7bd16d3d20259470bb Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 22 May 2016 09:00:08 +0600 Subject: [PATCH 36/67] [README.md] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 96cefb548..759d2bb01 100644 --- a/README.md +++ b/README.md @@ -433,7 +433,7 @@ You can use `--ignore-config` if you want to disable the configuration file for ### Authentication with `.netrc` file -You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only: +You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. 
For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only: ``` touch $HOME/.netrc chmod a-rwx,u+rw $HOME/.netrc From 9b06b0fb9297efe47a8de71142e926dda5031b65 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 22 May 2016 09:26:06 +0600 Subject: [PATCH 37/67] [README.md] Clarify updating on Windows --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 759d2bb01..649e78186 100644 --- a/README.md +++ b/README.md @@ -675,6 +675,8 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). +Note that on Windows before running the update command in command prompt you should either `cd` to the directory where `youtube-dl.exe` is located or use the full path to `youtube-dl.exe` (e.g. `C:\Program Files (x86)\youtube-dl\youtube-dl.exe -U`). + If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. From e5871c672b32d30fe4a943ad1375a7000829f03c Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 22 May 2016 09:36:07 +0600 Subject: [PATCH 38/67] [README.md] Clarify location for youtube-dl.exe even more %USERPROFILE% not in %PATH% by default. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 649e78186..185df5e76 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl -Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory (`%USERPROFILE%`, for example `C:\Users\\` or `C:\Documents and Settings\\`) or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\system32` (e.g. **do not** put in `C:\Windows\System32`). +Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). 
From e9297256d405651428d5d52f0bb6b32ca66ea15a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 May 2016 10:06:45 +0600 Subject: [PATCH 39/67] [update] Fix youtube-dl.exe updating from arbitrary directory (Closes #2718) --- youtube_dl/update.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 676ebe1c4..ebce9666a 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -83,11 +83,8 @@ def update_self(to_screen, verbose, opener): print_notes(to_screen, versions_info['versions']) - filename = sys.argv[0] - # Py2EXE: Filename could be different - if hasattr(sys, 'frozen') and not os.path.isfile(filename): - if os.path.isfile(filename + '.exe'): - filename += '.exe' + # sys.executable is set to the full pathname of the exe-file for py2exe + filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0] if not os.access(filename, os.W_OK): to_screen('ERROR: no write permissions on %s' % filename) @@ -95,7 +92,7 @@ def update_self(to_screen, verbose, opener): # Py2EXE if hasattr(sys, 'frozen'): - exe = os.path.abspath(filename) + exe = filename directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): to_screen('ERROR: no write permissions on %s' % directory) From c776b99691e5fdec75cc7d5c268c260f23bd2ac7 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 22 May 2016 10:14:02 +0600 Subject: [PATCH 40/67] [README.md] Remove Windows updating trickery Windows updating fixed in e9297256d405651428d5d52f0bb6b32ca66ea15a. --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 185df5e76..00f42e056 100644 --- a/README.md +++ b/README.md @@ -675,8 +675,6 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). 
-Note that on Windows before running the update command in command prompt you should either `cd` to the directory where `youtube-dl.exe` is located or use the full path to `youtube-dl.exe` (e.g. `C:\Program Files (x86)\youtube-dl\youtube-dl.exe -U`). - If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. From 70346165fe9348b54e8d71fb40654d135af945f8 Mon Sep 17 00:00:00 2001 From: Thor77 Date: Sun, 22 May 2016 08:15:39 +0200 Subject: [PATCH 41/67] [bandcamp] raise ExtractorError when track not streamable (#9465) * [bandcamp] raise ExtractorError when track not streamable * [bandcamp] update md5 for second test * don't rely on json-data, but just check for 'file' * don't rely on presence of 'file' --- youtube_dl/extractor/bandcamp.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index c1ef8051d..991ab0676 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor): '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', - 'md5': '2b68e5851514c20efdff2afc5603b8b4', + 'md5': '73d0b3171568232574e45652f8720b5c', 'info_dict': { 'id': '2650410135', 'ext': 'mp3', @@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor): if m_trackinfo: json_code = 
m_trackinfo.group(1) data = json.loads(json_code)[0] + track_id = compat_str(data['id']) + + if not data.get('file'): + raise ExtractorError('Not streamable', video_id=track_id, expected=True) formats = [] for format_id, format_url in data['file'].items(): @@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor): self._sort_formats(formats) return { - 'id': compat_str(data['id']), + 'id': track_id, 'title': data['title'], 'formats': formats, 'duration': float_or_none(data.get('duration')), From a4a7c44bd337cdda534ad879c516d5b33e25a893 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 22 May 2016 15:04:51 +0600 Subject: [PATCH 42/67] [README.md] Document solution for extremely slow start on Windows --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 00f42e056..ef0e265c8 100644 --- a/README.md +++ b/README.md @@ -693,6 +693,10 @@ hash -r Again, from then on you'll be able to update with `sudo youtube-dl -U`. +### youtube-dl is extremely slow to start on Windows + +Add a file exclusion for `youtube-dl.exe` in Windows Defender settings. + ### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos. 
From 4a12077855026a0ca9cf31868c13d2d029f7a723 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 22 May 2016 22:22:27 +0800 Subject: [PATCH 43/67] [genric] Eliminate duplicated video URLs (closes #6562) --- youtube_dl/extractor/generic.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ad6a40730..bb96e7231 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1194,6 +1194,16 @@ class GenericIE(InfoExtractor): 'uploader': 'Lake8737', } }, + # Duplicated embedded video URLs + { + 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443', + 'info_dict': { + 'id': '149298443_480_16c25b74_2', + 'ext': 'mp4', + 'title': 'vs. Blue Orange Spring Game', + 'uploader': 'www.hudl.com', + }, + }, ] def report_following_redirect(self, new_url): @@ -2111,7 +2121,7 @@ class GenericIE(InfoExtractor): raise UnsupportedError(url) entries = [] - for video_url in found: + for video_url in orderedSet(found): video_url = unescapeHTML(video_url) video_url = video_url.replace('\\/', '/') video_url = compat_urlparse.urljoin(url, video_url) From c5f51551006c9d9ad7263cb3d3d90d1c91c8c648 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 22 Apr 2016 09:36:14 +0100 Subject: [PATCH 44/67] [wat] extract all formats --- youtube_dl/extractor/wat.py | 129 +++++++++++++++++------------------- 1 file changed, 59 insertions(+), 70 deletions(-) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 5227bb5ad..de7d6b559 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -2,25 +2,26 @@ from __future__ import unicode_literals import re -import hashlib from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, unified_strdate, + HEADRequest, + float_or_none, ) class WatIE(InfoExtractor): - _VALID_URL = 
r'(?:wat:(?P\d{8})|https?://www\.wat\.tv/video/(?P.*)-(?P.*?)_.*?\.html)' + _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P[0-9a-z]+)' IE_NAME = 'wat.tv' _TESTS = [ { 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html', - 'md5': 'ce70e9223945ed26a8056d413ca55dc9', + 'md5': '83d882d9de5c9d97f0bb2c6273cde56a', 'info_dict': { 'id': '11713067', - 'display_id': 'soupe-figues-l-orange-aux-epices', 'ext': 'mp4', 'title': 'Soupe de figues à l\'orange et aux épices', 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.', @@ -33,7 +34,6 @@ class WatIE(InfoExtractor): 'md5': 'fbc84e4378165278e743956d9c1bf16b', 'info_dict': { 'id': '11713075', - 'display_id': 'gregory-lemarchal-voix-ange', 'ext': 'mp4', 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)', 'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3', @@ -44,96 +44,85 @@ class WatIE(InfoExtractor): }, ] - def download_video_info(self, real_id): + def _real_extract(self, url): + video_id = self._match_id(url) + video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36)) + # 'contentv4' is used in the website, but it also returns the related # videos, we don't need them - info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id) - return info['media'] - - def _real_extract(self, url): - def real_id_for_chapter(chapter): - return chapter['tc_start'].split('-')[0] - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - real_id = mobj.group('real_id') - if not real_id: - short_id = mobj.group('short_id') - webpage = self._download_webpage(url, display_id or short_id) - real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id') - - video_info = self.download_video_info(real_id) + video_info = self._download_json( + 'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media'] error_desc = 
video_info.get('error_desc') if error_desc: raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) - geo_list = video_info.get('geoList') - country = geo_list[0] if geo_list else '' - chapters = video_info['chapters'] first_chapter = chapters[0] - files = video_info['files'] - first_file = files[0] - if real_id_for_chapter(first_chapter) != real_id: + def video_id_for_chapter(chapter): + return chapter['tc_start'].split('-')[0] + + if video_id_for_chapter(first_chapter) != video_id: self.to_screen('Multipart video detected') - chapter_urls = [] - for chapter in chapters: - chapter_id = real_id_for_chapter(chapter) - # Yes, when we this chapter is processed by WatIE, - # it will download the info again - chapter_info = self.download_video_info(chapter_id) - chapter_urls.append(chapter_info['url']) - entries = [self.url_result(chapter_url) for chapter_url in chapter_urls] - return self.playlist_result(entries, real_id, video_info['title']) - - upload_date = None - if 'date_diffusion' in first_chapter: - upload_date = unified_strdate(first_chapter['date_diffusion']) + entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters] + return self.playlist_result(entries, video_id, video_info['title']) # Otherwise we can continue and extract just one part, we have to use - # the short id for getting the video url + # the video id for getting the video url - formats = [{ - 'url': 'http://wat.tv/get/android5/%s.mp4' % real_id, - 'format_id': 'Mobile', - }] + date_diffusion = first_chapter.get('date_diffusion') + upload_date = unified_strdate(date_diffusion) if date_diffusion else None - fmts = [('SD', 'web')] - if first_file.get('hasHD'): - fmts.append(('HD', 'webhd')) + def extract_url(path_template, url_type): + req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id) + head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type) + red_url = head.geturl() + if 
req_url == red_url: + raise ExtractorError( + '%s said: Sorry, this video is not available from your country.' % self.IE_NAME, + expected=True) + return red_url - def compute_token(param): - timestamp = '%08x' % int(self._download_webpage( - 'http://www.wat.tv/servertime', real_id, - 'Downloading server time').split('|')[0]) - magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564' - return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp) + m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8') + http_url = extract_url('android5/%s.mp4', 'http') - for fmt in fmts: - webid = '/%s/%s' % (fmt[1], real_id) - video_url = self._download_webpage( - 'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country), - real_id, - 'Downloading %s video URL' % fmt[0], - 'Failed to download %s video URL' % fmt[0], - False) - if not video_url: + formats = [] + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + formats.extend(m3u8_formats) + formats.extend(self._extract_f4m_formats( + m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + for m3u8_format in m3u8_formats: + mobj = re.search( + r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url']) + if not mobj: continue - formats.append({ - 'url': video_url, - 'ext': 'mp4', - 'format_id': fmt[0], + abr, vbr = mobj.groups() + abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000) + m3u8_format.update({ + 'vbr': vbr, + 'abr': abr, }) + if not vbr or not abr: + continue + f = m3u8_format.copy() + f.update({ + 'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url), + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + formats.append(f) + self._sort_formats(formats) return { - 'id': real_id, - 'display_id': display_id, + 'id': video_id, 'title': 
first_chapter['title'], 'thumbnail': first_chapter['preview'], 'description': first_chapter['description'], 'view_count': video_info['views'], 'upload_date': upload_date, - 'duration': first_file['duration'], + 'duration': video_info['files'][0]['duration'], 'formats': formats, } From db3b8b2103099a8859402f2167d7ad1a8fa66829 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 22 May 2016 16:54:41 +0100 Subject: [PATCH 45/67] [tf1] add support for more related web sites --- youtube_dl/extractor/tf1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 3f54b2744..aff5121b9 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" - _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P.+?)\.html' + _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P[^/?#.]+)' _TESTS = [{ 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'info_dict': { @@ -48,6 +48,6 @@ class TF1IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})(?:#.*?)?\1', + r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})(?:.*?)?\1', webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From eb7941e3e6e92bac26f5d21525fc8ac89c934abe Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 23 May 2016 01:34:08 +0800 Subject: [PATCH 46/67] [compat] Fix for XML with in Python 2.7 and 3.2 Such XML documents cause DeprecationWarning if python is run with `-W error` --- test/test_compat.py | 6 ++++++ youtube_dl/compat.py | 17 ++++++++++++----- 2 files changed, 18 insertions(+), 5 
deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index 539b30540..f5317ac3e 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) + def test_compat_etree_fromstring_doctype(self): + xml = ''' + +''' + compat_etree_fromstring(xml) + def test_struct_unpack(self): self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 1392361a1..06e5f3ff6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -245,13 +245,20 @@ try: except ImportError: # Python 2.6 from xml.parsers.expat import ExpatError as compat_xml_parse_error + +etree = xml.etree.ElementTree + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + if sys.version_info[0] >= 3: - compat_etree_fromstring = xml.etree.ElementTree.fromstring + def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) else: # python 2.x tries to encode unicode strings with ascii (see the # XMLParser._fixtext method) - etree = xml.etree.ElementTree - try: _etree_iter = etree.Element.iter except AttributeError: # Python <=2.6 @@ -265,7 +272,7 @@ else: # 2.7 source def _XML(text, parser=None): if not parser: - parser = etree.XMLParser(target=etree.TreeBuilder()) + parser = etree.XMLParser(target=_TreeBuilder()) parser.feed(text) return parser.close() @@ -277,7 +284,7 @@ else: return el def compat_etree_fromstring(text): - doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) for el in _etree_iter(doc): if el.text is not None and isinstance(el.text, bytes): el.text = el.text.decode('utf-8') From 
7a46542f97c99e47ad86707bf21628630c8d871e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 23 May 2016 01:38:00 +0800 Subject: [PATCH 47/67] [livestream] Video IDs should always be strings (#2234) --- youtube_dl/extractor/livestream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index eada7c299..0edc06c43 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor): } def _extract_stream_info(self, stream_info): - broadcast_id = stream_info['broadcast_id'] + broadcast_id = compat_str(stream_info['broadcast_id']) is_live = stream_info.get('is_live') formats = [] From 78d3b3e2137f6be75b64e9bbfdec88cb420a91d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 23 May 2016 01:39:09 +0800 Subject: [PATCH 48/67] [generic] Improve Livestream detection (closes #2234) --- youtube_dl/extractor/generic.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bb96e7231..303e112d2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -784,6 +784,19 @@ class GenericIE(InfoExtractor): 'title': 'Rosetta #CometLanding webcast HL 10', } }, + # Another Livestream embed, without 'new.' 
in URL + { + 'url': 'https://www.freespeech.org/', + 'info_dict': { + 'id': '123537347', + 'ext': 'mp4', + 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # Live stream + 'skip_download': True, + }, + }, # LazyYT { 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', @@ -1878,7 +1891,7 @@ class GenericIE(InfoExtractor): return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') mobj = re.search( - r']+src="(?Phttps?://new\.livestream\.com/[^"]+/player[^"]+)"', + r']+src="(?Phttps?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'Livestream') From 102810ef0402834bd5d43e70a5e397f2a581a5dc Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 22 May 2016 20:36:23 +0100 Subject: [PATCH 49/67] [voxmedia] fix volume embed extraction --- youtube_dl/extractor/voxmedia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index 0c6b1f030..9d73600aa 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -117,7 +117,7 @@ class VoxMediaIE(InfoExtractor): volume_webpage = self._download_webpage( 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) + r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) for provider_video_type in ('ooyala', 'youtube'): provider_video_id = video_data.get('%s_id' % provider_video_type) if provider_video_id: From e54373204ab6c5be36823695a571680d9a641ba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 23 May 2016 03:44:04 +0600 Subject: [PATCH 50/67] [lifenews] Fix metadata extraction --- youtube_dl/extractor/lifenews.py | 
31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index ba2f80a75..4b1fb9772 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -7,10 +7,10 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( determine_ext, - int_or_none, - remove_end, - unified_strdate, ExtractorError, + int_or_none, + parse_iso8601, + remove_end, ) @@ -28,7 +28,9 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Мужчина нашел дома архив оборонного завода', 'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26', + 'timestamp': 1344154740, 'upload_date': '20120805', + 'view_count': int, } }, { # single video embedded via iframe @@ -39,7 +41,9 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ', 'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. 
', + 'timestamp': 1427961840, 'upload_date': '20150402', + 'view_count': int, } }, { # two videos embedded via iframe @@ -48,7 +52,8 @@ class LifeNewsIE(InfoExtractor): 'id': '153461', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', - 'upload_date': '20150505', + 'timestamp': 1430825520, + 'view_count': int, }, 'playlist': [{ 'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795', @@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', + 'timestamp': 1430825520, 'upload_date': '20150505', }, }, { @@ -66,6 +72,7 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', + 'timestamp': 1430825520, 'upload_date': '20150505', }, }], @@ -100,21 +107,17 @@ class LifeNewsIE(InfoExtractor): description = self._og_search_description(webpage) view_count = self._html_search_regex( - r'
\s*(\d+)\s*
', webpage, 'view count', fatal=False) - comment_count = self._html_search_regex( - r'=\'commentCount\'[^>]*>\s*(\d+)\s*<', - webpage, 'comment count', fatal=False) + r']+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P\d+)\s*', + webpage, 'view count', fatal=False, group='value') - upload_date = self._html_search_regex( - r']*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False) - if upload_date is not None: - upload_date = unified_strdate(upload_date) + timestamp = parse_iso8601(self._search_regex( + r']+datetime=(["\'])(?P.+?)\1', + webpage, 'upload date', fatal=False, group='value')) common_info = { 'description': description, 'view_count': int_or_none(view_count), - 'comment_count': int_or_none(comment_count), - 'upload_date': upload_date, + 'timestamp': timestamp, } def make_entry(video_id, video_url, index=None): From 5181759c0d488f9fc30175f6aff4b8d4a236352d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 23 May 2016 04:00:08 +0600 Subject: [PATCH 51/67] [life] Update _VALID_URL --- youtube_dl/extractor/lifenews.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 4b1fb9772..d5d528a36 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -15,13 +15,13 @@ from ..utils import ( class LifeNewsIE(InfoExtractor): - IE_NAME = 'lifenews' - IE_DESC = 'LIFE | NEWS' - _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P
news|video)/(?P\d+)' + IE_NAME = 'life' + IE_DESC = 'Life.ru' + _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P\d+)' _TESTS = [{ # single video embedded via video/source - 'url': 'http://lifenews.ru/news/98736', + 'url': 'https://life.ru/t/новости/98736', 'md5': '77c95eaefaca216e32a76a343ad89d23', 'info_dict': { 'id': '98736', @@ -34,7 +34,7 @@ class LifeNewsIE(InfoExtractor): } }, { # single video embedded via iframe - 'url': 'http://lifenews.ru/news/152125', + 'url': 'https://life.ru/t/новости/152125', 'md5': '77d19a6f0886cd76bdbf44b4d971a273', 'info_dict': { 'id': '152125', @@ -47,7 +47,7 @@ class LifeNewsIE(InfoExtractor): } }, { # two videos embedded via iframe - 'url': 'http://lifenews.ru/news/153461', + 'url': 'https://life.ru/t/новости/153461', 'info_dict': { 'id': '153461', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве', @@ -77,18 +77,20 @@ class LifeNewsIE(InfoExtractor): }, }], }, { - 'url': 'http://lifenews.ru/video/13035', + 'url': 'https://life.ru/t/новости/213035', + 'only_matching': True, + }, { + 'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461', + 'only_matching': True, + }, { + 'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - section = mobj.group('section') + video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://lifenews.ru/%s/%s' % (section, video_id), - video_id, 'Downloading page') + webpage = self._download_webpage(url, video_id) video_urls = re.findall( r']+>]+src=["\'](.+?)["\']', webpage) @@ -102,7 +104,7 @@ class LifeNewsIE(InfoExtractor): title = remove_end( self._og_search_title(webpage), - ' - Первый по срочным новостям — LIFE | NEWS') + ' - Life.ru') description = self._og_search_description(webpage) From 5db9df622fb45ba6fbb57ef4a2ad5f2da0236a56 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 23 May 2016 04:22:09 +0600 Subject: [PATCH 52/67] [life:embed] Use native hls --- youtube_dl/extractor/lifenews.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index d5d528a36..c2b4490c4 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -188,7 +188,8 @@ class LifeEmbedIE(InfoExtractor): ext = determine_ext(video_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id='m3u8')) + video_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8')) else: formats.append({ 'url': video_url, From 4b464a6a78749dfdc7c71fa932146403f18f6cb5 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 23 May 2016 00:47:22 +0100 Subject: [PATCH 53/67] [washingtonpost] improve format extraction and add support for video pages extraction --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/washingtonpost.py | 148 +++++++++++++++++-------- 2 files changed, 103 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c93cd2765..d0346714c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -941,7 +941,10 @@ from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE from .walla import WallaIE -from .washingtonpost import WashingtonPostIE +from .washingtonpost import ( + WashingtonPostIE, + WashingtonPostArticleIE, +) from .wat import WatIE from .watchindianporn import WatchIndianPornIE from .wdr import ( diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index ec8b99998..71349d487 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ b/youtube_dl/extractor/washingtonpost.py @@ -11,7 +11,100 @@ from ..utils import ( class WashingtonPostIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P[^/]+)/(?:$|[?#])' + IE_NAME = 'washingtonpost' + _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TEST = { + 'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d', + 'md5': '6f537e1334b714eb15f9563bd4b9cdfa', + 'info_dict': { + 'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d', + 'ext': 'mp4', + 'title': 'Egypt finds belongings, debris from plane crash', + 'description': 'md5:a17ceee432f215a5371388c1f680bd86', + 'upload_date': '20160520', + 'uploader': 'Reuters', + 'timestamp': 1463778452, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + 'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id, + video_id, transform_source=strip_jsonp)[0]['contentConfig'] + title = video_data['title'] + + urls = [] + formats = [] + for s in video_data.get('streams', []): + s_url = s.get('url') + if not s_url or s_url in urls: + continue + urls.append(s_url) + video_type = s.get('type') + if video_type == 'smil': + continue + elif video_type in ('ts', 'hls'): + m3u8_formats = self._extract_m3u8_formats( + s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + for m3u8_format in m3u8_formats: + width = m3u8_format.get('width') + if not width: + continue + vbr = self._search_regex( + r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None) + if vbr: + m3u8_format.update({ + 'vbr': int_or_none(vbr), + }) + formats.extend(m3u8_formats) + else: + width = int_or_none(s.get('width')) + vbr = int_or_none(s.get('bitrate')) + has_width = width != 0 + formats.append({ + 'format_id': ( + '%s-%d-%d' % (video_type, width, vbr) + if width + else video_type), + 'vbr': vbr if has_width else None, + 'width': width, + 'height': int_or_none(s.get('height')), + 'acodec': 
s.get('audioCodec'), + 'vcodec': s.get('videoCodec') if has_width else 'none', + 'filesize': int_or_none(s.get('fileSize')), + 'url': s_url, + 'ext': 'mp4', + 'protocol': { + 'mp4': 'http', + 'ts': 'm3u8_native', + 'hls': 'm3u8_native', + }.get(s.get('type')), + }) + source_media_url = video_data.get('sourceMediaURL') + if source_media_url: + formats.append({ + 'format_id': 'source_media', + 'url': source_media_url, + }) + self._sort_formats( + formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id')) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('blurb'), + 'uploader': video_data.get('credits', {}).get('source'), + 'formats': formats, + 'duration': int_or_none(video_data.get('videoDuration'), 100), + 'timestamp': int_or_none( + video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000), + } + + +class WashingtonPostArticleIE(InfoExtractor): + IE_NAME = 'washingtonpost:article' + _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P[^/?#]+)' _TESTS = [{ 'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/', 'info_dict': { @@ -63,6 +156,10 @@ class WashingtonPostIE(InfoExtractor): }] }] + @classmethod + def suitable(cls, url): + return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url) + def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) @@ -74,54 +171,7 @@ class WashingtonPostIE(InfoExtractor): ]*?data-uuid=| data-video-uuid= )"([^"]+)"''', webpage) - entries = [] - for i, uuid in enumerate(uuids, start=1): - vinfo_all = self._download_json( - 'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid, - page_id, - transform_source=strip_jsonp, - note='Downloading information of video %d/%d' % (i, len(uuids)) - ) - vinfo = vinfo_all[0]['contentConfig'] - uploader = vinfo.get('credits', {}).get('source') - timestamp = int_or_none( - 
vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000) - - formats = [{ - 'format_id': ( - '%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate')) - if s.get('width') - else s.get('type')), - 'vbr': s.get('bitrate') if s.get('width') != 0 else None, - 'width': s.get('width'), - 'height': s.get('height'), - 'acodec': s.get('audioCodec'), - 'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none', - 'filesize': s.get('fileSize'), - 'url': s.get('url'), - 'ext': 'mp4', - 'preference': -100 if s.get('type') == 'smil' else None, - 'protocol': { - 'MP4': 'http', - 'F4F': 'f4m', - }.get(s.get('type')), - } for s in vinfo.get('streams', [])] - source_media_url = vinfo.get('sourceMediaURL') - if source_media_url: - formats.append({ - 'format_id': 'source_media', - 'url': source_media_url, - }) - self._sort_formats(formats) - entries.append({ - 'id': uuid, - 'title': vinfo['title'], - 'description': vinfo.get('blurb'), - 'uploader': uploader, - 'formats': formats, - 'duration': int_or_none(vinfo.get('videoDuration'), 100), - 'timestamp': timestamp, - }) + entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids] return { '_type': 'playlist', From 0c50eeb9870ec7d940c35c9cec52bfd35d009420 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 23 May 2016 02:27:31 +0100 Subject: [PATCH 54/67] [reuters] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/reuters.py | 69 ++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 youtube_dl/extractor/reuters.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d0346714c..d8b3170ba 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -630,6 +630,7 @@ from .rds import RDSIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .restudy import RestudyIE +from .reuters import ReutersIE from .reverbnation import ReverbNationIE 
from .revision3 import Revision3IE from .rice import RICEIE diff --git a/youtube_dl/extractor/reuters.py b/youtube_dl/extractor/reuters.py new file mode 100644 index 000000000..961d504eb --- /dev/null +++ b/youtube_dl/extractor/reuters.py @@ -0,0 +1,69 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + int_or_none, + unescapeHTML, +) + + +class ReutersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P[0-9]+)' + _TEST = { + 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', + 'md5': '8015113643a0b12838f160b0b81cc2ee', + 'info_dict': { + 'id': '368575562', + 'ext': 'mp4', + 'title': 'San Francisco police chief resigns', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id) + video_data = js_to_json(self._search_regex( + r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', + webpage, 'video data')) + + def get_json_value(key, fatal=False): + return self._search_regex('"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal) + + title = unescapeHTML(get_json_value('title', fatal=True)) + mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() + + mas_data = self._download_json( + 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid), + video_id, transform_source=js_to_json) + formats = [] + for f in mas_data: + f_url = f.get('url') + if not f_url: + continue + method = f.get('method') + if method == 'hls': + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + container = f.get('container') + ext = '3gp' if method == 'mobile' else container + formats.append({ + 'format_id': ext, + 'url': f_url, + 'ext': ext, + 'container': container if 
method != 'mobile' else None, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': get_json_value('thumb'), + 'duration': int_or_none(get_json_value('seconds')), + 'formats': formats, + } From b1e9ebd08087c7e591b55451551d51120b7eec9d Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 23 May 2016 02:30:12 +0100 Subject: [PATCH 55/67] [washingtonpost] remove unnecessary code --- youtube_dl/extractor/washingtonpost.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index 71349d487..c2c5bae05 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ b/youtube_dl/extractor/washingtonpost.py @@ -74,12 +74,6 @@ class WashingtonPostIE(InfoExtractor): 'vcodec': s.get('videoCodec') if has_width else 'none', 'filesize': int_or_none(s.get('fileSize')), 'url': s_url, - 'ext': 'mp4', - 'protocol': { - 'mp4': 'http', - 'ts': 'm3u8_native', - 'hls': 'm3u8_native', - }.get(s.get('type')), }) source_media_url = video_data.get('sourceMediaURL') if source_media_url: From 42a7439717610530b0f7c630ef0eecf1b0638475 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 23 May 2016 09:30:26 +0100 Subject: [PATCH 56/67] [cbs] allow to pass content id to the extractor(closes #9589) --- youtube_dl/extractor/cbs.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 051d783a2..ac2c7dced 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .theplatform import ThePlatformIE from ..utils import ( xpath_text, @@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE): class CBSIE(CBSBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P[^/]+)' + _VALID_URL = 
r'(?:cbs:(?P\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P[^/]+))' _TESTS = [{ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', @@ -66,11 +68,12 @@ class CBSIE(CBSBaseIE): TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - content_id = self._search_regex( - [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], - webpage, 'content id') + content_id, display_id = re.match(self._VALID_URL, url).groups() + if not content_id: + webpage = self._download_webpage(url, display_id) + content_id = self._search_regex( + [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], + webpage, 'content id') items_data = self._download_xml( 'http://can.cbs.com/thunder/player/videoPlayerService.php', content_id, query={'partner': 'cbs', 'contentId': content_id}) From 05b651e3a58081492eb35d896c80dd1bdb87081c Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 23 May 2016 13:04:50 +0100 Subject: [PATCH 57/67] [washingtonpost] reduce requests for m3u8 manifests --- youtube_dl/extractor/washingtonpost.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index c2c5bae05..839cad986 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ b/youtube_dl/extractor/washingtonpost.py @@ -44,7 +44,7 @@ class WashingtonPostIE(InfoExtractor): video_type = s.get('type') if video_type == 'smil': continue - elif video_type in ('ts', 'hls'): + elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url): m3u8_formats = self._extract_m3u8_formats( s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) for 
m3u8_format in m3u8_formats: @@ -74,6 +74,8 @@ class WashingtonPostIE(InfoExtractor): 'vcodec': s.get('videoCodec') if has_width else 'none', 'filesize': int_or_none(s.get('fileSize')), 'url': s_url, + 'ext': 'mp4', + 'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None, }) source_media_url = video_data.get('sourceMediaURL') if source_media_url: From e8593f346a4b1236d2a023eb3070610bf180459c Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 23 May 2016 23:58:16 +0100 Subject: [PATCH 58/67] [ooyala] extract subtitles --- youtube_dl/extractor/ooyala.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 95e982897..4c119071d 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -22,13 +22,7 @@ class OoyalaBaseIE(InfoExtractor): metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] pcode = metadata.get('asset_pcode') or embed_code - video_info = { - 'id': embed_code, - 'title': metadata['title'], - 'description': metadata.get('description'), - 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), - 'duration': float_or_none(metadata.get('duration'), 1000), - } + title = metadata['title'] urls = [] formats = [] @@ -78,8 +72,24 @@ class OoyalaBaseIE(InfoExtractor): self.IE_NAME, cur_auth_data['message']), expected=True) self._sort_formats(formats) - video_info['formats'] = formats - return video_info + subtitles = {} + for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items(): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles[lang] = [{ + 'url': sub_url, + }] + + return { + 'id': embed_code, + 'title': title, + 'description': metadata.get('description'), + 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), + 'duration': float_or_none(metadata.get('duration'), 1000), + 'subtitles': subtitles, + 
'formats': formats, + } class OoyalaIE(OoyalaBaseIE): From a4760d204fe4cd7592bdfc91cbf550eb985374ac Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 24 May 2016 00:22:29 +0100 Subject: [PATCH 59/67] [ooyala] use api v2 to reduce requests for format extraction --- youtube_dl/extractor/ooyala.py | 88 +++++++++++++++++----------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 4c119071d..09bc291f0 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -15,7 +15,7 @@ from ..compat import compat_urllib_parse_urlencode class OoyalaBaseIE(InfoExtractor): _PLAYER_BASE = 'http://player.ooyala.com/' _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' - _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?' + _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' def _extract(self, content_tree_url, video_id, domain='example.org'): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] @@ -24,52 +24,50 @@ class OoyalaBaseIE(InfoExtractor): pcode = metadata.get('asset_pcode') or embed_code title = metadata['title'] + auth_data = self._download_json( + self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + + compat_urllib_parse_urlencode({ + 'domain': domain, + 'supportedFormats': 'mp4,rtmp,m3u8,hds', + }), video_id) + + cur_auth_data = auth_data['authorization_data'][embed_code] + urls = [] formats = [] - for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'): - auth_data = self._download_json( - self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + - compat_urllib_parse_urlencode({ - 'domain': domain, - 'supportedFormats': supported_format - }), - video_id, 'Downloading %s JSON' % supported_format) - - cur_auth_data = auth_data['authorization_data'][embed_code] - - if cur_auth_data['authorized']: - for stream in 
cur_auth_data['streams']: - url = base64.b64decode( - stream['url']['data'].encode('ascii')).decode('utf-8') - if url in urls: - continue - urls.append(url) - delivery_type = stream['delivery_type'] - if delivery_type == 'hls' or '.m3u8' in url: - formats.extend(self._extract_m3u8_formats( - url, embed_code, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds' or '.f4m' in url: - formats.extend(self._extract_f4m_formats( - url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif '.smil' in url: - formats.extend(self._extract_smil_formats( - url, embed_code, fatal=False)) - else: - formats.append({ - 'url': url, - 'ext': stream.get('delivery_type'), - 'vcodec': stream.get('video_codec'), - 'format_id': delivery_type, - 'width': int_or_none(stream.get('width')), - 'height': int_or_none(stream.get('height')), - 'abr': int_or_none(stream.get('audio_bitrate')), - 'vbr': int_or_none(stream.get('video_bitrate')), - 'fps': float_or_none(stream.get('framerate')), - }) - else: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, cur_auth_data['message']), expected=True) + if cur_auth_data['authorized']: + for stream in cur_auth_data['streams']: + url = base64.b64decode( + stream['url']['data'].encode('ascii')).decode('utf-8') + if url in urls: + continue + urls.append(url) + delivery_type = stream['delivery_type'] + if delivery_type == 'hls' or '.m3u8' in url: + formats.extend(self._extract_m3u8_formats( + url, embed_code, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif delivery_type == 'hds' or '.f4m' in url: + formats.extend(self._extract_f4m_formats( + url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif '.smil' in url: + formats.extend(self._extract_smil_formats( + url, embed_code, fatal=False)) + else: + formats.append({ + 'url': url, + 'ext': stream.get('delivery_type'), + 'vcodec': stream.get('video_codec'), + 'format_id': delivery_type, + 'width': int_or_none(stream.get('width')), + 'height': 
int_or_none(stream.get('height')), + 'abr': int_or_none(stream.get('audio_bitrate')), + 'vbr': int_or_none(stream.get('video_bitrate')), + 'fps': float_or_none(stream.get('framerate')), + }) + else: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, cur_auth_data['message']), expected=True) self._sort_formats(formats) subtitles = {} From 277c7465f58e0ac50de0dd9ebc2083f6142e9a94 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 24 May 2016 11:24:29 +0100 Subject: [PATCH 60/67] [ooyala] check manifest ext with determine_ext and update tests for related extractors --- youtube_dl/extractor/byutv.py | 5 ++--- youtube_dl/extractor/espn.py | 12 ++++-------- youtube_dl/extractor/formula1.py | 3 ++- youtube_dl/extractor/groupon.py | 7 +++---- youtube_dl/extractor/howcast.py | 7 ++----- youtube_dl/extractor/ooyala.py | 24 +++++++++++++----------- youtube_dl/extractor/teachingchannel.py | 6 ++---- youtube_dl/extractor/veoh.py | 1 + youtube_dl/extractor/vice.py | 3 +++ youtube_dl/extractor/voxmedia.py | 12 ++++++++---- 10 files changed, 40 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index dda98059e..54eb57b46 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' _TEST = { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', + 'md5': '05850eb8c749e2ee05ad5a1c34668493', 'info_dict': { 'id': 'studio-c-season-5-episode-5', 'ext': 'mp4', @@ -19,9 +20,7 @@ class BYUtvIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, - 'params': { - 'skip_download': True, - } + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index db4b263bc..e3575aed1 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py 
@@ -8,28 +8,24 @@ class ESPNIE(InfoExtractor): _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P[^/]+)' _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', + 'md5': '60e5d097a523e767d06479335d1bdc58', 'info_dict': { 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'ext': 'mp4', 'title': '30 for 30 Shorts: Judging Jewell', 'description': None, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['OoyalaExternal'], }, { # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season 'url': 'http://espn.go.com/video/clip?id=2743663', + 'md5': 'f4ac89b59afc7e2d7dbb049523df6768', 'info_dict': { 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', 'ext': 'mp4', 'title': 'Must-See Moments: Best of the MLS season', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['OoyalaExternal'], }, { 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'only_matching': True, diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py index 726393fcc..322c41e5a 100644 --- a/youtube_dl/extractor/formula1.py +++ b/youtube_dl/extractor/formula1.py @@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor): 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', 'ext': 'flv', 'title': 'Race highlights - Spain 2016', - } + }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index 1dd0a81cc..7bbb669c7 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor): 'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors', }, 'playlist': [{ + 'md5': '42428ce8a00585f9bc36e49226eae7a1', 'info_dict': { 'id': 'fk6OhWpXgIQ', 'ext': 'mp4', @@ -24,10 +25,8 @@ class GrouponIE(InfoExtractor): 'uploader_id': 'groupon', 'uploader': 'Groupon', }, - }], - 
'params': { - 'skip_download': True, - } + 'add_ie': ['Youtube'], + }] } _PROVIDERS = { diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index e8f51e545..92caeb8f9 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P\d+)' _TEST = { 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', - 'md5': '8b743df908c42f60cf6496586c7f12c3', + 'md5': '7d45932269a288149483144f01b99789', 'info_dict': { 'id': '390161', 'ext': 'mp4', @@ -18,10 +18,7 @@ class HowcastIE(InfoExtractor): 'upload_date': '20100609', 'duration': 56.823, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 09bc291f0..2038a6ba5 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -8,6 +8,7 @@ from ..utils import ( float_or_none, ExtractorError, unsmuggle_url, + determine_ext, ) from ..compat import compat_urllib_parse_urlencode @@ -37,26 +38,27 @@ class OoyalaBaseIE(InfoExtractor): formats = [] if cur_auth_data['authorized']: for stream in cur_auth_data['streams']: - url = base64.b64decode( + s_url = base64.b64decode( stream['url']['data'].encode('ascii')).decode('utf-8') - if url in urls: + if s_url in urls: continue - urls.append(url) + urls.append(s_url) + ext = determine_ext(s_url, None) delivery_type = stream['delivery_type'] - if delivery_type == 'hls' or '.m3u8' in url: + if delivery_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - url, embed_code, 'mp4', 'm3u8_native', + s_url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds' or '.f4m' in url: + elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( - url + '?hdcore=3.7.0', 
embed_code, f4m_id='hds', fatal=False)) - elif '.smil' in url: + s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif ext == 'smil': formats.extend(self._extract_smil_formats( - url, embed_code, fatal=False)) + s_url, embed_code, fatal=False)) else: formats.append({ - 'url': url, - 'ext': stream.get('delivery_type'), + 'url': s_url, + 'ext': ext or stream.get('delivery_type'), 'vcodec': stream.get('video_codec'), 'format_id': delivery_type, 'width': int_or_none(stream.get('width')), diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index e0477382c..e279280e9 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor): _TEST = { 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', + 'md5': '3d6361864d7cac20b57c8784da17166f', 'info_dict': { 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', 'ext': 'mp4', @@ -18,10 +19,7 @@ class TeachingChannelIE(InfoExtractor): 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 'duration': 422.255, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index 23ce0a0d1..0f5d68738 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -37,6 +37,7 @@ class VeohIE(InfoExtractor): 'uploader': 'afp-news', 'duration': 123, }, + 'skip': 'This video has been deleted.', }, { 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 95daf4dfd..e2b2ce098 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -11,12 +11,14 @@ class ViceIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', + 'md5': 'e9d77741f9e42ba583e683cd170660f7', 'info_dict': { 'id': 
'43cW1mYzpia9IlestBjVpd23Yu3afAfp', 'ext': 'flv', 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', 'duration': 725.983, }, + 'add_ie': ['Ooyala'], }, { 'url': 'http://www.vice.com/video/how-to-hack-a-car', 'md5': '6fb2989a3fed069fb8eab3401fc2d3c9', @@ -29,6 +31,7 @@ class ViceIE(InfoExtractor): 'uploader': 'Motherboard', 'upload_date': '20140529', }, + 'add_ie': ['Youtube'], }, { 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'only_matching': True, diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index 9d73600aa..b1b32ad44 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Google\'s new material design direction', 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', - } + }, + 'add_ie': ['Ooyala'], }, { # data-ooyala-id 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet', @@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Nexus 6: hands-on with Google\'s phablet', 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', - } + }, + 'add_ie': ['Ooyala'], }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', @@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'The new frontier of LGBTQ civil rights, explained', 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', - } + }, + 'add_ie': ['Ooyala'], }, { # youtube embed 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance', @@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor): 'upload_date': '20160324', 'uploader_id': 'voxdotcom', 'uploader': 'Vox', - } + }, + 'add_ie': ['Youtube'], }, { # SBN.VideoLinkset.entryGroup multiple ooyala embeds 'url': 
'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', From 444417edb55a5bf471697a3b2353fdbfb6f7e26d Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 24 May 2016 15:58:27 +0100 Subject: [PATCH 61/67] [radiocanada] Add new extractor(#4020) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/radiocanada.py | 130 ++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 youtube_dl/extractor/radiocanada.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d8b3170ba..f9fed18f6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -617,6 +617,10 @@ from .qqmusic import ( QQMusicPlaylistIE, ) from .r7 import R7IE +from .radiocanada import ( + RadioCanadaIE, + RadioCanadaAudioVideoIE, +) from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py new file mode 100644 index 000000000..4f05bbddc --- /dev/null +++ b/youtube_dl/extractor/radiocanada.py @@ -0,0 +1,130 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + find_xpath_attr, + determine_ext, + int_or_none, + unified_strdate, + xpath_element, + ExtractorError, +) + + +class RadioCanadaIE(InfoExtractor): + IE_NAME = 'radiocanada' + _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P[^:/]+)[:/](?P[0-9]+)' + _TEST = { + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', + 'info_dict': { + 'id': '7184272', + 'ext': 'flv', + 'title': 'Le parcours du tireur capté sur vidéo', + 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', + 'upload_date': '20141023', + }, + 'params': { + # 
rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + app_code, video_id = re.match(self._VALID_URL, url).groups() + + formats = [] + # TODO: extract m3u8 and f4m formats + # m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements + # f4m formats can be extracted using flashhd device_type but they produce unplayable file + for device_type in ('flash',): + v_data = self._download_xml( + 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', + video_id, note='Downloading %s XML' % device_type, query={ + 'appCode': app_code, + 'idMedia': video_id, + 'connectionType': 'broadband', + 'multibitrate': 'true', + 'deviceType': device_type, + # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction + 'paysJ391wsHjbOJwvCs26toz': 'CA', + 'bypasslock': 'NZt5K62gRqfc', + }) + v_url = xpath_text(v_data, 'url') + if not v_url: + continue + if v_url == 'null': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, xpath_text(v_data, 'message')), expected=True) + ext = determine_ext(v_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + v_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False)) + else: + ext = determine_ext(v_url) + bitrates = xpath_element(v_data, 'bitrates') + for url_e in bitrates.findall('url'): + tbr = int_or_none(url_e.get('bitrate')) + if not tbr: + continue + formats.append({ + 'format_id': 'rtmp-%d' % tbr, + 'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url), + 'ext': 'flv', + 'protocol': 'rtmp', + 'width': int_or_none(url_e.get('width')), + 'height': int_or_none(url_e.get('height')), + 'tbr': tbr, + }) + self._sort_formats(formats) + + metadata = self._download_xml( + 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', + video_id, note='Downloading metadata XML', query={ + 'appCode': app_code, + 'idMedia': 
video_id, + }) + + def get_meta(name): + el = find_xpath_attr(metadata, './/Meta', 'name', name) + return el.text if el is not None else None + + return { + 'id': video_id, + 'title': get_meta('Title'), + 'description': get_meta('Description') or get_meta('ShortDescription'), + 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), + 'duration': int_or_none(get_meta('length')), + 'series': get_meta('Emission'), + 'season_number': int_or_none('SrcSaison'), + 'episode_number': int_or_none('SrcEpisode'), + 'upload_date': unified_strdate(get_meta('Date')), + 'formats': formats, + } + + +class RadioCanadaAudioVideoIE(InfoExtractor): + 'radiocanada:audiovideo' + _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P[0-9]+)' + _TEST = { + 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', + 'info_dict': { + 'id': '7527184', + 'ext': 'flv', + 'title': 'Barack Obama au Vietnam', + 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', + 'upload_date': '20160523', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) From a4690b3244a42a833146c406e622c96045b23df5 Mon Sep 17 00:00:00 2001 From: mexican porn commits Date: Mon, 23 May 2016 16:32:39 -0500 Subject: [PATCH 62/67] [xhamster] url regex fix for videos with empty title. --- youtube_dl/extractor/xhamster.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index b3547174d..314e5020d 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -12,7 +12,7 @@ from ..utils import ( class XHamsterIE(InfoExtractor): - _VALID_URL = r'(?Phttps?)://(?:.+?\.)?xhamster\.com/movies/(?P[0-9]+)/(?P.+?)\.html(?:\?.*)?' 
+ _VALID_URL = r'(?Phttps?)://(?:.+?\.)?xhamster\.com/movies/(?P[0-9]+)/(?P.*?)\.html(?:\?.*)?' _TESTS = [ { 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', @@ -38,6 +38,18 @@ class XHamsterIE(InfoExtractor): 'age_limit': 18, } }, + { + 'url': 'http://xhamster.com/movies/5667973/.html', + 'info_dict': { + 'id': '5667973', + 'ext': 'mp4', + 'title': '....', + 'upload_date': '20160208', + 'uploader': 'parejafree', + 'duration': 72.0, + 'age_limit': 18, + } + }, { 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', 'only_matching': True, @@ -170,7 +182,7 @@ class XHamsterEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id, + r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id, webpage, 'xhamster url', default=None) if not video_url: From 6b43132ce9ec7477d69d8ad9d5b868060679de95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 May 2016 21:38:27 +0600 Subject: [PATCH 63/67] [xhamster] Update tests --- youtube_dl/extractor/xhamster.py | 79 +++++++++++++++++--------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 314e5020d..bd8e1af2e 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -13,48 +13,51 @@ from ..utils import ( class XHamsterIE(InfoExtractor): _VALID_URL = r'(?Phttps?)://(?:.+?\.)?xhamster\.com/movies/(?P[0-9]+)/(?P.*?)\.html(?:\?.*)?' 
- _TESTS = [ - { - 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', - 'info_dict': { - 'id': '1509445', - 'ext': 'mp4', - 'title': 'FemaleAgent Shy beauty takes the bait', - 'upload_date': '20121014', - 'uploader': 'Ruseful2011', - 'duration': 893.52, - 'age_limit': 18, - } + _TESTS = [{ + 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', + 'md5': '8281348b8d3c53d39fffb377d24eac4e', + 'info_dict': { + 'id': '1509445', + 'ext': 'mp4', + 'title': 'FemaleAgent Shy beauty takes the bait', + 'upload_date': '20121014', + 'uploader': 'Ruseful2011', + 'duration': 893.52, + 'age_limit': 18, }, - { - 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', - 'info_dict': { - 'id': '2221348', - 'ext': 'mp4', - 'title': 'Britney Spears Sexy Booty', - 'upload_date': '20130914', - 'uploader': 'jojo747400', - 'duration': 200.48, - 'age_limit': 18, - } + }, { + 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', + 'info_dict': { + 'id': '2221348', + 'ext': 'mp4', + 'title': 'Britney Spears Sexy Booty', + 'upload_date': '20130914', + 'uploader': 'jojo747400', + 'duration': 200.48, + 'age_limit': 18, }, - { - 'url': 'http://xhamster.com/movies/5667973/.html', - 'info_dict': { - 'id': '5667973', - 'ext': 'mp4', - 'title': '....', - 'upload_date': '20160208', - 'uploader': 'parejafree', - 'duration': 72.0, - 'age_limit': 18, - } + 'params': { + 'skip_download': True, }, - { - 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', - 'only_matching': True, + }, { + # empty seo + 'url': 'http://xhamster.com/movies/5667973/.html', + 'info_dict': { + 'id': '5667973', + 'ext': 'mp4', + 'title': '....', + 'upload_date': '20160208', + 'uploader': 'parejafree', + 'duration': 72.0, + 'age_limit': 18, }, - ] + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', + 
'only_matching': True, + }] def _real_extract(self, url): def extract_video_url(webpage, name): From 0d6ee9750801045e45157f38d98ef2be0c6da4f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 May 2016 21:42:47 +0600 Subject: [PATCH 64/67] Credit @TRox1972 for tosh.cc (#9566) and localnews8 (#9539) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 5ca71ace7..3272fc6ea 100644 --- a/AUTHORS +++ b/AUTHORS @@ -172,3 +172,4 @@ blahgeek Kevin Deldycke inondle Tomáš Čech +Déstin Reed From 688c634b7d95a20c6081b202427a9e5fd7f36422 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 24 May 2016 16:42:22 +0100 Subject: [PATCH 65/67] skip some tests to reduce test time --- youtube_dl/extractor/byutv.py | 3 +++ youtube_dl/extractor/espn.py | 6 ++++++ youtube_dl/extractor/groupon.py | 5 ++++- youtube_dl/extractor/howcast.py | 3 +++ youtube_dl/extractor/teachingchannel.py | 3 +++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 54eb57b46..3aec601f8 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -20,6 +20,9 @@ class BYUtvIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], } diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index e3575aed1..66c08bec4 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -15,6 +15,9 @@ class ESPNIE(InfoExtractor): 'title': '30 for 30 Shorts: Judging Jewell', 'description': None, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['OoyalaExternal'], }, { # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season @@ -25,6 +28,9 @@ class ESPNIE(InfoExtractor): 'ext': 'mp4', 'title': 'Must-See Moments: Best of the MLS season', }, + 'params': { + 'skip_download': True, + 
}, 'add_ie': ['OoyalaExternal'], }, { 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index 7bbb669c7..a6da90931 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -26,7 +26,10 @@ class GrouponIE(InfoExtractor): 'uploader': 'Groupon', }, 'add_ie': ['Youtube'], - }] + }], + 'params': { + 'skip_download': True, + }, } _PROVIDERS = { diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index 92caeb8f9..7e36b85ad 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -18,6 +18,9 @@ class HowcastIE(InfoExtractor): 'upload_date': '20100609', 'duration': 56.823, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], } diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index e279280e9..d14d93e3a 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -19,6 +19,9 @@ class TeachingChannelIE(InfoExtractor): 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 'duration': 422.255, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], } From 1de32771e1d3f89ef2738883b304ce52a5ecf303 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 24 May 2016 20:10:12 +0100 Subject: [PATCH 66/67] [eyedotv] Add new extractor(closes #9582) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/eyedotv.py | 64 ++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/eyedotv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f9fed18f6..05561149a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -231,6 +231,7 @@ from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE from .extremetube import 
ExtremeTubeIE +from .eyedotv import EyedoTVIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE diff --git a/youtube_dl/extractor/eyedotv.py b/youtube_dl/extractor/eyedotv.py new file mode 100644 index 000000000..2f3035147 --- /dev/null +++ b/youtube_dl/extractor/eyedotv.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + parse_duration, + ExtractorError, +) + + +class EyedoTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P[0-9]+)' + _TEST = { + 'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301', + 'md5': 'ba14f17995cdfc20c36ba40e21bf73f7', + 'info_dict': { + 'id': '16301', + 'ext': 'mp4', + 'title': 'Journée du conseil scientifique de l\'Afnic 2015', + 'description': 'md5:4abe07293b2f73efc6e1c37028d58c98', + 'uploader': 'Afnic Live', + 'uploader_id': '8023', + } + } + _ROOT_URL = 'http://live.eyedo.net:1935/' + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id) + + def _add_ns(path): + return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api') + + title = xpath_text(video_data, _add_ns('Titre'), 'title', True) + state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True) + if state_live_code == 'avenir': + raise ExtractorError( + '%s said: We\'re sorry, but this video is not yet available.' 
% self.IE_NAME, + expected=True) + + is_live = state_live_code == 'live' + m3u8_url = None + # http://eyedo.tv/Content/Html5/Scripts/html5view.js + if is_live: + if xpath_text(video_data, 'Cdn') == 'true': + m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id + else: + m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id + else: + m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id) + + return { + 'id': video_id, + 'title': title, + 'formats': self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'), + 'description': xpath_text(video_data, _add_ns('Description')), + 'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))), + 'uploader': xpath_text(video_data, _add_ns('Createur')), + 'uploader_id': xpath_text(video_data, _add_ns('CreateurId')), + 'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')), + 'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')), + } From 4ee0b8afdb384ad3e2d65b6b0159a801ee73d26d Mon Sep 17 00:00:00 2001 From: wankerer Date: Tue, 24 May 2016 10:18:36 -0700 Subject: [PATCH 67/67] [eporner] fix for the new URL layout Recently eporner slightly changed the URL layout, the ID that used to be digits only are now digits and letters, so youtube-dl falls back to the generic extractor that doesn't work. Fix the matching regex to allow letters in ID. 
[v2: added a test case] --- youtube_dl/extractor/eporner.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index e006921ec..581276694 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -11,8 +11,8 @@ from ..utils import ( class EpornerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P\d+)/(?P[\w-]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P\w+)/(?P[\w-]+)' + _TESTS = [{ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'md5': '39d486f046212d8e1b911c52ab4691f8', 'info_dict': { @@ -23,8 +23,22 @@ class EpornerIE(InfoExtractor): 'duration': 1838, 'view_count': int, 'age_limit': 18, - } - } + }, + }, + # New (May 2016) URL layout + { + 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', + 'md5': '3469eeaa93b6967a34cdbdbb9d064b33', + 'info_dict': { + 'id': '3YRUtzMcWn0', + 'display_id': 'Star-Wars-XXX-Parody', + 'ext': 'mp4', + 'title': 'Star Wars XXX Parody', + 'duration': 361.0, + 'view_count': int, + 'age_limit': 18, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url)