From d7d4481c6a8a914a436006b244b9fd781d322b71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Oct 2016 23:54:03 +0700 Subject: [PATCH 01/86] [movieclips] Fix _VALID_URL --- youtube_dl/extractor/movieclips.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dl/extractor/movieclips.py index d0cb8278e..30c206f9b 100644 --- a/youtube_dl/extractor/movieclips.py +++ b/youtube_dl/extractor/movieclips.py @@ -11,7 +11,7 @@ from ..utils import ( class MovieClipsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/.+-(?P\d+)(?:\?|$)' + _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P\d+)(?:\?|$)' _TEST = { 'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597', 'md5': '42b5a0352d4933a7bd54f2104f481244', From 2e7c8cab55e8b29dea5443aa45451b524799a12a Mon Sep 17 00:00:00 2001 From: Zhong Jianxin Date: Wed, 9 Mar 2016 23:43:27 +0800 Subject: [PATCH 02/86] [pandatv] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pandatv.py | 84 ++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 youtube_dl/extractor/pandatv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6f7d9b65b..108d7ca69 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -667,6 +667,7 @@ from .orf import ( ORFFM4IE, ORFIPTVIE, ) +from .pandatv import PandaTVIE from .pandoratv import PandoraTVIE from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py new file mode 100644 index 000000000..84014f3c5 --- /dev/null +++ b/youtube_dl/extractor/pandatv.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + qualities +) + +class PandaTVIE(InfoExtractor): + IE_DESC = '熊猫TV' + _VALID_URL = r'http://(?:www\.)?panda\.tv/(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://www.panda.tv/10091', + 'info_dict': { + 'id': '10091', + 'title': 're:.+', + 'uploader': '囚徒', + 'ext': 'flv', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + config = self._download_json( + 'http://www.panda.tv/api_room?roomid=%s' % video_id, + video_id + ) + + data = config['data'] + + error_code = config.get('errno', 0) + if error_code is not 0: + error_desc = 'Server reported error %i' % error_code + if isinstance(data, compat_str): + error_desc += ': ' + data + raise ExtractorError(error_desc, expected=True) + + video_info = data['videoinfo'] + + # 2 = live, 3 = offline + if video_info.get('status') != '2': + raise ExtractorError( + 'Live stream is offline', expected=True) + + title = data['roominfo']['name'] + uploader = data.get('hostinfo', {}).get('name') + room_key = video_info['room_key'] + stream_addr = video_info.get('stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'}) + + plflag0, plflag1 = video_info['plflag'].split('_') + plflag0 = int(plflag0) - 1 + if plflag1 == '21': + plflag0 = 10 + plflag1 = '4' + live_panda = 'live_panda' if plflag0 < 1 else '' + + quality_key = qualities(['OD', 'HD', 'SD']) + suffix = ['_small', '_mid', ''] + formats = [] + for k, v in stream_addr.items(): + if v == '1': + quality = quality_key(k) + if quality >= 0: + formats.append({ + 'url': 'http://pl%s.live.panda.tv/live_panda/%s%s%s.flv' % (plflag1, room_key, live_panda, suffix[quality]), + 'format_id': k, + 'quality': quality, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._live_title(title), + 'uploader': uploader, + 'formats': formats, + 'is_live': True, + } From d2e96a8ed439791ee266f06f48cf06facc9a0ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Oct 2016 01:51:37 +0700 Subject: [PATCH 03/86] [pandatv] Extract m3u8, document reverse source and PEP 8 --- youtube_dl/extractor/pandatv.py | 51 +++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py index 84014f3c5..133cc9b88 100644 --- a/youtube_dl/extractor/pandatv.py +++ b/youtube_dl/extractor/pandatv.py @@ -2,16 +2,16 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, - qualities + qualities, ) + class PandaTVIE(InfoExtractor): IE_DESC = '熊猫TV' _VALID_URL = r'http://(?:www\.)?panda\.tv/(?P[0-9]+)' - _TESTS = [{ + _TEST = { 'url': 'http://www.panda.tv/10091', 'info_dict': { 'id': '10091', @@ -23,25 +23,23 @@ class PandaTVIE(InfoExtractor): 'params': { 'skip_download': True, }, - }] + 'skip': 'Live stream is offline', + } def _real_extract(self, url): video_id = self._match_id(url) config = self._download_json( - 'http://www.panda.tv/api_room?roomid=%s' % video_id, - video_id - ) - - data = config['data'] + 'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id) error_code = config.get('errno', 0) if error_code is not 0: - error_desc = 'Server reported error %i' % error_code - if isinstance(data, compat_str): - error_desc += ': ' + data - raise ExtractorError(error_desc, expected=True) + raise ExtractorError( + '%s returned error %s: %s' + % (self.IE_NAME, error_code, config['errmsg']), + expected=True) + data = config['data'] video_info = data['videoinfo'] # 2 = live, 3 = offline @@ -52,8 +50,12 @@ class PandaTVIE(InfoExtractor): title = data['roominfo']['name'] uploader = data.get('hostinfo', {}).get('name') room_key = video_info['room_key'] - stream_addr = video_info.get('stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'}) + stream_addr = video_info.get( + 'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'}) + # Reverse engineered from web player swf + # (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of + # writing). plflag0, plflag1 = video_info['plflag'].split('_') plflag0 = int(plflag0) - 1 if plflag1 == '21': @@ -65,14 +67,19 @@ class PandaTVIE(InfoExtractor): suffix = ['_small', '_mid', ''] formats = [] for k, v in stream_addr.items(): - if v == '1': - quality = quality_key(k) - if quality >= 0: - formats.append({ - 'url': 'http://pl%s.live.panda.tv/live_panda/%s%s%s.flv' % (plflag1, room_key, live_panda, suffix[quality]), - 'format_id': k, - 'quality': quality, - }) + if v != '1': + continue + quality = quality_key(k) + if quality <= 0: + continue + for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))): + formats.append({ + 'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s' + % (pl, plflag1, room_key, live_panda, suffix[quality], ext), + 'format_id': '%s-%s' % (k, ext), + 'quality': quality, + 'source_preference': pref, + }) self._sort_formats(formats) return { From 81cb7a59784dc2a4861f127845b52b77d6580c19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Oct 2016 01:51:46 +0700 Subject: [PATCH 04/86] Credit @azuwis for pandatv (#10736) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 2d0b0c6c0..0c27b7f39 100644 --- a/AUTHORS +++ b/AUTHORS @@ -188,3 +188,4 @@ Xie Yanbo Philip Xu John Hawkinson Rich Leeper +Zhong Jianxin From b0b28b82413515061d9563d0a5ba3dad90fb59e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Oct 2016 01:53:41 +0700 Subject: [PATCH 05/86] [ChangeLog] Actualize --- ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index f64dcbc48..8cd81f1f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,8 +4,13 @@ Core * Running youtube-dl in the background is fixed (#10996, #10706, #955) Extractors ++ [pandatv] Add support for panda.tv (#10736) + [dotsub] Support Vimeo embed (#10964) * [litv] Fix extraction ++ [vimeo] Delegate ondemand redirects to ondemand extractor (#10994) +* [vivo] Fix extraction (#11003) ++ [twitch:stream] Add support for rebroadcasts (#10995) +* [pluralsight] Fix subtitles conversion (#10990) version 2016.10.21.1 From b17422753fbaf4e973c42a4dee7b4a071bd9a692 Mon Sep 17 00:00:00 2001 From: Thor77 Date: Mon, 24 Oct 2016 21:43:03 +0200 Subject: [PATCH 06/86] [jamendo] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/jamendo.py | 117 +++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 youtube_dl/extractor/jamendo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 108d7ca69..95c03522e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -408,6 +408,7 @@ from .ivi import ( from .ivideon import IvideonIE from .iwara import IwaraIE from .izlesene import IzleseneIE +from .jamendo import JamendoIE, JamendoAlbumIE from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .jwplatform import JWPlatformIE diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py new file mode 100644 index 000000000..4aacd10f4 --- /dev/null +++ b/youtube_dl/extractor/jamendo.py @@ -0,0 +1,117 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from collections import namedtuple + +from ..compat import compat_urlparse +from .common import InfoExtractor + +FormatData = namedtuple('FormatData', [ + 'format_id', 'sub_domain', 'ext', 'quality']) + + +class JamendoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P[0-9]+)/(?P[\w-]+)' + _TEST = { + 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', + 'md5': '6e9e82ed6db98678f171c25a8ed09ffd', + 'info_dict': { + 'id': '196219', + 'display_id': 'stories-from-emona-i', + 'ext': 'flac', + 'title': 'Stories from Emona I', + 'thumbnail': 're:^https?://.*\.jpg' + } + } + + def _real_extract(self, url): + url_data = self._VALID_URL_RE.match(url) + track_id = url_data.group('id') + display_id = url_data.group('display_id') + webpage = self._download_webpage(url, display_id) + + thumbnail = self._html_search_meta( + 'image', webpage, 'thumbnail', fatal=False) + title = self._html_search_meta('name', webpage, 'title') + + url_template = 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' + format_data = [ + FormatData( + format_id='mp31', sub_domain='mp3l', ext='mp3', quality=0), + FormatData( + format_id='mp32', sub_domain='mp3d', ext='mp3', quality=1), + FormatData( + format_id='ogg1', sub_domain='ogg', ext='ogg', quality=2), + FormatData( + format_id='flac', sub_domain='flac', ext='flac', quality=3), + ] + formats = [ + { + 'format_id': fd.format_id, + 'url': url_template % (fd.sub_domain, track_id, fd.format_id), + 'ext': fd.ext, + 'quality': fd.quality + } + for fd in format_data + ] + self._check_formats(formats, video_id=display_id) + return { + 'id': track_id, + 'display_id': display_id, + 'thumbnail': thumbnail, + 'title': title, + 'formats': formats + } + + +class JamendoAlbumIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P[0-9]+)/(?P[\w-]+)' + _TEST = { + 'url': 'https://www.jamendo.com/album/121486/duck-on-cover', + 'info_dict': { + 'id': '121486', + 'title': 'Duck On Cover' + }, + 'playlist_mincount': 2, + 'playlist': [ + { + 'md5': 'e1a2fcb42bda30dfac990212924149a8', + 'info_dict': { + 'id': '1032333', + 'ext': 'flac', + 'title': 'Warmachine' + } + }, + { + 'md5': '1f358d7b2f98edfe90fd55dac0799d50', + 'info_dict': { + 'id': '1032330', + 'ext': 'flac', + 'title': 'Without Your Ghost' + } + } + ], + 'params': { + 'playlistend': 2 + } + } + + def _real_extract(self, url): + url_data = self._VALID_URL_RE.match(url) + album_id = url_data.group('id') + webpage = self._download_webpage(url, url_data.group('display_id')) + + title = self._html_search_meta('name', webpage, 'title') + + track_paths = re.findall(r'

Date: Sat, 29 Oct 2016 17:59:35 +0800 Subject: [PATCH 22/86] [openload] Fix extraction (#10408) Thanks @TwelveCharzz again for studying openload codes --- ChangeLog | 1 + youtube_dl/extractor/openload.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index b4de1d35f..b2aafcf02 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [openload] Fix extraction (#10408) * [adultswim] Fix extraction (#10979) * [hornbunny] Fix extraction (#10981) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 6cf7e4a77..d3d4101de 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -82,7 +82,7 @@ class OpenloadIE(InfoExtractor): if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 2 + j += 3 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From ea331f40e65740973e7766e71853352c814158f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Oct 2016 05:10:31 +0700 Subject: [PATCH 23/86] Credit @Thor77 for jamendo (#10934) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 0c27b7f39..4a6f7e13f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -189,3 +189,4 @@ Philip Xu John Hawkinson Rich Leeper Zhong Jianxin +Thor77 From 2a048f9878b2417fb4625881ee4a2340ed6c7d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Oct 2016 05:27:50 +0700 Subject: [PATCH 24/86] [beeg] Fix extraction (closes #11069) --- youtube_dl/extractor/beeg.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 956c7680e..b0b7914d8 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -46,19 +46,19 @@ class BeegIE(InfoExtractor): self._proto_relative_url(cpl_url), video_id, 'Downloading cpl JS', fatal=False) if cpl: - beeg_version = self._search_regex( - r'beeg_version\s*=\s*(\d+)', cpl, - 'beeg version', default=None) or self._search_regex( + beeg_version = int_or_none(self._search_regex( + r'beeg_version\s*=\s*([^\b]+)', cpl, + 'beeg version', default=None)) or self._search_regex( r'/(\d+)\.js', cpl_url, 'beeg version', default=None) beeg_salt = self._search_regex( - r'beeg_salt\s*=\s*(["\'])(?P.+?)\1', cpl, 'beeg beeg_salt', + r'beeg_salt\s*=\s*(["\'])(?P.+?)\1', cpl, 'beeg salt', default=None, group='beeg_salt') - beeg_version = beeg_version or '1750' - beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr' + beeg_version = beeg_version or '2000' + beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H' video = self._download_json( - 'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id), + 'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id), video_id) def split(o, e): From e1a0b3b81ce451b1502fa0f9a92190124107022e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 30 Oct 2016 17:01:48 +0800 Subject: [PATCH 25/86] [imgur] Recognize /r/ URLs (closes #11071) --- ChangeLog | 1 + youtube_dl/extractor/imgur.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index b2aafcf02..b9f8d92da 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [imgur] Recognize /r/ URLs (#11071) * [openload] Fix extraction (#10408) * [adultswim] Fix extraction (#10979) * [hornbunny] Fix extraction (#10981) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index d23489dcf..67c24a51c 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -13,7 +13,7 @@ from ..utils import ( class ImgurIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$' _TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', @@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor): }, { 'url': 'http://imgur.com/topic/Funny/N8rOudd', 'only_matching': True, + }, { + 'url': 'http://imgur.com/r/aww/VQcQPhM', + 'only_matching': True, }] def _real_extract(self, url): From d9ee2e5cf684090862b60f32a9b0b4c06a59ac91 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 30 Oct 2016 18:20:55 +0800 Subject: [PATCH 26/86] [facebook] Remove SWF params so that 1080P are detected Closes #11073 In the provided link, SWF params give up to 720P, and VideoConfig gives 1080P for both best and bestvideo. I guess all Facebook videos supports HTML5 now, so I remove the old detection for SWF params --- ChangeLog | 1 + youtube_dl/extractor/facebook.py | 60 ++++++++++++-------------------- 2 files changed, 23 insertions(+), 38 deletions(-) diff --git a/ChangeLog b/ChangeLog index b9f8d92da..dc343c8ca 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [facebook] Improve 1080P video detection (#11073) * [imgur] Recognize /r/ URLs (#11071) * [openload] Fix extraction (#10408) * [adultswim] Fix extraction (#10979) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 801573459..b4d38e5c2 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import json import re import socket @@ -100,7 +99,8 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': '"What are you doing running in the snow?"', 'uploader': 'FailArmy', - } + }, + 'skip': 'Video gone', }, { 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903', 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3', @@ -110,6 +110,7 @@ class FacebookIE(InfoExtractor): 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog', 'uploader': 'S. Saint', }, + 'skip': 'Video gone', }, { 'note': 'swf params escaped', 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749', @@ -119,6 +120,18 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'Facebook video #10153664894881749', }, + }, { + # have 1080P, but only up to 720p in swf params + 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', + 'md5': '0d9813160b146b3bc8744e006027fcc6', + 'info_dict': { + 'id': '10155529876156509', + 'ext': 'mp4', + 'title': 'Holocaust survivor becomes US citizen', + 'timestamp': 1477818095, + 'upload_date': '20161030', + 'uploader': 'CNN', + }, }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -227,43 +240,13 @@ class FacebookIE(InfoExtractor): video_data = None - BEFORE = '{swf.addParam(param[0], param[1]);});' - AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' - PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER) - - for m in re.findall(PATTERN, webpage): - swf_params = m.replace('\\\\', '\\').replace('\\"', '"') - data = dict(json.loads(swf_params)) - params_raw = compat_urllib_parse_unquote(data['params']) - video_data_candidate = json.loads(params_raw)['video_data'] - for _, f in video_data_candidate.items(): - if not f: - continue - if isinstance(f, dict): - f = [f] - if not isinstance(f, list): - continue - if f[0].get('video_id') == video_id: - video_data = video_data_candidate - break - if video_data: + server_js_data = self._parse_json(self._search_regex( + r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id) + for item in server_js_data.get('instances', []): + if item[1][0] == 'VideoConfig': + video_data = item[2][0]['videoData'] break - def video_data_list2dict(video_data): - ret = {} - for item in video_data: - format_id = item['stream_type'] - ret.setdefault(format_id, []).append(item) - return ret - - if not video_data: - server_js_data = self._parse_json(self._search_regex( - r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id) - for item in server_js_data.get('instances', []): - if item[1][0] == 'VideoConfig': - video_data = video_data_list2dict(item[2][0]['videoData']) - break - if not video_data: if not fatal_if_no_video: return webpage, False @@ -276,7 +259,8 @@ class FacebookIE(InfoExtractor): raise ExtractorError('Cannot parse data') formats = [] - for format_id, f in video_data.items(): + for f in video_data: + format_id = f['stream_type'] if f and isinstance(f, dict): f = [f] if not f or not isinstance(f, list): From cae6bc011852271734884648be254cc51500935f Mon Sep 17 00:00:00 2001 From: dundua Date: Sun, 30 Oct 2016 04:14:51 -0700 Subject: [PATCH 27/86] [vessel] Improve video id extraction --- youtube_dl/extractor/vessel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 2cd617b91..5b7647f8a 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -13,7 +13,7 @@ from ..utils import ( class VesselIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P[0-9a-zA-Z-_]+)' _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' _LOGIN_URL = 'https://www.vessel.com/api/account/login' _NETRC_MACHINE = 'vessel' @@ -37,7 +37,7 @@ class VesselIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return [url for _, url in re.findall( - r']+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1', + r']+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1', webpage)] @staticmethod From a901fc5fc2600f3324018ee32d4933350716e75e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Oct 2016 18:17:15 +0700 Subject: [PATCH 28/86] [vessel] Add tests for #11068 --- youtube_dl/extractor/vessel.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 5b7647f8a..6b9c227db 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -32,6 +32,12 @@ class VesselIE(InfoExtractor): }, { 'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346', 'only_matching': True, + }, { + 'url': 'https://www.vessel.com/videos/F01_dsLj1', + 'only_matching': True, + }, { + 'url': 'https://www.vessel.com/videos/RRX-sir-J', + 'only_matching': True, }] @staticmethod From 3bf55be466373cc6af16007fa8ddc5aed89f885d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Oct 2016 18:19:29 +0700 Subject: [PATCH 29/86] [ChangeLog] Actualize --- ChangeLog | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog b/ChangeLog index dc343c8ca..072b4ddeb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,9 +3,13 @@ version Extractors * [facebook] Improve 1080P video detection (#11073) * [imgur] Recognize /r/ URLs (#11071) +* [beeg] Fix extraction (#11069) * [openload] Fix extraction (#10408) +* [gvsearch] Modernize and fix search request (#11051) * [adultswim] Fix extraction (#10979) ++ [nobelprize] Add support for nobelprize.org (#9999) * [hornbunny] Fix extraction (#10981) +* [tvp] Improve video id extraction (#10585) version 2016.10.26 From e70a5e656685eb68d47583cdd1bac963f9ecacf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Oct 2016 18:24:49 +0700 Subject: [PATCH 30/86] release 2016.10.30 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c2f84a2fe..fc89ba454 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.26*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.30*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.30** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.26 +[debug] youtube-dl version 2016.10.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 072b4ddeb..f36e04d13 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.30 Extractors * [facebook] Improve 1080P video detection (#11073) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 963b7bbb7..e04fb6be2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -488,6 +488,7 @@ - **Nintendo** - **njoy**: N-JOY - **njoy:embed** + - **NobelPrize** - **Noco** - **Normalboots** - **NosVideo** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6d0691bf9..f20f3dfe2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.26' +__version__ = '2016.10.30' From 7e7a028aa4108f5ce455fda88dfdcee13b3b7578 Mon Sep 17 00:00:00 2001 From: Mel Shafer Date: Sun, 30 Oct 2016 14:12:36 -0400 Subject: [PATCH 31/86] [README.md] Fix a typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0fbf32545..0f4088adc 100644 --- a/README.md +++ b/README.md @@ -1083,7 +1083,7 @@ Say `meta` from the previous example has a `title` and you are about to extract title = meta['title'] ``` -If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected. +If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected. Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario: From 2c6da7df4a4d69ec933688e3c53795fd3436a1c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Oct 2016 01:36:53 +0700 Subject: [PATCH 32/86] release 2016.10.31 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fc89ba454..3e020524b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.30*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.30** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.31** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.30 +[debug] youtube-dl version 2016.10.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 29f52cbe8..0b5a5c1f8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -245,7 +245,7 @@ Say `meta` from the previous example has a `title` and you are about to extract title = meta['title'] ``` -If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected. +If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected. Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario: diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f20f3dfe2..16274c22d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.30' +__version__ = '2016.10.31' From e5a088dc4be4fdcc96927a9f1b7284d4cd49c415 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Oct 2016 23:32:08 +0700 Subject: [PATCH 33/86] [utils] Fix --match-filter for int-like strings (closes #11082) --- test/test_YoutubeDL.py | 6 ++++++ youtube_dl/utils.py | 12 +++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 0dfe25c00..8bf00bea9 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -605,6 +605,7 @@ class TestYoutubeDL(unittest.TestCase): 'extractor': 'TEST', 'duration': 30, 'filesize': 10 * 1024, + 'playlist_id': '42', } second = { 'id': '2', @@ -614,6 +615,7 @@ class TestYoutubeDL(unittest.TestCase): 'duration': 10, 'description': 'foo', 'filesize': 5 * 1024, + 'playlist_id': '43', } videos = [first, second] @@ -650,6 +652,10 @@ class TestYoutubeDL(unittest.TestCase): res = get_videos(f) self.assertEqual(res, ['1']) + f = match_filter_func('playlist_id = 42') + res = get_videos(f) + self.assertEqual(res, ['1']) + def test_playlist_items_selection(self): entries = [{ 'id': compat_str(i), diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2770c5f1c..1a5ce8688 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2345,11 +2345,18 @@ def _match_one(filter_part, dct): m = operator_rex.search(filter_part) if m: op = COMPARISON_OPERATORS[m.group('op')] - if m.group('strval') is not None: + actual_value = dct.get(m.group('key')) + if (m.group('strval') is not None or + # If the original field is a string and matching comparisonvalue is + # a number we should respect the origin of the original field + # and process comparison value as a string (see + # https://github.com/rg3/youtube-dl/issues/11082). + actual_value is not None and m.group('intval') is not None and + isinstance(actual_value, compat_str)): if m.group('op') not in ('=', '!='): raise ValueError( 'Operator %s does not support string values!' % m.group('op')) - comparison_value = m.group('strval') + comparison_value = m.group('strval') or m.group('intval') else: try: comparison_value = int(m.group('intval')) @@ -2361,7 +2368,6 @@ def _match_one(filter_part, dct): raise ValueError( 'Invalid integer value %r in filter part %r' % ( m.group('intval'), filter_part)) - actual_value = dct.get(m.group('key')) if actual_value is None: return m.group('none_inclusive') return op(actual_value, comparison_value) From b82c33dd67c24cd6db94a9793b2f62a5db412795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Nov 2016 01:15:46 +0700 Subject: [PATCH 34/86] [extractor/common] Improve mpd base URL extraction (closes #10909, closes #11079) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 415dc84c8..68b325fca 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1539,7 +1539,7 @@ class InfoExtractor(object): if res is False: return [] mpd, urlh = res - mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group() + mpd_base_url = re.match(r'https?://[^?#&]+/', urlh.geturl()).group() return self._parse_mpd_formats( compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, From e3577722b0bdb16e786eba8029d09ccc1983e0ce Mon Sep 17 00:00:00 2001 From: NeroBurner Date: Mon, 26 Sep 2016 22:45:02 +0200 Subject: [PATCH 35/86] [nicknight] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nick.py | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dea97920b..913ffe29a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -596,6 +596,7 @@ from .nhl import ( from .nick import ( NickIE, NickDeIE, + NickNightAtIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninecninemedia import ( diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 57cf1ce8e..a96bb0ef5 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -69,7 +69,7 @@ class NickIE(MTVServicesInfoExtractor): class NickDeIE(MTVServicesInfoExtractor): IE_NAME = 'nick.de' - _VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.nl)/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', 'only_matching': True, @@ -91,3 +91,28 @@ class NickDeIE(MTVServicesInfoExtractor): {'siteKey': 'nick.de'}) return self._get_videos_info_from_url(mrss_url, video_id) + + +class NickNightAtIE(MTVServicesInfoExtractor): + IE_NAME = 'nicknight.de' + _VALID_URL = r'https?://(?:www\.)nicknight\.(?:de|at|tv)/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde', + 'only_matching': True, + }, { + 'url': 'http://www.nicknight.at/shows/977-awkward', + 'only_matching': True, + }, { + 'url': 'http://www.nicknight.at/shows/1900-faking-it', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + mrss_url = self._search_regex( + r'mrss: (["\'])(?Phttp.+?)\1', webpage, 'mrss url', group='url') + + return self._get_videos_info_from_url(mrss_url, video_id) From 9c82bba05d5495d29be2ee20fc9cb690b37fcdce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 01:29:05 +0700 Subject: [PATCH 36/86] [nickde] Improve extraction --- youtube_dl/extractor/nick.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index a96bb0ef5..36364bee9 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .mtv import MTVServicesInfoExtractor from ..utils import update_url_query @@ -69,7 +71,7 @@ class NickIE(MTVServicesInfoExtractor): class NickDeIE(MTVServicesInfoExtractor): IE_NAME = 'nick.de' - _VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?Pnick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', 'only_matching': True, @@ -79,16 +81,21 @@ class NickDeIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit', 'only_matching': True, + }, { + 'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + host = mobj.group('host') webpage = self._download_webpage(url, video_id) mrss_url = update_url_query(self._search_regex( r'data-mrss=(["\'])(?Phttp.+?)\1', webpage, 'mrss url', group='url'), - {'siteKey': 'nick.de'}) + {'siteKey': host}) return self._get_videos_info_from_url(mrss_url, video_id) From f449c061d04b37f70dafa8c01a2e1fb7a47a9a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 01:35:53 +0700 Subject: [PATCH 37/86] [nicknight] Improve extraction (closes #10769) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/nick.py | 28 +++++++++++++--------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 913ffe29a..f30ac5aaf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -596,7 +596,7 @@ from .nhl import ( from .nick import ( NickIE, NickDeIE, - NickNightAtIE, + NickNightIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninecninemedia import ( diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 36364bee9..7672845bf 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -86,6 +86,11 @@ class NickDeIE(MTVServicesInfoExtractor): 'only_matching': True, }] + def _extract_mrss_url(self, webpage, host): + return update_url_query(self._search_regex( + r'data-mrss=(["\'])(?Phttp.+?)\1', webpage, 'mrss url', group='url'), + {'siteKey': host}) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -93,16 +98,14 @@ class NickDeIE(MTVServicesInfoExtractor): webpage = self._download_webpage(url, video_id) - mrss_url = update_url_query(self._search_regex( - r'data-mrss=(["\'])(?Phttp.+?)\1', webpage, 'mrss url', group='url'), - {'siteKey': host}) + mrss_url = self._extract_mrss_url(webpage, host) return self._get_videos_info_from_url(mrss_url, video_id) -class NickNightAtIE(MTVServicesInfoExtractor): - IE_NAME = 'nicknight.de' - _VALID_URL = r'https?://(?:www\.)nicknight\.(?:de|at|tv)/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' +class NickNightIE(NickDeIE): + IE_NAME = 'nicknight' + _VALID_URL = r'https?://(?:www\.)(?Pnicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde', 'only_matching': True, @@ -114,12 +117,7 @@ class NickNightAtIE(MTVServicesInfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - mrss_url = self._search_regex( - r'mrss: (["\'])(?Phttp.+?)\1', webpage, 'mrss url', group='url') - - return self._get_videos_info_from_url(mrss_url, video_id) + def _extract_mrss_url(self, webpage, *args): + return self._search_regex( + r'mrss\s*:\s*(["\'])(?Phttp.+?)\1', webpage, + 'mrss url', group='url') From b2758123c5e759fdb0c7d23d380e4dd9e245cd4a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 19 Oct 2016 16:22:40 +0100 Subject: [PATCH 38/86] add Basic support for Smooth Streaming protocol(#8118) --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/downloader/__init__.py | 2 + youtube_dl/downloader/ism.py | 273 ++++++++++++++++++++++++++++++ youtube_dl/extractor/common.py | 101 +++++++++++ 4 files changed, 377 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/downloader/ism.py diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 99825e343..53f20ac2c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1658,7 +1658,7 @@ class YoutubeDL(object): video_ext, audio_ext = audio.get('ext'), video.get('ext') if video_ext and audio_ext: COMPATIBLE_EXTS = ( - ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'), + ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'), ('webm') ) for exts in COMPATIBLE_EXTS: diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 817591d97..16952e359 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -7,6 +7,7 @@ from .http import HttpFD from .rtmp import RtmpFD from .dash import DashSegmentsFD from .rtsp import RtspFD +from .ism import IsmFD from .external import ( get_external_downloader, FFmpegFD, @@ -24,6 +25,7 @@ PROTOCOL_MAP = { 'rtsp': RtspFD, 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, + 'ism': IsmFD, } diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py new file mode 100644 index 000000000..9f8f14b66 --- /dev/null +++ b/youtube_dl/downloader/ism.py @@ -0,0 +1,273 @@ +from __future__ import unicode_literals + +import os +import time +import struct +import binascii +import io + +from .fragment import FragmentFD +from ..compat import compat_urllib_error +from ..utils import ( + sanitize_open, + encodeFilename, +) + + +u8 = struct.Struct(b'>B') +u88 = struct.Struct(b'>Bx') +u16 = struct.Struct(b'>H') +u1616 = struct.Struct(b'>Hxx') +u32 = struct.Struct(b'>I') +u64 = struct.Struct(b'>Q') + +s88 = struct.Struct(b'>bx') +s16 = struct.Struct(b'>h') +s1616 = struct.Struct(b'>hxx') +s32 = struct.Struct(b'>i') + +unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) + +TRACK_ENABLED = 0x1 +TRACK_IN_MOVIE = 0x2 +TRACK_IN_PREVIEW = 0x4 + +SELF_CONTAINED = 0x1 + + +def box(box_type, payload): + return u32.pack(8 + len(payload)) + box_type + payload + + +def full_box(box_type, version, flags, payload): + return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload) + + +def write_piff_header(stream, params): + track_id = params['track_id'] + fourcc = params['fourcc'] + duration = params['duration'] + timescale = params.get('timescale', 10000000) + language = params.get('language', 'und') + height = params.get('height', 0) + width = params.get('width', 0) + is_audio = width == 0 and height == 0 + creation_time = modification_time = int(time.time()) + + ftyp_payload = b'isml' # major brand + ftyp_payload += u32.pack(1) # minor version + ftyp_payload += b'piff' + b'iso2' # compatible brands + stream.write(box(b'ftyp', ftyp_payload)) # File Type Box + + mvhd_payload = u64.pack(creation_time) + mvhd_payload += u64.pack(modification_time) + mvhd_payload += u32.pack(timescale) + mvhd_payload += u64.pack(duration) + mvhd_payload += s1616.pack(1) # rate + mvhd_payload += s88.pack(1) # volume + mvhd_payload += u16.pack(0) # reserved + mvhd_payload += u32.pack(0) * 2 # reserved + mvhd_payload += unity_matrix + mvhd_payload += u32.pack(0) * 6 # pre defined + mvhd_payload += u32.pack(0xffffffff) # next track id + moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box + + tkhd_payload = u64.pack(creation_time) + tkhd_payload += u64.pack(modification_time) + tkhd_payload += u32.pack(track_id) # track id + tkhd_payload += u32.pack(0) # reserved + tkhd_payload += u64.pack(duration) + tkhd_payload += u32.pack(0) * 2 # reserved + tkhd_payload += s16.pack(0) # layer + tkhd_payload += s16.pack(0) # alternate group + tkhd_payload += s88.pack(1 if is_audio else 0) # volume + tkhd_payload += u16.pack(0) # reserved + tkhd_payload += unity_matrix + tkhd_payload += u1616.pack(width) + tkhd_payload += u1616.pack(height) + trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box + + mdhd_payload = u64.pack(creation_time) + mdhd_payload += u64.pack(modification_time) + mdhd_payload += u32.pack(timescale) + mdhd_payload += u64.pack(duration) + mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60)) + mdhd_payload += u16.pack(0) # pre defined + mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box + + hdlr_payload = u32.pack(0) # pre defined + hdlr_payload += b'soun' if is_audio else b'vide' # handler type + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name + mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box + + if is_audio: + smhd_payload = s88.pack(0) # balance + smhd_payload = u16.pack(0) # reserved + media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header + else: + vmhd_payload = u16.pack(0) # graphics mode + vmhd_payload += u16.pack(0) * 3 # opcolor + media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header + minf_payload = media_header_box + + dref_payload = u32.pack(1) # entry count + dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box + dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box + minf_payload += box(b'dinf', dinf_payload) # Data Information Box + + stsd_payload = u32.pack(1) # entry count + + sample_entry_payload = u8.pack(0) * 6 # reserved + sample_entry_payload += u16.pack(1) # data reference index + if is_audio: + sample_entry_payload += u32.pack(0) * 2 # reserved + sample_entry_payload += u16.pack(params.get('channels', 2)) + sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u1616.pack(params['sampling_rate']) + + if fourcc == 'AACL': + smaple_entry_box = box(b'mp4a', sample_entry_payload) + else: + sample_entry_payload = sample_entry_payload + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u32.pack(0) * 3 # pre defined + sample_entry_payload += u16.pack(width) + sample_entry_payload += u16.pack(height) + sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi + sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi + sample_entry_payload += u32.pack(0) # reserved + sample_entry_payload += u16.pack(1) # frame count + sample_entry_payload += u8.pack(0) * 32 # compressor name + sample_entry_payload += u16.pack(0x18) # depth + sample_entry_payload += s16.pack(-1) # pre defined + + codec_private_data = binascii.unhexlify(params['codec_private_data']) + if fourcc in ('H264', 'AVC1'): + sps, pps = codec_private_data.split(u32.pack(1))[1:] + avcc_payload = u8.pack(1) # configuration version + avcc_payload += sps[1] # avc profile indication + avcc_payload += sps[2] # profile compatibility + avcc_payload += sps[3] # avc level indication + avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one + avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001) + avcc_payload += u16.pack(len(sps)) + avcc_payload += sps + avcc_payload += u8.pack(1) # number of pps + avcc_payload += u16.pack(len(pps)) + avcc_payload += pps + sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record + smaple_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry + stsd_payload += smaple_entry_box + + stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box + + stts_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box + + stsc_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box + + stco_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box + + minf_payload += box(b'stbl', stbl_payload) # Sample Table Box + + mdia_payload += box(b'minf', minf_payload) # Media Information Box + + trak_payload += box(b'mdia', mdia_payload) # Media Box + + moov_payload += box(b'trak', trak_payload) # Track Box + + mehd_payload = u64.pack(duration) + mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box + + trex_payload = u32.pack(track_id) # track id + trex_payload += u32.pack(1) # default sample description index + trex_payload += u32.pack(0) # default sample duration + trex_payload += u32.pack(0) # default sample size + trex_payload += u32.pack(0) # default sample flags + mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box + + moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box + stream.write(box(b'moov', moov_payload)) # Movie Box + + +def extract_box_data(data, box_sequence): + data_reader = io.BytesIO(data) + while True: + box_size = u32.unpack(data_reader.read(4))[0] + box_type = data_reader.read(4) + if box_type == box_sequence[0]: + box_data = data_reader.read(box_size - 8) + if len(box_sequence) == 1: + return box_data + return extract_box_data(box_data, box_sequence[1:]) + data_reader.seek(box_size - 8, 1) + + +class IsmFD(FragmentFD): + """ + Download segments in a ISM manifest + """ + + FD_NAME = 'ism' + + def real_download(self, filename, info_dict): + segments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] + + ctx = { + 'filename': filename, + 'total_frags': len(segments), + } + + self._prepare_and_start_frag_download(ctx) + + segments_filenames = [] + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + track_written = False + for i, segment in enumerate(segments): + segment_url = segment['url'] + segment_name = 'Frag%d' % i + target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name) + count = 0 + while count <= fragment_retries: + try: + success = ctx['dl'].download(target_filename, {'url': segment_url}) + if not success: + return False + down, target_sanitized = sanitize_open(target_filename, 'rb') + down_data = down.read() + if not track_written: + tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd']) + info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] + write_piff_header(ctx['dest_stream'], info_dict['_download_params']) + track_written = True + ctx['dest_stream'].write(down_data) + down.close() + segments_filenames.append(target_sanitized) + break + except compat_urllib_error.HTTPError as err: + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, segment_name, count, fragment_retries) + if count > fragment_retries: + if skip_unavailable_fragments: + self.report_skip_fragment(segment_name) + continue + self.report_error('giving up after %s fragment retries' % fragment_retries) + return False + + self._finish_frag_download(ctx) + + for segment_file in segments_filenames: + os.remove(encodeFilename(segment_file)) + + return True diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 68b325fca..2e9f05ae3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1780,6 +1780,107 @@ class InfoExtractor(object): self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats + def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): + res = self._download_webpage_handle( + ism_url, video_id, + note=note or 'Downloading ISM manifest', + errnote=errnote or 'Failed to download ISM manifest', + fatal=fatal) + if res is False: + return [] + ism, urlh = res + + return self._parse_ism_formats( + compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id) + + def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): + if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None: + return [] + + ism_base_url = re.match(r'https?://.+/', ism_url).group() + + duration = int(ism_doc.attrib['Duration']) + timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 + + formats = [] + for stream in ism_doc.findall('StreamIndex'): + stream_type = stream.get('Type') + if stream_type not in ('video', 'audio'): + continue + url_pattern = stream.attrib['Url'] + stream_timescale = int_or_none(stream.get('TimeScale')) or timescale + stream_name = stream.get('Name') + for track in stream.findall('QualityLevel'): + fourcc = track.get('FourCC') + # TODO: add support for WVC1 and WMAP + if fourcc not in ('H264', 'AVC1', 'AACL'): + self.report_warning('%s is not a supported codec' % fourcc) + continue + tbr = int(track.attrib['Bitrate']) // 1000 + width = int_or_none(track.get('MaxWidth')) + height = int_or_none(track.get('MaxHeight')) + sampling_rate = int_or_none(track.get('SamplingRate')) + + track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern) + track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern) + + fragments = [] + fragment_ctx = { + 'time': 0, + } + stream_fragments = stream.findall('c') + for stream_fragment_index, stream_fragment in enumerate(stream_fragments): + fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time'] + fragment_repeat = int_or_none(stream_fragment.get('r')) or 1 + fragment_ctx['duration'] = int_or_none(stream_fragment.get('d')) + if not fragment_ctx['duration']: + try: + next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t']) + except IndexError: + next_fragment_time = duration + fragment_ctx['duration'] = (next_fragment_time - frgament_time) / fragment_repeat + for _ in range(fragment_repeat): + fragments.append({ + 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern), + 'duration': fragment_ctx['duration'] / stream_timescale, + }) + fragment_ctx['time'] += fragment_ctx['duration'] + + format_id = [] + if ism_id: + format_id.append(ism_id) + if stream_name: + format_id.append(stream_name) + format_id.append(compat_str(tbr)) + + formats.append({ + 'format_id': '-'.join(format_id), + 'url': ism_url, + 'manifest_url': ism_url, + 'ext': 'ismv' if stream_type == 'video' else 'isma', + 'width': width, + 'height': height, + 'tbr': tbr, + 'asr': sampling_rate, + 'vcodec': 'none' if stream_type == 'audio' else fourcc, + 'acodec': 'none' if stream_type == 'video' else fourcc, + 'protocol': 'ism', + 'fragments': fragments, + '_download_params': { + 'duration': duration, + 'timescale': stream_timescale, + 'width': width or 0, + 'height': height or 0, + 'fourcc': fourcc, + 'codec_private_data': track.get('CodecPrivateData'), + 'sampling_rate': sampling_rate, + 'channels': int_or_none(track.get('Channels', 2)), + 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)), + 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)), + }, + }) + return formats + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) From 639e3b5c9985aacf7c0dc018c211a78161bbafd2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 19 Oct 2016 16:24:43 +0100 Subject: [PATCH 39/86] extract ISM formats in some of the extractors --- youtube_dl/extractor/microsoftvirtualacademy.py | 7 +++++-- youtube_dl/extractor/msn.py | 5 ++--- youtube_dl/extractor/onet.py | 4 ++-- youtube_dl/extractor/tvp.py | 3 +++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py index afd3e98ec..8e0aee0e6 100644 --- a/youtube_dl/extractor/microsoftvirtualacademy.py +++ b/youtube_dl/extractor/microsoftvirtualacademy.py @@ -71,12 +71,15 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): formats = [] for sources in settings.findall(compat_xpath('.//MediaSources')): - if sources.get('videoType') == 'smoothstreaming': - continue + sources_type = sources.get('videoType') for source in sources.findall(compat_xpath('./MediaSource')): video_url = source.text if not video_url or not video_url.startswith('http'): continue + if sources_type == 'smoothstreaming': + formats.extend(self._extract_ism_formats( + video_url, video_id, 'mss', fatal=False)) + continue video_mode = source.get('videoMode') height = int_or_none(self._search_regex( r'^(\d+)[pP]$', video_mode or '', 'height', default=None)) diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py index 1ec8e0f50..d75ce8b3b 100644 --- a/youtube_dl/extractor/msn.py +++ b/youtube_dl/extractor/msn.py @@ -69,10 +69,9 @@ class MSNIE(InfoExtractor): if not format_url: continue ext = determine_ext(format_url) - # .ism is not yet supported (see - # https://github.com/rg3/youtube-dl/issues/8118) if ext == 'ism': - continue + formats.extend(self._extract_ism_formats( + format_url + '/Manifest', display_id, 'mss', fatal=False)) if 'm3u8' in format_url: # m3u8_native should not be used here until # https://github.com/rg3/youtube-dl/issues/9913 is fixed diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 9cbc7c2e2..0a501b3e5 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -56,8 +56,8 @@ class OnetBaseIE(InfoExtractor): continue ext = determine_ext(video_url) if format_id == 'ism': - # TODO: Support Microsoft Smooth Streaming - continue + formats.extend(self._extract_ism_formats( + video_url, video_id, 'mss', fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 2dbbc2ca7..06ea2b40a 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -139,6 +139,9 @@ class TVPEmbedIE(InfoExtractor): # formats.extend(self._extract_mpd_formats( # video_url_base + '.ism/video.mpd', # video_id, mpd_id='dash', fatal=False)) + formats.extend(self._extract_ism_formats( + video_url_base + '.ism/Manifest', + video_id, 'mss', fatal=False)) formats.extend(self._extract_f4m_formats( video_url_base + '.ism/video.f4m', video_id, f4m_id='hds', fatal=False)) From 02dc0a36b72b7312996d59b9ec96768f925cb4a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:14:01 +0700 Subject: [PATCH 40/86] [utils] Introduce base_url --- test/test_utils.py | 8 ++++++++ youtube_dl/extractor/common.py | 5 +++-- youtube_dl/utils.py | 4 ++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b1b2effca..cb75ca53e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -69,6 +69,7 @@ from youtube_dl.utils import ( uppercase_escape, lowercase_escape, url_basename, + base_url, urlencode_postdata, urshift, update_url_query, @@ -437,6 +438,13 @@ class TestUtil(unittest.TestCase): url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), 'trailer.mp4') + def test_base_url(self): + self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/') + self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/') + self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/') + self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/') + self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/') + def test_parse_age_limit(self): self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(False), None) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2e9f05ae3..140ccf234 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -30,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media from ..utils import ( NO_DEFAULT, age_restricted, + base_url, bug_reports_message, clean_html, compiled_regex_type, @@ -1539,7 +1540,7 @@ class InfoExtractor(object): if res is False: return [] mpd, urlh = res - mpd_base_url = re.match(r'https?://[^?#&]+/', urlh.geturl()).group() + mpd_base_url = base_url(urlh.geturl()) return self._parse_mpd_formats( compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, @@ -1797,7 +1798,7 @@ class InfoExtractor(object): if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None: return [] - ism_base_url = re.match(r'https?://.+/', ism_url).group() + ism_base_url = base_url(ism_url) duration = int(ism_doc.attrib['Duration']) timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1a5ce8688..9595bcf9f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1691,6 +1691,10 @@ def url_basename(url): return path.strip('/').split('/')[-1] +def base_url(url): + return re.match(r'https?://[^?#&]+/', url).group() + + class HEADRequest(compat_urllib_request.Request): def get_method(self): return 'HEAD' From 1616f9b4525b8db229c162c21681e3c75abe4ce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:21:43 +0700 Subject: [PATCH 41/86] [extractor/common] Fix typo --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 140ccf234..7e01c5fbb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1839,10 +1839,10 @@ class InfoExtractor(object): next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t']) except IndexError: next_fragment_time = duration - fragment_ctx['duration'] = (next_fragment_time - frgament_time) / fragment_repeat + fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat for _ in range(fragment_repeat): fragments.append({ - 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern), + 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern), 'duration': fragment_ctx['duration'] / stream_timescale, }) fragment_ctx['time'] += fragment_ctx['duration'] From a18aeee8030a8c4eb4c69107d181195b17d08fa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:33:17 +0700 Subject: [PATCH 42/86] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index f36e04d13..6f2a946d1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core ++ Add basic support for Smooth Streaming protocol (#8118, #10969) +* Improve MPD manifest base URL extraction (#10909, #11079) +* Fix --match-filter for int-like strings (#11082) + +Extractors ++ [mva] Add support for ISM formats ++ [msn] Add support for ISM formats ++ [onet] Add support for ISM formats ++ [tvp] Add support for ISM formats ++ [nicknight] Add support for nicknight sites (#10769) + + version 2016.10.30 Extractors From 3365ea8929e53955fa1dd46b2b30492619c17055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:34:23 +0700 Subject: [PATCH 43/86] [extractor/common] Remove unused code --- youtube_dl/extractor/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7e01c5fbb..50841f0cf 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1798,8 +1798,6 @@ class InfoExtractor(object): if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None: return [] - ism_base_url = base_url(ism_url) - duration = int(ism_doc.attrib['Duration']) timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 From 8956d6608a02f8745a8d619b0f697a95e98981c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:39:36 +0700 Subject: [PATCH 44/86] release 2016.11.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3e020524b..975ea8700 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.31** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.31 +[debug] youtube-dl version 2016.11.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6f2a946d1..c80828512 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.02 Core + Add basic support for Smooth Streaming protocol (#8118, #10969) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e04fb6be2..7ed6b9006 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -483,6 +483,7 @@ - **nhl.com:videocenter:category**: NHL videocenter category - **nick.com** - **nick.de** + - **nicknight** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **Nintendo** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 16274c22d..7cdd94f29 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.31' +__version__ = '2016.11.02' From cc99a77ac1c5fb5859d75589d49bb25361a1fb2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 03:01:13 +0700 Subject: [PATCH 45/86] [extractor/generic] Add support for ISM manifests --- ChangeLog | 6 ++++++ youtube_dl/extractor/generic.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index c80828512..ec26e0c8d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [generic] Add support for ISM manifests + + version 2016.11.02 Core diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 15d1c0225..fc3d01eed 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1634,6 +1634,10 @@ class GenericIE(InfoExtractor): doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) + elif doc.tag == 'SmoothStreamingMedia': + info_dict['formats'] = self._parse_ism_formats(doc, url) + self._sort_formats(info_dict['formats']) + return info_dict elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): smil = self._parse_smil(doc, url, video_id) self._sort_formats(smil['formats']) @@ -2449,6 +2453,8 @@ class GenericIE(InfoExtractor): entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id) elif ext == 'f4m': entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) + elif re.search(r'(?i)\.ism/manifest', video_url): + entry_info_dict['formats'] = self._extract_ism_formats(video_url, video_id) else: entry_info_dict['url'] = video_url From 4f9cd4d36fa88758cdff822f03879c6e0b6aa42d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 2 Nov 2016 13:55:40 +0100 Subject: [PATCH 46/86] [radiocanada] extract subtitle(closes #11096) --- youtube_dl/extractor/radiocanada.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 6751270ee..321917ad0 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -125,6 +125,14 @@ class RadioCanadaIE(InfoExtractor): f4m_id='hds', fatal=False)) self._sort_formats(formats) + subtitles = {} + closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5') + if closed_caption_url: + subtitles['fr'] = [{ + 'url': closed_caption_url, + 'ext': determine_ext(closed_caption_url, 'vtt'), + }] + return { 'id': video_id, 'title': get_meta('Title'), @@ -135,6 +143,7 @@ class RadioCanadaIE(InfoExtractor): 'season_number': int_or_none('SrcSaison'), 'episode_number': int_or_none('SrcEpisode'), 'upload_date': unified_strdate(get_meta('Date')), + 'subtitles': subtitles, 'formats': formats, } From 26aae566902251f9674593a2b0f0ca7477b96a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 23:34:37 +0700 Subject: [PATCH 47/86] [extractor/generic] Improve ISM extraction --- youtube_dl/extractor/generic.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index fc3d01eed..0bb263ce7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2453,8 +2453,21 @@ class GenericIE(InfoExtractor): entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id) elif ext == 'f4m': entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) - elif re.search(r'(?i)\.ism/manifest', video_url): - entry_info_dict['formats'] = self._extract_ism_formats(video_url, video_id) + elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url): + # Just matching .ism/manifest is not enough to be reliably sure + # whether it's actually an ISM manifest or some other streaming + # manifest since there are various streaming URL formats + # possible (see [1]) as well as some other shenanigans like + # .smil/manifest URLs that actually serve an ISM (see [2]) and + # so on. + # Thus the most reasonable way to solve this is to delegate + # to generic extractor in order to look into the contents of + # the manifest itself. + # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats + # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest + entry_info_dict = self.url_result( + smuggle_url(video_url, {'to_generic': True}), + GenericIE.ie_key()) else: entry_info_dict['url'] = video_url From 4119a96ce57b11437efd329a8c2602ee7fa7ea2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 23:43:41 +0700 Subject: [PATCH 48/86] [extractor/generic] Skip URLs we came from when delegating ISM extraction --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0bb263ce7..a0a45dce0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2453,7 +2453,7 @@ class GenericIE(InfoExtractor): entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id) elif ext == 'f4m': entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) - elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url): + elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: # Just matching .ism/manifest is not enough to be reliably sure # whether it's actually an ISM manifest or some other streaming # manifest since there are various streaming URL formats From 3b4b66b50c6605a7c878364e023776aa8ac7b8b8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 3 Nov 2016 00:43:33 +0100 Subject: [PATCH 49/86] [shahid] add support for authentication(closes #11091) --- youtube_dl/extractor/shahid.py | 74 +++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index ca286abb1..62d41e88a 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -1,17 +1,24 @@ # coding: utf-8 from __future__ import unicode_literals +import re +import json + from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, parse_iso8601, str_or_none, + urlencode_postdata, + clean_html, ) class ShahidIE(InfoExtractor): - _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P\d+)/?' + _NETRC_MACHINE = 'shahid' + _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?Pepisode|movie)/(?P\d+)' _TESTS = [{ 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', 'info_dict': { @@ -27,18 +34,54 @@ class ShahidIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + 'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html', + 'only_matching': True }, { # shahid plus subscriber only 'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', 'only_matching': True }] - def _call_api(self, path, video_id, note): - data = self._download_json( - 'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={ - 'apiKey': 'sh@hid0nlin3', - 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', - }).get('data', {}) + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + + try: + user_data = self._download_json( + 'https://shahid.mbc.net/wd/service/users/login', + None, 'Logging in', data=json.dumps({ + 'email': email, + 'password': password, + 'basic': 'false', + }).encode('utf-8'), headers={ + 'Content-Type': 'application/json; charset=UTF-8', + })['user'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + fail_data = self._parse_json( + e.cause.read().decode('utf-8'), None, fatal=False) + if fail_data: + faults = fail_data.get('faults', []) + faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) + if faults_message: + raise ExtractorError(faults_message, expected=True) + raise + + self._download_webpage( + 'https://shahid.mbc.net/populateContext', + None, 'Populate Context', data=urlencode_postdata({ + 'firstName': user_data['firstName'], + 'lastName': user_data['lastName'], + 'userName': user_data['email'], + 'csg_user_name': user_data['email'], + 'subscriberId': user_data['id'], + 'sessionId': user_data['sessionId'], + })) + + def _get_api_data(self, response): + data = response.get('data', {}) error = data.get('error') if error: @@ -49,11 +92,11 @@ class ShahidIE(InfoExtractor): return data def _real_extract(self, url): - video_id = self._match_id(url) + page_type, video_id = re.match(self._VALID_URL, url).groups() - player = self._call_api( - 'Content/Episode/%s' % video_id, - video_id, 'Downloading player JSON') + player = self._get_api_data(self._download_json( + 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id, + video_id, 'Downloading player JSON')) if player.get('drm'): raise ExtractorError('This video is DRM protected.', expected=True) @@ -61,9 +104,12 @@ class ShahidIE(InfoExtractor): formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') self._sort_formats(formats) - video = self._call_api( - 'episode/%s' % video_id, video_id, - 'Downloading video JSON')['episode'] + video = self._get_api_data(self._download_json( + 'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id), + video_id, 'Downloading video JSON', query={ + 'apiKey': 'sh@hid0nlin3', + 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', + }))[page_type] title = video['title'] categories = [ From f4dfa9a5ed0756c882a947bd455cc2488d51ffd7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 3 Nov 2016 09:04:20 +0100 Subject: [PATCH 50/86] [tubitv] fix extraction(closes #11061) --- youtube_dl/extractor/tubitv.py | 39 ++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index c6572defb..3a37df2e8 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -9,7 +9,6 @@ from ..utils import ( int_or_none, sanitized_Request, urlencode_postdata, - parse_iso8601, ) @@ -19,17 +18,13 @@ class TubiTvIE(InfoExtractor): _NETRC_MACHINE = 'tubitv' _TEST = { 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', + 'md5': '43ac06be9326f41912dc64ccf7a80320', 'info_dict': { 'id': '283829', 'ext': 'mp4', 'title': 'The Comedian at The Friday', 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.', - 'uploader': 'Indie Rights Films', - 'upload_date': '20160111', - 'timestamp': 1452555979, - }, - 'params': { - 'skip_download': 'HLS download', + 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434', }, } @@ -58,19 +53,28 @@ class TubiTvIE(InfoExtractor): video_id = self._match_id(url) video_data = self._download_json( 'http://tubitv.com/oz/videos/%s/content' % video_id, video_id) - title = video_data['n'] + title = video_data['title'] formats = self._extract_m3u8_formats( - video_data['mh'], video_id, 'mp4', 'm3u8_native') + self._proto_relative_url(video_data['url']), + video_id, 'mp4', 'm3u8_native') self._sort_formats(formats) + thumbnails = [] + for thumbnail_url in video_data.get('thumbnails', []): + if not thumbnail_url: + continue + thumbnails.append({ + 'url': self._proto_relative_url(thumbnail_url), + }) + subtitles = {} - for sub in video_data.get('sb', []): - sub_url = sub.get('u') + for sub in video_data.get('subtitles', []): + sub_url = sub.get('url') if not sub_url: continue - subtitles.setdefault(sub.get('l', 'en'), []).append({ - 'url': sub_url, + subtitles.setdefault(sub.get('lang', 'English'), []).append({ + 'url': self._proto_relative_url(sub_url), }) return { @@ -78,9 +82,8 @@ class TubiTvIE(InfoExtractor): 'title': title, 'formats': formats, 'subtitles': subtitles, - 'thumbnail': video_data.get('ph'), - 'description': video_data.get('d'), - 'duration': int_or_none(video_data.get('s')), - 'timestamp': parse_iso8601(video_data.get('u')), - 'uploader': video_data.get('on'), + 'thumbnails': thumbnails, + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'uploader_id': video_data.get('publisher_id'), } From b811b4c93bd21c5a053f7deddd3c0a3fe2486184 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 3 Nov 2016 10:37:07 +0100 Subject: [PATCH 51/86] [vice] add support for uplynk preplay videos(#11101) --- youtube_dl/extractor/vice.py | 124 +++++++++++++++++++++++++++---- youtube_dl/extractor/viceland.py | 82 +------------------- 2 files changed, 114 insertions(+), 92 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index e2b2ce098..065c1fa82 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -1,12 +1,92 @@ from __future__ import unicode_literals import re +import time +import hashlib +import json +from .adobepass import AdobePassIE from .common import InfoExtractor -from ..utils import ExtractorError +from ..compat import compat_HTTPError +from ..utils import ( + int_or_none, + parse_age_limit, + str_or_none, + parse_duration, + ExtractorError, + extract_attributes, +) -class ViceIE(InfoExtractor): +class ViceBaseIE(AdobePassIE): + def _extract_preplay_video(self, url, webpage): + watch_hub_data = extract_attributes(self._search_regex( + r'(?s)()', webpage, 'watch hub')) + video_id = watch_hub_data['vms-id'] + title = watch_hub_data['video-title'] + + query = {} + is_locked = watch_hub_data.get('video-locked') == '1' + if is_locked: + resource = self._get_mvpd_resource( + 'VICELAND', title, video_id, + watch_hub_data.get('video-rating')) + query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) + + # signature generation algorithm is reverse engineered from signatureGenerator in + # webpack:///../shared/~/vice-player/dist/js/vice-player.js in + # https://www.viceland.com/assets/common/js/web.vendor.bundle.js + exp = int(time.time()) + 14400 + query.update({ + 'exp': exp, + 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), + }) + + try: + host = 'www.viceland' if is_locked else self._PREPLAY_HOST + preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + error = json.loads(e.cause.read().decode()) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) + raise + + video_data = preplay['video'] + base = video_data['base'] + uplynk_preplay_url = preplay['preplayURL'] + episode = video_data.get('episode', {}) + channel = video_data.get('channel', {}) + + subtitles = {} + cc_url = preplay.get('ccURL') + if cc_url: + subtitles['en'] = [{ + 'url': cc_url, + }] + + return { + '_type': 'url_transparent', + 'url': uplynk_preplay_url, + 'id': video_id, + 'title': title, + 'description': base.get('body'), + 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), + 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), + 'timestamp': int_or_none(video_data.get('created_at')), + 'age_limit': parse_age_limit(video_data.get('video_rating')), + 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), + 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), + 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), + 'season_number': int_or_none(watch_hub_data.get('season')), + 'season_id': str_or_none(episode.get('season_id')), + 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'), + 'uploader_id': str_or_none(channel.get('id')), + 'subtitles': subtitles, + 'ie_key': 'UplynkPreplay', + } + + +class ViceIE(ViceBaseIE): _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P[^/?#&]+)' _TESTS = [{ @@ -21,7 +101,7 @@ class ViceIE(InfoExtractor): 'add_ie': ['Ooyala'], }, { 'url': 'http://www.vice.com/video/how-to-hack-a-car', - 'md5': '6fb2989a3fed069fb8eab3401fc2d3c9', + 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', 'info_dict': { 'id': '3jstaBeXgAs', 'ext': 'mp4', @@ -32,6 +112,22 @@ class ViceIE(InfoExtractor): 'upload_date': '20140529', }, 'add_ie': ['Youtube'], + }, { + 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', + 'md5': '', + 'info_dict': { + 'id': '5816510690b70e6c5fd39a56', + 'ext': 'mp4', + 'uploader': 'Waypoint', + 'title': 'The Signal From Tölva', + 'uploader_id': '57f7d621e05ca860fa9ccaf9', + 'timestamp': 1477941983938, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['UplynkPreplay'], }, { 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'only_matching': True, @@ -42,21 +138,21 @@ class ViceIE(InfoExtractor): 'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show', 'only_matching': True, }] + _PREPLAY_HOST = 'video.vice' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - try: - embed_code = self._search_regex( - r'embedCode=([^&\'"]+)', webpage, - 'ooyala embed code', default=None) - if embed_code: - return self.url_result('ooyala:%s' % embed_code, 'Ooyala') - youtube_id = self._search_regex( - r'data-youtube-id="([^"]+)"', webpage, 'youtube id') + webpage, urlh = self._download_webpage_handle(url, video_id) + embed_code = self._search_regex( + r'embedCode=([^&\'"]+)', webpage, + 'ooyala embed code', default=None) + if embed_code: + return self.url_result('ooyala:%s' % embed_code, 'Ooyala') + youtube_id = self._search_regex( + r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None) + if youtube_id: return self.url_result(youtube_id, 'Youtube') - except ExtractorError: - raise ExtractorError('The page doesn\'t contain a video', expected=True) + return self._extract_preplay_video(urlh.geturl(), webpage) class ViceShowIE(InfoExtractor): diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 8742b607a..0eff055a6 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -1,23 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import time -import hashlib -import json - -from .adobepass import AdobePassIE -from ..compat import compat_HTTPError -from ..utils import ( - int_or_none, - parse_age_limit, - str_or_none, - parse_duration, - ExtractorError, - extract_attributes, -) +from .vice import ViceBaseIE -class VicelandIE(AdobePassIE): +class VicelandIE(ViceBaseIE): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P[a-f0-9]+)' _TEST = { 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', @@ -38,70 +25,9 @@ class VicelandIE(AdobePassIE): }, 'add_ie': ['UplynkPreplay'], } + _PREPLAY_HOST = 'www.viceland' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - watch_hub_data = extract_attributes(self._search_regex( - r'(?s)()', webpage, 'watch hub')) - video_id = watch_hub_data['vms-id'] - title = watch_hub_data['video-title'] - - query = {} - if watch_hub_data.get('video-locked') == '1': - resource = self._get_mvpd_resource( - 'VICELAND', title, video_id, - watch_hub_data.get('video-rating')) - query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) - - # signature generation algorithm is reverse engineered from signatureGenerator in - # webpack:///../shared/~/vice-player/dist/js/vice-player.js in - # https://www.viceland.com/assets/common/js/web.vendor.bundle.js - exp = int(time.time()) + 14400 - query.update({ - 'exp': exp, - 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), - }) - - try: - preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - error = json.loads(e.cause.read().decode()) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) - raise - - video_data = preplay['video'] - base = video_data['base'] - uplynk_preplay_url = preplay['preplayURL'] - episode = video_data.get('episode', {}) - channel = video_data.get('channel', {}) - - subtitles = {} - cc_url = preplay.get('ccURL') - if cc_url: - subtitles['en'] = [{ - 'url': cc_url, - }] - - return { - '_type': 'url_transparent', - 'url': uplynk_preplay_url, - 'id': video_id, - 'title': title, - 'description': base.get('body'), - 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), - 'timestamp': int_or_none(video_data.get('created_at')), - 'age_limit': parse_age_limit(video_data.get('video_rating')), - 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), - 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), - 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), - 'season_number': int_or_none(watch_hub_data.get('season')), - 'season_id': str_or_none(episode.get('season_id')), - 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'), - 'uploader_id': str_or_none(channel.get('id')), - 'subtitles': subtitles, - 'ie_key': 'UplynkPreplay', - } + return self._extract_preplay_video(url, webpage) From 3a86b2c51e508c11502fcf7b74c470c68fd5bcd6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 3 Nov 2016 18:55:55 +0800 Subject: [PATCH 52/86] Ignore and clean .wav files --- .gitignore | 1 + Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 002b700f5..354505d66 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,7 @@ updates_key.pem *.m4v *.mp3 *.3gp +*.wav *.part *.swp test/testdata diff --git a/Makefile b/Makefile index 8d66e48c9..b7cec1666 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete From b47ecd0b74010cdd4b16049c33ef048b72cf6207 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 3 Nov 2016 12:50:25 +0100 Subject: [PATCH 53/86] [vzaar] Add new extractor(closes #11093) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vzaar.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/vzaar.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f30ac5aaf..499239a22 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1101,6 +1101,7 @@ from .vrt import VRTIE from .vube import VubeIE from .vuclip import VuClipIE from .vyborymos import VyboryMosIE +from .vzaar import VzaarIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py new file mode 100644 index 000000000..b270f08d1 --- /dev/null +++ b/youtube_dl/extractor/vzaar.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, +) + + +class VzaarIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P\d+)' + _TESTS = [{ + 'url': 'https://vzaar.com/videos/1152805', + 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf', + 'info_dict': { + 'id': '1152805', + 'ext': 'mp4', + 'title': 'sample video (public)', + }, + }, { + 'url': 'https://view.vzaar.com/27272/player', + 'md5': '3b50012ac9bbce7f445550d54e0508f2', + 'info_dict': { + 'id': '27272', + 'ext': 'mp3', + 'title': 'MP3', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + 'http://view.vzaar.com/v2/%s/video' % video_id, video_id) + source_url = video_data['sourceUrl'] + + info = { + 'id': video_id, + 'title': video_data['videoTitle'], + 'url': source_url, + 'thumbnail': self._proto_relative_url(video_data.get('poster')), + 'duration': float_or_none(video_data.get('videoDuration')), + } + if 'audio' in source_url: + info.update({ + 'vcodec': 'none', + 'ext': 'mp3', + }) + else: + info.update({ + 'width': int_or_none(video_data.get('width')), + 'height': int_or_none(video_data.get('height')), + 'ext': 'mp4', + }) + return info From 22979993e7afd0b8792011ead3d64d3945703ce8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 3 Nov 2016 16:07:22 +0100 Subject: [PATCH 54/86] [vice] add coding cookie --- youtube_dl/extractor/vice.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 065c1fa82..8a00c8fee 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re From 10380e55def817f568b3e1b8e16a2133ded19124 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 3 Nov 2016 16:08:57 +0100 Subject: [PATCH 55/86] [downloader/ism] fix AVC Decoder Configuration Record creation in python 3 --- youtube_dl/downloader/ism.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 9f8f14b66..53f319cae 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -149,9 +149,7 @@ def write_piff_header(stream, params): if fourcc in ('H264', 'AVC1'): sps, pps = codec_private_data.split(u32.pack(1))[1:] avcc_payload = u8.pack(1) # configuration version - avcc_payload += sps[1] # avc profile indication - avcc_payload += sps[2] # profile compatibility - avcc_payload += sps[3] # avc level indication + avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001) avcc_payload += u16.pack(len(sps)) From 9d64e1dcdc610491217a3197f97e5bd8120d1974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Nov 2016 22:15:09 +0700 Subject: [PATCH 56/86] [downloader/ism] Fix typo --- youtube_dl/downloader/ism.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 53f319cae..93cac5e98 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -129,7 +129,7 @@ def write_piff_header(stream, params): sample_entry_payload += u1616.pack(params['sampling_rate']) if fourcc == 'AACL': - smaple_entry_box = box(b'mp4a', sample_entry_payload) + sample_entry_box = box(b'mp4a', sample_entry_payload) else: sample_entry_payload = sample_entry_payload sample_entry_payload += u16.pack(0) # pre defined @@ -158,8 +158,8 @@ def write_piff_header(stream, params): avcc_payload += u16.pack(len(pps)) avcc_payload += pps sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record - smaple_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry - stsd_payload += smaple_entry_box + sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry + stsd_payload += sample_entry_box stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box From 32f2627aed68ed2981f438067252b9921c4a39fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Nov 2016 22:22:40 +0700 Subject: [PATCH 57/86] [vodlocker] Add another removed file pattern (closes #11106) --- youtube_dl/extractor/vodlocker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index c85b474d2..bbfa6e5f2 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -31,7 +31,8 @@ class VodlockerIE(InfoExtractor): if any(p in webpage for p in ( '>THIS FILE WAS DELETED<', '>File Not Found<', - 'The file you were looking for could not be found, sorry for any inconvenience.<')): + 'The file you were looking for could not be found, sorry for any inconvenience.<', + '>The file was removed')): raise ExtractorError('Video %s does not exist' % video_id, expected=True) fields = self._hidden_inputs(webpage) From c4c9b8440cd19838a1ef283cc54ebf0630905698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 05:02:31 +0700 Subject: [PATCH 58/86] [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 manifests (closes #11113) --- youtube_dl/extractor/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 50841f0cf..5f4c984a9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1280,9 +1280,10 @@ class InfoExtractor(object): } resolution = last_info.get('RESOLUTION') if resolution: - width_str, height_str = resolution.split('x') - f['width'] = int(width_str) - f['height'] = int(height_str) + mobj = re.search(r'(?P\d+)[xX](?P\d+)', resolution) + if mobj: + f['width'] = int(mobj.group('width')) + f['height'] = int(mobj.group('height')) # Unified Streaming Platform mobj = re.search( r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url']) From f93ac1d17571d6ddf9cfb56f0bb51bdef6a04799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 21:17:56 +0700 Subject: [PATCH 59/86] [anvato] Extract more metadata --- youtube_dl/extractor/anvato.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index cb29cf111..46ca1899e 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -157,14 +157,7 @@ class AnvatoIE(InfoExtractor): video_data_url, video_id, transform_source=strip_jsonp, data=json.dumps(payload).encode('utf-8')) - def _extract_anvato_videos(self, webpage, video_id): - anvplayer_data = self._parse_json(self._html_search_regex( - r']+data-anvp=\'([^\']+)\'', webpage, - 'Anvato player data'), video_id) - - video_id = anvplayer_data['video'] - access_key = anvplayer_data['accessKey'] - + def _get_anvato_videos(self, access_key, video_id): video_data = self._get_video_json(access_key, video_id) formats = [] @@ -218,7 +211,19 @@ class AnvatoIE(InfoExtractor): 'formats': formats, 'title': video_data.get('def_title'), 'description': video_data.get('def_description'), + 'tags': video_data.get('def_tags', '').split(','), 'categories': video_data.get('categories'), 'thumbnail': video_data.get('thumbnail'), + 'timestamp': int_or_none(video_data.get( + 'ts_published') or video_data.get('ts_added')), + 'uploader': video_data.get('mcp_id'), + 'duration': int_or_none(video_data.get('duration')), 'subtitles': subtitles, } + + def _extract_anvato_videos(self, webpage, video_id): + anvplayer_data = self._parse_json(self._html_search_regex( + r']+data-anvp=\'([^\']+)\'', webpage, + 'Anvato player data'), video_id) + return self._get_anvato_videos( + anvplayer_data['accessKey'], anvplayer_data['video']) From f3c705f8ec3505240bdfbe622693c3cd2ce10857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 21:32:30 +0700 Subject: [PATCH 60/86] [fox9] Add extractor (closes #11110) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/fox9.py | 43 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 youtube_dl/extractor/fox9.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 499239a22..d7ad5b8fc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -296,6 +296,7 @@ from .footyroom import FootyRoomIE from .formula1 import Formula1IE from .fourtube import FourTubeIE from .fox import FOXIE +from .fox9 import FOX9IE from .foxgay import FoxgayIE from .foxnews import ( FoxNewsIE, diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py new file mode 100644 index 000000000..56d9975d0 --- /dev/null +++ b/youtube_dl/extractor/fox9.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .anvato import AnvatoIE +from ..utils import js_to_json + + +class FOX9IE(AnvatoIE): + _VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P\d+)-story' + _TESTS = [{ + 'url': 'http://www.fox9.com/news/215123287-story', + 'md5': 'd6e1b2572c3bab8a849c9103615dd243', + 'info_dict': { + 'id': '314473', + 'ext': 'mp4', + 'title': 'Bear climbs tree in downtown Duluth', + 'description': 'md5:6a36bfb5073a411758a752455408ac90', + 'duration': 51, + 'timestamp': 1478123580, + 'upload_date': '20161102', + 'uploader': 'EPFOX', + 'categories': ['News', 'Sports'], + 'tags': ['news', 'video'], + }, + }, { + 'url': 'http://www.fox9.com/news/investigators/214070684-story', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_id = self._parse_json( + self._search_regex( + r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;', + webpage, 'anvato playlist'), + video_id, transform_source=js_to_json)[0]['video'] + + return self._get_anvato_videos( + 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b', + video_id) From c897af8aacae08852c70fbcec9fa6c42e6e278f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 21:33:08 +0700 Subject: [PATCH 61/86] [cbslocal] Update test --- youtube_dl/extractor/cbslocal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 4bcd104af..289709c97 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -22,6 +22,7 @@ class CBSLocalIE(AnvatoIE): 'thumbnail': 're:^https?://.*', 'timestamp': 1463440500, 'upload_date': '20160516', + 'uploader': 'CBS', 'subtitles': { 'en': 'mincount:5', }, @@ -35,6 +36,7 @@ class CBSLocalIE(AnvatoIE): 'Syndication\\Curb.tv', 'Content\\News' ], + 'tags': ['CBS 2 News Evening'], }, }, { # SendtoNews embed From 640aff1d0c3d008f1286bb49b559938a7b5cd65a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 21:45:24 +0700 Subject: [PATCH 62/86] [anvato] Improve formats extraction --- youtube_dl/extractor/anvato.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index 46ca1899e..623f44dce 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -163,9 +163,10 @@ class AnvatoIE(InfoExtractor): formats = [] for published_url in video_data['published_urls']: video_url = published_url['embed_url'] + media_format = published_url.get('format') ext = determine_ext(video_url) - if ext == 'smil': + if ext == 'smil' or media_format == 'smil': formats.extend(self._extract_smil_formats(video_url, video_id)) continue @@ -176,7 +177,7 @@ class AnvatoIE(InfoExtractor): 'tbr': tbr if tbr != 0 else None, } - if ext == 'm3u8': + if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'): # Not using _extract_m3u8_formats here as individual media # playlists are also included in published_urls. if tbr is None: @@ -187,7 +188,7 @@ class AnvatoIE(InfoExtractor): 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), 'ext': 'mp4', }) - elif ext == 'mp3': + elif ext == 'mp3' or media_format == 'mp3': a_format['vcodec'] = 'none' else: a_format.update({ From 09ffe34b001ed0af411f964305ccb7595a745a99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 21:59:42 +0700 Subject: [PATCH 63/86] [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index ec26e0c8d..75a01388a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,19 @@ version +Core +* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 + manifests (#11113) +* [downloader/ism] Fix AVC Decoder Configuration Record + Extractors ++ [fox9] Add support for fox9.com (#11110) ++ [anvato] Extract more metadata and improve formats extraction +* [vodlocker] Improve removed videos detection (#11106) ++ [vzaar] Add support for vzaar.com (#11093) ++ [vice] Add support for uplynk preplay videos (#11101) +* [tubitv] Fix extraction (#11061) ++ [shahid] Add support for authentication (#11091) ++ [radiocanada] Add subtitles support (#11096) + [generic] Add support for ISM manifests From b30e4c275431124453ba4bc67b99d05bbc7ae8b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 22:07:54 +0700 Subject: [PATCH 64/86] release 2016.11.04 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 975ea8700..f8b195fe7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.04** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.02 +[debug] youtube-dl version 2016.11.04 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 75a01388a..8a98f6f7e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.04 Core * [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7ed6b9006..e7e452d93 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -247,6 +247,7 @@ - **FootyRoom** - **Formula1** - **FOX** + - **FOX9** - **Foxgay** - **foxnews**: Fox News and Fox Business Video - **foxnews:article** @@ -870,6 +871,7 @@ - **vube**: Vube.com - **VuClip** - **VyboryMos** + - **Vzaar** - **Walla** - **washingtonpost** - **washingtonpost:article** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7cdd94f29..7ae7f6279 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.02' +__version__ = '2016.11.04' From de328af36264c35a1af6037b1a39f42d5832887a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 5 Nov 2016 03:24:42 +0700 Subject: [PATCH 65/86] [toutv] Relax _VALID_URL (closes #11121) --- youtube_dl/extractor/toutv.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index d2d5c1171..573f2ff6b 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -15,11 +15,11 @@ from ..utils import ( class TouTvIE(InfoExtractor): _NETRC_MACHINE = 'toutv' IE_NAME = 'tou.tv' - _VALID_URL = r'https?://ici\.tou\.tv/(?P[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' + _VALID_URL = r'https?://ici\.tou\.tv/(?P[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)' _access_token = None _claims = None - _TEST = { + _TESTS = [{ 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', 'info_dict': { 'id': '122017', @@ -33,7 +33,10 @@ class TouTvIE(InfoExtractor): 'skip_download': True, }, 'skip': '404 Not Found', - } + }, { + 'url': 'http://ici.tou.tv/hackers', + 'only_matching': True, + }] def _real_initialize(self): email, password = self._get_login_info() From f420902a3b144c94fba449537e474aca0e101112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Nov 2016 21:11:18 +0700 Subject: [PATCH 66/86] [yahoo] Add another content id regex (closes #11088) --- youtube_dl/extractor/yahoo.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 91f0a0dbb..ca92c60c3 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -201,6 +201,19 @@ class YahooIE(InfoExtractor): }, 'skip': 'redirect to https://www.yahoo.com/music', }, + { + # ytwnews://cavideo/ + 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html', + 'info_dict': { + 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff', + 'ext': 'mp4', + 'title': '單車天使 - 中文版預', + 'description': '中文版預', + }, + 'params': { + 'skip_download': True, + }, + }, ] def _real_extract(self, url): @@ -270,6 +283,7 @@ class YahooIE(InfoExtractor): r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), r']data-uuid=["\']([^"\']+)', r'yahoo://article/view\?.*\buuid=([^&"\']+)', + r']+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']', ] video_id = self._search_regex( CONTENT_ID_REGEXES, webpage, 'content ID') From b61cd51869d382d19dbd232cc74e010bb2b1ed12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Nov 2016 21:16:33 +0700 Subject: [PATCH 67/86] [yahoo] Add test and improve some content id regex --- youtube_dl/extractor/yahoo.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index ca92c60c3..4951414e9 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -201,6 +201,19 @@ class YahooIE(InfoExtractor): }, 'skip': 'redirect to https://www.yahoo.com/music', }, + { + # yahoo://article/ + 'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html', + 'info_dict': { + 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', + 'ext': 'mp4', + 'title': "'True Story' Trailer", + 'description': 'True Story', + }, + 'params': { + 'skip_download': True, + }, + }, { # ytwnews://cavideo/ 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html', @@ -282,7 +295,7 @@ class YahooIE(InfoExtractor): r'"first_videoid"\s*:\s*"([^"]+)"', r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), r']data-uuid=["\']([^"\']+)', - r'yahoo://article/view\?.*\buuid=([^&"\']+)', + r']+yahoo://article/view\?.*\buuid=([^&"\']+)', r']+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']', ] video_id = self._search_regex( From 519d8970496125bca8a7067d841e5c5e5263c26d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Nov 2016 21:28:51 +0700 Subject: [PATCH 68/86] [drtuber] Add support for embed URLs --- youtube_dl/extractor/drtuber.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index e8870c460..8baad18f6 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -10,8 +10,8 @@ from ..utils import ( class DrTuberIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P\d+)/(?P[\w-]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' + _TESTS = [{ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'md5': '93e680cf2536ad0dfb7e74d94a89facd', 'info_dict': { @@ -25,14 +25,18 @@ class DrTuberIE(InfoExtractor): 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, } - } + }, { + 'url': 'http://www.drtuber.com/embed/489939', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, display_id) + webpage = self._download_webpage( + 'http://www.drtuber.com/video/%s' % video_id, display_id) video_url = self._html_search_regex( r' Date: Sun, 6 Nov 2016 21:29:15 +0700 Subject: [PATCH 69/86] [drtuber] Fix title extraction --- youtube_dl/extractor/drtuber.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 8baad18f6..95ecef660 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -42,7 +42,7 @@ class DrTuberIE(InfoExtractor): r']*>

([^<]+)<', + (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<', r']+class="title_substrate">([^<]+)

', r'([^<]+) - \d+'), webpage, 'title') From 37e7a71c6c777635c4ed02339339ff27ccb04f58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Nov 2016 21:33:51 +0700 Subject: [PATCH 70/86] [extractor/generic] Add support for drtuber embds (closes #11098) --- youtube_dl/extractor/drtuber.py | 6 ++++++ youtube_dl/extractor/generic.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 95ecef660..22da8e481 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -30,6 +30,12 @@ class DrTuberIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)', + webpage) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a0a45dce0..5c2782754 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -47,6 +47,7 @@ from .svt import SVTIE from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE +from .drtuber import DrTuberIE from .vimeo import VimeoIE from .dailymotion import ( DailymotionIE, @@ -1996,6 +1997,11 @@ class GenericIE(InfoExtractor): if tnaflix_urls: return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) + # Look for embedded DrTuber player + drtuber_urls = DrTuberIE._extract_urls(webpage) + if drtuber_urls: + return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key()) + # Look for embedded Tvigle player mobj = re.search( r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) From 5021ca6c13e3d011dc24ecf38d326e3a59e726a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Nov 2016 21:39:29 +0700 Subject: [PATCH 71/86] [redtube] Add support for embed URLs --- youtube_dl/extractor/redtube.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 721fc3a9e..7d9285ffb 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -10,8 +10,8 @@ from ..utils import ( class RedTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://www.redtube.com/66418', 'md5': '7b8c22b5e7098a3e1c09709df1126d2d', 'info_dict': { @@ -23,11 +23,15 @@ class RedTubeIE(InfoExtractor): 'view_count': int, 'age_limit': 18, } - } + }, { + 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://www.redtube.com/%s' % video_id, video_id) if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): raise ExtractorError('Video %s has been removed' % video_id, expected=True) From e28ed498e64545f02f2d3dbccf97ecf0e47aa82a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Nov 2016 21:42:41 +0700 Subject: [PATCH 72/86] [extractor/generic] Add support for redtube embds (closes #11099) --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/redtube.py | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5c2782754..b1315a9c8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -48,6 +48,7 @@ from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE from .drtuber import DrTuberIE +from .redtube import RedTubeIE from .vimeo import VimeoIE from .dailymotion import ( DailymotionIE, @@ -2002,6 +2003,11 @@ class GenericIE(InfoExtractor): if drtuber_urls: return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key()) + # Look for embedded RedTube player + redtube_urls = RedTubeIE._extract_urls(webpage) + if redtube_urls: + return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key()) + # Look for embedded Tvigle player mobj = re.search( r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 7d9285ffb..c367a6ae7 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -28,6 +30,12 @@ class RedTubeIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( From b52c9ef1655042688a4822d241af398592b951f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Nov 2016 21:52:00 +0700 Subject: [PATCH 73/86] [extractor/generic] Improve support for pornhub embeds (closes #11100) --- youtube_dl/extractor/generic.py | 10 +++++----- youtube_dl/extractor/pornhub.py | 13 ++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b1315a9c8..bde65fa27 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1983,11 +1983,6 @@ class GenericIE(InfoExtractor): if sportbox_urls: return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') - # Look for embedded PornHub player - pornhub_url = PornHubIE._extract_url(webpage) - if pornhub_url: - return self.url_result(pornhub_url, 'PornHub') - # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) if xhamster_urls: @@ -1998,6 +1993,11 @@ class GenericIE(InfoExtractor): if tnaflix_urls: return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) + # Look for embedded PornHub player + pornhub_urls = PornHubIE._extract_urls(webpage) + if pornhub_urls: + return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key()) + # Look for embedded DrTuber player drtuber_urls = DrTuberIE._extract_urls(webpage) if drtuber_urls: diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 0724efc09..40dbe6967 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor): (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) - (?P<id>[0-9a-z]+) + (?P<id>[\da-z]+) ''' _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', @@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor): 'only_matching': True, }] - @classmethod - def _extract_url(cls, webpage): - mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage) - if mobj: - return mobj.group('url') + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)', + webpage) def _extract_count(self, pattern, webpage, name): return str_to_int(self._search_regex( From 98708e6cbdc8b94723b30341b714a94905c70c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Nov 2016 23:20:15 +0700 Subject: [PATCH 74/86] [ard] Remove age restriction check (closes #11129) --- youtube_dl/extractor/ard.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 95ada0274..35f3656f1 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -178,8 +178,6 @@ class ARDMediathekIE(InfoExtractor): ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'), ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<', 'Video %s is no longer available'), - ('Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.', - 'This program is only suitable for those aged 12 and older. Video %s is therefore only available between 8 pm and 6 am.'), ) for pattern, message in ERRORS: From cb882540e85ea01e83745f24454ebee733044d4f Mon Sep 17 00:00:00 2001 From: DarkZeros <mailszeros@gmail.com> Date: Tue, 4 Oct 2016 02:04:24 +0100 Subject: [PATCH 75/86] [mitele] Fix extraction after website redesign (fixes #10824) --- ChangeLog | 6 +++ youtube_dl/extractor/mitele.py | 93 +++++++++++++++++++--------------- youtube_dl/extractor/ooyala.py | 7 +-- 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8a98f6f7e..7ca72b3d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [mitele] Fix extraction after website redesign (#10824) + + version 2016.11.04 Core diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 2294745d4..c41ab1e91 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -1,19 +1,20 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import uuid from .common import InfoExtractor from ..compat import ( + compat_str, compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( - get_element_by_attribute, int_or_none, - remove_start, extract_attributes, determine_ext, + smuggle_url, + parse_duration, ) @@ -72,16 +73,14 @@ class MiTeleBaseIE(InfoExtractor): } -class MiTeleIE(MiTeleBaseIE): +class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' - _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' + _VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player' _TESTS = [{ - 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', - # MD5 is unstable + 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', 'info_dict': { - 'id': '0NF1jJnxS1Wu3pHrmvFyw2', - 'display_id': 'programa-144', + 'id': '57b0dfb9c715da65618b4afa', 'ext': 'mp4', 'title': 'Tor, la web invisible', 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', @@ -91,57 +90,71 @@ class MiTeleIE(MiTeleBaseIE): 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, + 'add_ie': ['Ooyala'], }, { # no explicit title - 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/', + 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', 'info_dict': { - 'id': 'eLZSwoEd1S3pVyUm8lc6F', - 'display_id': 'programa-226', + 'id': '57b0de3dc915da14058b4876', 'ext': 'mp4', - 'title': 'Cuarto Milenio - Temporada 6 - Programa 226', - 'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', + 'title': 'Cuarto Milenio Temporada 6 Programa 226', + 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'series': 'Cuarto Milenio', 'season': 'Temporada 6', 'episode': 'Programa 226', 'thumbnail': 're:(?i)^https?://.*\.jpg$', - 'duration': 7312, + 'duration': 7313, }, 'params': { 'skip_download': True, }, + 'add_ie': ['Ooyala'], }] def _real_extract(self, url): - display_id = self._match_id(url) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - webpage = self._download_webpage(url, display_id) + gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None) + gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script') + # Get a appKey/uuid for getting the session key + appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable') + appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey') + uid = compat_str(uuid.uuid4()) + session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid) + session_json = self._download_json(session_url, video_id, 'Downloading session keys') + sessionKey = compat_str(session_json['sessionKey']) - info = self._get_player_info(url, webpage) + paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey + paths = self._download_json(paths_url, video_id, 'Downloading paths JSON') + ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] + data_p = ( + 'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] + + '/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full') + data = self._download_json(data_p, video_id, 'Downloading data JSON') + source = data['hits']['hits'][0]['_source'] + embedCode = source['offers'][0]['embed_codes'][0] - title = self._search_regex( - r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', - webpage, 'title', default=None) + titles = source['localizable_titles'][0] + title = titles.get('title_medium') or titles['title_long'] + episode = titles['title_sort_name'] + description = titles['summary_long'] + titles_series = source['localizable_titles_series'][0] + series = titles_series['title_long'] + titles_season = source['localizable_titles_season'][0] + season = titles_season['title_medium'] + duration = parse_duration(source['videos'][0]['duration']) - mobj = re.search(r'''(?sx) - class="Destacado-text"[^>]*>.*?<h1>\s* - <span>(?P<series>[^<]+)</span>\s* - <span>(?P<season>[^<]+)</span>\s* - <span>(?P<episode>[^<]+)</span>''', webpage) - series, season, episode = mobj.groups() if mobj else [None] * 3 - - if not title: - if mobj: - title = '%s - %s - %s' % (series, season, episode) - else: - title = remove_start(self._search_regex( - r'<title>([^<]+)', webpage, 'title'), 'Ver online ') - - info.update({ - 'display_id': display_id, + return { + '_type': 'url_transparent', + # for some reason only HLS is supported + 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}), + 'id': video_id, 'title': title, - 'description': get_element_by_attribute('class', 'text', webpage), + 'description': description, 'series': series, 'season': season, 'episode': episode, - }) - return info + 'duration': duration, + 'thumbnail': source['images'][0]['url'], + } diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 72ec20938..c2807d0f6 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor): _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' - def _extract(self, content_tree_url, video_id, domain='example.org'): + def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] @@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor): self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + compat_urllib_parse_urlencode({ 'domain': domain, - 'supportedFormats': 'mp4,rtmp,m3u8,hds', + 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds', }), video_id) cur_auth_data = auth_data['authorization_data'][embed_code] @@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE): url, smuggled_data = unsmuggle_url(url, {}) embed_code = self._match_id(url) domain = smuggled_data.get('domain') + supportedformats = smuggled_data.get('supportedformats') content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) - return self._extract(content_tree_url, embed_code, domain) + return self._extract(content_tree_url, embed_code, domain, supportedformats) class OoyalaExternalIE(OoyalaBaseIE): From 97726317ac8e905dc72e75c7c2a823280c51af00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Nov 2016 23:53:22 +0700 Subject: [PATCH 76/86] [README.md] Mention HTTP headers and alternative way to obtain cookies and headers in -g FAQ --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0f4088adc..98e374420 100644 --- a/README.md +++ b/README.md @@ -758,7 +758,7 @@ Once the video is fully downloaded, use any video player, such as [mpv](https:// ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser. -It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. +It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`. It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule. From ebc7ab1e231483f189290608425a23590cae6af9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 00:29:12 +0700 Subject: [PATCH 77/86] [espn] Fix extraction (closes #11041) --- youtube_dl/extractor/espn.py | 126 ++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 6d10f8e68..8795e0ddf 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -1,38 +1,117 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import remove_end +from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + unified_timestamp, +) class ESPNIE(InfoExtractor): - _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P[^/]+)' + _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P\d+)' _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', - 'md5': '60e5d097a523e767d06479335d1bdc58', 'info_dict': { - 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', + 'id': '10365079', 'ext': 'mp4', 'title': '30 for 30 Shorts: Judging Jewell', - 'description': None, + 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f', + 'timestamp': 1390936111, + 'upload_date': '20140128', }, 'params': { 'skip_download': True, }, - 'add_ie': ['OoyalaExternal'], }, { # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season 'url': 'http://espn.go.com/video/clip?id=2743663', - 'md5': 'f4ac89b59afc7e2d7dbb049523df6768', 'info_dict': { - 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', + 'id': '2743663', 'ext': 'mp4', 'title': 'Must-See Moments: Best of the MLS season', + 'description': 'md5:4c2d7232beaea572632bec41004f0aeb', + 'timestamp': 1449446454, + 'upload_date': '20151207', }, 'params': { 'skip_download': True, }, - 'add_ie': ['OoyalaExternal'], + 'expected_warnings': ['Unable to download f4m manifest'], }, { + 'url': 'http://www.espn.com/video/clip?id=10365079', + 'only_matching': True, + }, { + 'url': 'http://www.espn.com/video/clip/_/id/17989860', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + clip = self._download_json( + 'http://api-app.espn.com/v1/video/clips/%s' % video_id, + video_id)['videos'][0] + + title = clip['headline'] + + format_urls = set() + formats = [] + + def traverse_source(source, base_source_id=None): + for source_id, source in source.items(): + if isinstance(source, compat_str): + extract_source(source, base_source_id) + elif isinstance(source, dict): + traverse_source( + source, + '%s-%s' % (base_source_id, source_id) + if base_source_id else source_id) + + def extract_source(source_url, source_id=None): + if source_url in format_urls: + return + format_urls.add(source_url) + ext = determine_ext(source_url) + if ext == 'smil': + formats.extend(self._extract_smil_formats( + source_url, video_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, video_id, f4m_id=source_id, fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=source_id, fatal=False)) + else: + formats.append({ + 'url': source_url, + 'format_id': source_id, + }) + + traverse_source(clip['links']['source']) + self._sort_formats(formats) + + description = clip.get('caption') or clip.get('description') + thumbnail = clip.get('thumbnail') + duration = int_or_none(clip.get('duration')) + timestamp = unified_timestamp(clip.get('originalPublishDate')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + } + + +class ESPNArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P[^/]+)' + _TESTS = [{ 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'only_matching': True, }, { @@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor): }, { 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', 'only_matching': True, - }, { - 'url': 'http://www.espn.com/video/clip?id=10365079', - 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url) + def _real_extract(self, url): video_id = self._match_id(url) @@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor): r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P\d+)', webpage, 'video id', group='id') - cms = 'espn' - if 'data-source="intl"' in webpage: - cms = 'intl' - player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms) - player = self._download_webpage( - player_url, video_id) - - pcode = self._search_regex( - r'["\']pcode=([^"\']+)["\']', player, 'pcode') - - title = remove_end( - self._og_search_title(webpage), - '- ESPN Video').strip() - - return { - '_type': 'url_transparent', - 'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode), - 'ie_key': 'OoyalaExternal', - 'title': title, - } + return self.url_result( + 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) From 5d47b38cf5acb252e126ebdd81a21b5035256bed Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 8 Nov 2016 21:53:41 +0800 Subject: [PATCH 78/86] [tmz:article] Fix extraction (closes #11052) --- ChangeLog | 1 + youtube_dl/extractor/tmz.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7ca72b3d7..78c78afcd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [tmz:article] Fix extraction (#11052) * [mitele] Fix extraction after website redesign (#10824) diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py index 979856e9a..419f9d92e 100644 --- a/youtube_dl/extractor/tmz.py +++ b/youtube_dl/extractor/tmz.py @@ -32,12 +32,15 @@ class TMZArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P[^/]+)/?' _TEST = { 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', - 'md5': 'e482a414a38db73087450e3a6ce69d00', + 'md5': '3316ff838ae5bb7f642537825e1e90d2', 'info_dict': { 'id': '0_6snoelag', - 'ext': 'mp4', + 'ext': 'mov', 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', + 'timestamp': 1429467813, + 'upload_date': '20150419', + 'uploader_id': 'batchUser', } } @@ -45,12 +48,9 @@ class TMZArticleIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - embedded_video_info_str = self._html_search_regex( - r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info') - - embedded_video_info = self._parse_json( - embedded_video_info_str, video_id, - transform_source=lambda s: s.replace('\\', '')) + embedded_video_info = self._parse_json(self._html_search_regex( + r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'), + video_id) return self.url_result( 'http://www.tmz.com/videos/%s/' % embedded_video_info['id']) From f700afa24c802c2a157a67e00c874679678d5062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:09:03 +0700 Subject: [PATCH 79/86] [ChangeLog] Actualize --- ChangeLog | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ChangeLog b/ChangeLog index 78c78afcd..e61585904 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,7 +2,16 @@ version Extractors * [tmz:article] Fix extraction (#11052) +* [espn] Fix extraction (#11041) * [mitele] Fix extraction after website redesign (#10824) +- [ard] Remove age restriction check (#11129) +* [generic] Improve support for pornhub.com embeds (#11100) ++ [generic] Add support for redtube.com embeds (#11099) ++ [generic] Add support for drtuber.com embeds (#11098) ++ [redtube] Add support for embed URLs ++ [drtuber] Add support for embed URLs ++ [yahoo] Improve content id extraction (#11088) +* [toutv] Relax URL regular expression (#11121) version 2016.11.04 From c58e07a7aae2af4f6fdd503cc0749d6ee9bdc908 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:11:21 +0700 Subject: [PATCH 80/86] release 2016.11.08 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f8b195fe7..f871f37d9 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.04** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.04 +[debug] youtube-dl version 2016.11.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e61585904..5f1564c81 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.08 Extractors * [tmz:article] Fix extraction (#11052) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7ae7f6279..b2ee2f345 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.04' +__version__ = '2016.11.08' From 9946aa5ccfe0b944f36cf18f41fcf4db28da8a6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:26:33 +0700 Subject: [PATCH 81/86] [franceculture] Fix extraction (closes #11140) --- youtube_dl/extractor/franceculture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index 186da0d3b..56048ffc2 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -29,7 +29,7 @@ class FranceCultureIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_url = self._search_regex( - r'(?s)]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?]+href="([^"]+)"', + r'(?s)]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?]+data-asset-source="([^"]+)"', webpage, 'video path') title = self._og_search_title(webpage) @@ -38,7 +38,7 @@ class FranceCultureIE(InfoExtractor): '(?s)]+class="date"[^>]*>.*?]+class="inner"[^>]*>([^<]+)<', webpage, 'upload date', fatal=False)) thumbnail = self._search_regex( - r'(?s)]+itemtype="https://schema.org/ImageObject"[^>]*>.*?]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"', + r'(?s)]+itemtype="https://schema.org/ImageObject"[^>]*>.*?]+data-dejavu-src="([^"]+)"', webpage, 'thumbnail', fatal=False) uploader = self._html_search_regex( r'(?s)
(.*?)', From 4719af097c47f4e28f4a16eb86275caf35552dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:27:02 +0700 Subject: [PATCH 82/86] [extractors] Add forgotten import for espn:article --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d7ad5b8fc..578359a5e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -267,7 +267,10 @@ from .engadget import EngadgetIE from .eporner import EpornerIE from .eroprofile import EroProfileIE from .escapist import EscapistIE -from .espn import ESPNIE +from .espn import ( + ESPNIE, + ESPNArticleIE, +) from .esri import EsriVideoIE from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE From 6590925c2759f4ea33ded5e5bcb76a0ad8f3d2d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:29:16 +0700 Subject: [PATCH 83/86] [ChangeLog] Actualize --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5f1564c81..c33ab4ec6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version + +Extractors +* [espn:article] Fix support for espn.com articles +* [franceculture] Fix extraction (#11140) + + version 2016.11.08 Extractors From db3367f43ee607364da493191acd745da889e3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:30:53 +0700 Subject: [PATCH 84/86] release 2016.11.08.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f871f37d9..bfae97ddd 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.08 +[debug] youtube-dl version 2016.11.08.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c33ab4ec6..21b212e86 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.08.1 Extractors * [espn:article] Fix support for espn.com articles diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e7e452d93..77832504a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -225,6 +225,7 @@ - **EroProfile** - **Escapist** - **ESPN** + - **ESPNArticle** - **EsriVideo** - **Europa** - **EveryonesMixtape** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b2ee2f345..69df88c6e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.08' +__version__ = '2016.11.08.1' From 3eaaa8abace00d22a85a8b0b2c8ae1d6c4d52781 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 10 Nov 2016 14:52:34 +0800 Subject: [PATCH 85/86] [audioboom] Recognize /posts/ URLs (closes #11149) --- ChangeLog | 6 ++++++ youtube_dl/extractor/audioboom.py | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 21b212e86..d97156e20 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [audioboom] Recognize /posts/ URLs (#11149) + + version 2016.11.08.1 Extractors diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index 2ec2d7092..d7d1c6306 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -6,8 +6,8 @@ from ..utils import float_or_none class AudioBoomIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P[0-9]+)' + _TESTS = [{ 'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0', 'md5': '63a8d73a055c6ed0f1e51921a10a5a76', 'info_dict': { @@ -19,7 +19,10 @@ class AudioBoomIE(InfoExtractor): 'uploader': 'Steve Czaban', 'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', } - } + }, { + 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From bc40b3a5ba44006c23daf7fe0ed872af5e33bdc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Nov 2016 03:26:29 +0700 Subject: [PATCH 86/86] [eagleplatform] Fix extraction (closes #11160) --- youtube_dl/extractor/eagleplatform.py | 53 ++++++++++++++++----------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index d4dfda8cd..c2f593eca 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -4,11 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( ExtractorError, int_or_none, - url_basename, ) @@ -77,7 +79,7 @@ class EaglePlatformIE(InfoExtractor): if status != 200: raise ExtractorError(' '.join(response['errors']), expected=True) - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'): + def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs): try: response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) except ExtractorError as ee: @@ -116,29 +118,38 @@ class EaglePlatformIE(InfoExtractor): m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, - 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) formats.extend(m3u8_formats) - mp4_url = self._get_video_url( + m3u8_formats_dict = {} + for f in m3u8_formats: + if f.get('height') is not None: + m3u8_formats_dict[f['height']] = f + + mp4_data = self._download_json( # Secure mp4 URL is constructed according to Player.prototype.mp4 from # http://lentaru.media.eagleplatform.com/player/player.js - re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8), - video_id, 'Downloading mp4 JSON') - mp4_url_basename = url_basename(mp4_url) - for m3u8_format in m3u8_formats: - mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url']) - if mobj: - http_format = m3u8_format.copy() - video_url = mp4_url.replace(mp4_url_basename, mobj.group(1)) - if not self._is_valid_url(video_url, video_id): + re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8), + video_id, 'Downloading mp4 JSON', fatal=False) + if mp4_data: + for format_id, format_url in mp4_data.get('data', {}).items(): + if not isinstance(format_url, compat_str): continue - http_format.update({ - 'url': video_url, - 'format_id': m3u8_format['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - formats.append(http_format) + height = int_or_none(format_id) + if height is not None and m3u8_formats_dict.get(height): + f = m3u8_formats_dict[height].copy() + f.update({ + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + else: + f = { + 'format_id': 'http-%s' % format_id, + 'height': int_or_none(format_id), + } + f['url'] = format_url + formats.append(f) self._sort_formats(formats)