From c80037918b86cf79c1542bb7bd7dda94d81c3efb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 23 Apr 2016 00:06:49 +0800 Subject: [PATCH 01/30] [iqiyi] Improve error detection (#9276) --- youtube_dl/extractor/iqiyi.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index ea8fbb329..ffb8008ce 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -287,6 +287,13 @@ class IqiyiIE(InfoExtractor): ('10', 'h1'), ] + AUTH_API_ERRORS = { + # No preview available (不允许试看鉴权失败) + 'Q00505': 'This video requires a VIP account', + # End of preview time (试看结束鉴权失败) + 'Q00506': 'Needs a VIP account for full video', + } + def _real_initialize(self): self._login() @@ -372,14 +379,18 @@ class IqiyiIE(InfoExtractor): note='Downloading video authentication JSON', errnote='Unable to download video authentication JSON') - if auth_result['code'] == 'Q00505': # No preview available (不允许试看鉴权失败) - raise ExtractorError('This video requires a VIP account', expected=True) - if auth_result['code'] == 'Q00506': # End of preview time (试看结束鉴权失败) + code = auth_result.get('code') + msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code + if code == 'Q00506': if do_report_warning: - self.report_warning('Needs a VIP account for full video') + self.report_warning(msg) return False + if 'data' not in auth_result: + if msg is not None: + raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True) + raise ExtractorError('Unexpected error from Iqiyi auth API') - return auth_result + return auth_result['data'] def construct_video_urls(self, data, video_id, _uuid, tvid): def do_xor(x, y): @@ -455,11 +466,11 @@ class IqiyiIE(InfoExtractor): need_vip_warning_report = False break param.update({ - 't': auth_result['data']['t'], + 't': auth_result['t'], # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as 'cid': 'afbe8fd3d73448c9', 'vid': video_id, - 'QY00001': auth_result['data']['u'], + 'QY00001': auth_result['u'], }) api_video_url += '?' if '?' not in api_video_url else '&' api_video_url += compat_urllib_parse_urlencode(param) From cb7d4d0efd8c58485e5269895e128b649d8c6c0c Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 22 Apr 2016 18:07:40 +0100 Subject: [PATCH 02/30] [nbc] add support for today.com(closes #2909) --- youtube_dl/extractor/nbc.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index e67025ff6..f9d42d07a 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -172,7 +172,7 @@ class CSNNEIE(InfoExtractor): class NBCNewsIE(ThePlatformIE): - _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ + _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today)\.com/ (?:video/.+?/(?P\d+)| ([^/]+/)*(?P[^/?]+)) ''' @@ -230,6 +230,18 @@ class NBCNewsIE(ThePlatformIE): }, 'expected_warnings': ['http-6000 is not available'] }, + { + 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', + 'md5': '118d7ca3f0bea6534f119c68ef539f71', + 'info_dict': { + 'id': '669831235788', + 'ext': 'mp4', + 'title': 'See the aurora borealis from space in stunning new NASA video', + 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', + 'upload_date': '20160420', + 'timestamp': 1461152093, + }, + }, { 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952', 'only_matching': True, @@ -264,7 +276,10 @@ class NBCNewsIE(ThePlatformIE): info = bootstrap['results'][0]['video'] else: player_instance_json = self._search_regex( - r'videoObj\s*:\s*({.+})', webpage, 'player instance') + r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None) + if not player_instance_json: + player_instance_json = self._html_search_regex( + r'data-video="([^"]+)"', webpage, 'video json') info = self._parse_json(player_instance_json, display_id) video_id = info['mpxId'] title = info['title'] @@ -295,7 +310,7 @@ class NBCNewsIE(ThePlatformIE): formats.extend(tp_formats) subtitles = self._merge_subtitles(subtitles, tp_subtitles) else: - tbr = int_or_none(video_asset.get('bitRate'), 1000) + tbr = int_or_none(video_asset.get('bitRate') or video_asset.get('bitrate'), 1000) format_id = 'http%s' % ('-%d' % tbr if tbr else '') video_url = update_url_query( video_url, {'format': 'redirect'}) @@ -321,10 +336,9 @@ class NBCNewsIE(ThePlatformIE): 'id': video_id, 'title': title, 'description': info.get('description'), - 'thumbnail': info.get('description'), 'thumbnail': info.get('thumbnail'), 'duration': int_or_none(info.get('duration')), - 'timestamp': parse_iso8601(info.get('pubDate')), + 'timestamp': parse_iso8601(info.get('pubDate') or info.get('pub_date')), 'formats': formats, 'subtitles': subtitles, } From eb01e97e10a5c1eb096695fbfd91ff7f69a4aa9e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 23 Apr 2016 02:51:17 +0800 Subject: [PATCH 03/30] [youku] Skip streams with channel_type=tail Fixes #9275 These video segments look like ads and they don't appear in the web player. --- youtube_dl/extractor/youku.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index fd7eb5a6d..349ce0941 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -64,6 +64,14 @@ class YoukuIE(InfoExtractor): 'params': { 'videopassword': '100600', }, + }, { + # /play/get.json contains streams with "channel_type":"tail" + 'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html', + 'info_dict': { + 'id': 'XOTUxMzg4NDMy', + 'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft', + }, + 'playlist_count': 6, }] def construct_video_urls(self, data): @@ -92,6 +100,8 @@ class YoukuIE(InfoExtractor): fileid_dict = {} for stream in data['stream']: + if stream.get('channel_type') == 'tail': + continue format = stream.get('stream_type') fileid = stream['stream_fileid'] fileid_dict[format] = fileid @@ -117,6 +127,8 @@ class YoukuIE(InfoExtractor): # generate video_urls video_urls_dict = {} for stream in data['stream']: + if stream.get('channel_type') == 'tail': + continue format = stream.get('stream_type') video_urls = [] for dt in stream['segs']: @@ -253,6 +265,8 @@ class YoukuIE(InfoExtractor): # which one has all } for i in range(max(len(v.get('segs')) for v in data['stream']))] for stream in data['stream']: + if stream.get('channel_type') == 'tail': + continue fm = stream.get('stream_type') video_urls = video_urls_dict[fm] for video_url, seg, entry in zip(video_urls, stream['segs'], entries): From 29a7e8f6f88e9a4c522e67aae5badd26ee226fde Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 22 Apr 2016 20:17:20 +0100 Subject: [PATCH 04/30] [nhl] Add new extractor(closes #8419)(closes #8798) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/nhl.py | 98 ++++++++++++++++++++++++++++-- 2 files changed, 96 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 27a7e89a4..6de3438fc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -490,9 +490,10 @@ from .nextmovie import NextMovieIE from .nfb import NFBIE from .nfl import NFLIE from .nhl import ( - NHLIE, - NHLNewsIE, NHLVideocenterIE, + NHLNewsIE, + NHLVideocenterCategoryIE, + NHLIE, ) from .nick import NickIE from .niconico import NiconicoIE, NiconicoPlaylistIE diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index c1dea8b6c..b04d21113 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -8,10 +8,15 @@ from .common import InfoExtractor from ..compat import ( compat_urlparse, compat_urllib_parse_urlencode, - compat_urllib_parse_urlparse + compat_urllib_parse_urlparse, + compat_str, ) from ..utils import ( unified_strdate, + determine_ext, + int_or_none, + parse_iso8601, + parse_duration, ) @@ -70,8 +75,8 @@ class NHLBaseInfoExtractor(InfoExtractor): return ret -class NHLIE(NHLBaseInfoExtractor): - IE_NAME = 'nhl.com' +class NHLVideocenterIE(NHLBaseInfoExtractor): + IE_NAME = 'nhl.com:videocenter' _VALID_URL = r'https?://video(?P\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P[-0-9a-zA-Z,]+)' _TESTS = [{ @@ -186,8 +191,8 @@ class NHLNewsIE(NHLBaseInfoExtractor): return self._real_extract_video(video_id) -class NHLVideocenterIE(NHLBaseInfoExtractor): - IE_NAME = 'nhl.com:videocenter' +class NHLVideocenterCategoryIE(NHLBaseInfoExtractor): + IE_NAME = 'nhl.com:videocenter:category' IE_DESC = 'NHL videocenter category' _VALID_URL = r'https?://video\.(?P[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P[0-9]+)(?![&?]id=).*?)?$' _TEST = { @@ -236,3 +241,86 @@ class NHLVideocenterIE(NHLBaseInfoExtractor): 'id': cat_id, 'entries': [self._extract_video(v) for v in videos], } + + +class NHLIE(InfoExtractor): + IE_NAME = 'nhl.com' + _VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P\d+)' + _TESTS = [{ + # type=video + 'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503', + 'md5': '0f7b9a8f986fb4b4eeeece9a56416eaf', + 'info_dict': { + 'id': '43663503', + 'ext': 'mp4', + 'title': 'Anisimov cleans up mess', + 'description': 'md5:a02354acdfe900e940ce40706939ca63', + 'timestamp': 1461288600, + 'upload_date': '20160422', + }, + }, { + # type=article + 'url': 'https://www.nhl.com/news/dennis-wideman-suspended/c-278258934', + 'md5': '1f39f4ea74c1394dea110699a25b366c', + 'info_dict': { + 'id': '40784403', + 'ext': 'mp4', + 'title': 'Wideman suspended by NHL', + 'description': 'Flames defenseman Dennis Wideman was banned 20 games for violation of Rule 40 (Physical Abuse of Officials)', + 'upload_date': '20160204', + 'timestamp': 1454544904, + }, + }] + + def _real_extract(self, url): + tmp_id = self._match_id(url) + video_data = self._download_json( + 'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id, + tmp_id) + if video_data.get('type') == 'article': + video_data = video_data['media'] + + video_id = compat_str(video_data['id']) + title = video_data['title'] + + formats = [] + for playback in video_data.get('playbacks', []): + playback_url = playback.get('url') + if not playback_url: + continue + ext = determine_ext(playback_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + playback_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=playback.get('name', 'hls'), fatal=False)) + else: + height = int_or_none(playback.get('height')) + formats.append({ + 'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')), + 'url': playback_url, + 'width': int_or_none(playback.get('width')), + 'height': height, + }) + self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id')) + + thumbnails = [] + for thumbnail_id, thumbnail_data in video_data.get('image', {}).get('cuts', {}).items(): + thumbnail_url = thumbnail_data.get('src') + if not thumbnail_url: + continue + thumbnails.append({ + 'id': thumbnail_id, + 'url': thumbnail_url, + 'width': int_or_none(thumbnail_data.get('width')), + 'height': int_or_none(thumbnail_data.get('height')), + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'timestamp': parse_iso8601(video_data.get('date')), + 'duration': parse_duration(video_data.get('duration')), + 'thumbnails': thumbnails, + 'formats': formats, + } From 397ec446f3816a0b13bb71068c10ab8122192cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Apr 2016 15:59:30 +0600 Subject: [PATCH 05/30] [dplay] Try secure api for no tld (Closes #9282) --- youtube_dl/extractor/dplay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 66bbfc6ca..ef68ef087 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -99,7 +99,7 @@ class DPlayIE(InfoExtractor): video_id, f4m_id=protocol, fatal=False)) domain_tld = domain.split('.')[-1] - if domain_tld in ('se', 'dk'): + if domain_tld in ('se', 'dk', 'no'): for protocol in PROTOCOLS: self._set_cookie( 'secure.dplay.%s' % domain_tld, 'dsc-geo', From fd0ff8bad85595fc7bb7fdafb09be63c502927ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Apr 2016 16:36:17 +0600 Subject: [PATCH 06/30] [dplay] Improve extraction and document workarounds and tests --- youtube_dl/extractor/dplay.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index ef68ef087..b2f654ca5 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -13,6 +13,7 @@ class DPlayIE(InfoExtractor): _VALID_URL = r'https?://(?Pit\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P[^/?#]+)' _TESTS = [{ + # geo restricted, via direct unsigned hls URL 'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/', 'info_dict': { 'id': '1255600', @@ -31,6 +32,7 @@ class DPlayIE(InfoExtractor): }, 'expected_warnings': ['Unable to download f4m manifest'], }, { + # non geo restricted, via secure api 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 'info_dict': { 'id': '3172', @@ -48,6 +50,7 @@ class DPlayIE(InfoExtractor): 'age_limit': 0, }, }, { + # geo restricted, via secure api 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', 'info_dict': { 'id': '70816', @@ -65,6 +68,7 @@ class DPlayIE(InfoExtractor): 'age_limit': 0, }, }, { + # geo restricted, via direct unsigned hls URL 'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/', 'only_matching': True, }] @@ -101,6 +105,7 @@ class DPlayIE(InfoExtractor): domain_tld = domain.split('.')[-1] if domain_tld in ('se', 'dk', 'no'): for protocol in PROTOCOLS: + # Providing dsc-geo allows to bypass geo restriction in some cases self._set_cookie( 'secure.dplay.%s' % domain_tld, 'dsc-geo', json.dumps({ @@ -113,7 +118,11 @@ class DPlayIE(InfoExtractor): 'Downloading %s stream JSON' % protocol, fatal=False) if stream and stream.get(protocol): extract_formats(protocol, stream[protocol]) - else: + + # The last resort is to try direct unsigned hls/hds URLs from info dictionary. + # Sometimes this does work even when secure API with dsc-geo has failed (e.g. + # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/). + if not formats: for protocol in PROTOCOLS: if info.get(protocol): extract_formats(protocol, info[protocol]) From e239413fbc9f37996a5fb1ed5b5d211f5a0e613b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Apr 2016 16:50:31 +0600 Subject: [PATCH 07/30] [dplay] Extract subtitles (Closes #9284) --- youtube_dl/extractor/dplay.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index b2f654ca5..a7994697c 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -129,6 +129,13 @@ class DPlayIE(InfoExtractor): self._sort_formats(formats) + subtitles = {} + for lang in ('se', 'sv', 'da', 'nl', 'no'): + for format_id in ('web_vtt', 'vtt', 'srt'): + subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id)) + if subtitle_url: + subtitles.setdefault(lang, []).append({'url': subtitle_url}) + return { 'id': video_id, 'display_id': display_id, @@ -142,4 +149,5 @@ class DPlayIE(InfoExtractor): 'episode_number': int_or_none(info.get('episode')), 'age_limit': int_or_none(info.get('minimum_age')), 'formats': formats, + 'subtitles': subtitles, } From 5448b781f697af2c0ea2efe8e553e2049d4de047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Apr 2016 17:28:45 +0600 Subject: [PATCH 08/30] [dplay] Sign unsigned final download hls URLs --- youtube_dl/extractor/dplay.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index a7994697c..5790553f3 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -6,7 +6,11 @@ import re import time from .common import InfoExtractor -from ..utils import int_or_none +from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + update_url_query, +) class DPlayIE(InfoExtractor): @@ -32,12 +36,12 @@ class DPlayIE(InfoExtractor): }, 'expected_warnings': ['Unable to download f4m manifest'], }, { - # non geo restricted, via secure api + # non geo restricted, via secure api, unsigned download hls URL 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 'info_dict': { 'id': '3172', 'display_id': 'season-1-svensken-lar-sig-njuta-av-livet', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Svensken lär sig njuta av livet', 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', 'duration': 2650, @@ -50,18 +54,18 @@ class DPlayIE(InfoExtractor): 'age_limit': 0, }, }, { - # geo restricted, via secure api + # geo restricted, via secure api, unsigned download hls URL 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', 'info_dict': { 'id': '70816', 'display_id': 'season-6-episode-12', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Episode 12', 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', 'duration': 2563, 'timestamp': 1429696800, 'upload_date': '20150422', - 'creator': 'Kanal 4', + 'creator': 'Kanal 4 (Home)', 'series': 'Mig og min mor', 'season_number': 6, 'episode_number': 12, @@ -94,9 +98,15 @@ class DPlayIE(InfoExtractor): def extract_formats(protocol, manifest_url): if protocol == 'hls': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( manifest_url, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)) + entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False) + # Sometimes final URLs inside m3u8 are unsigned, let's fix this + # ourselves + query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query) + for m3u8_format in m3u8_formats: + m3u8_format['url'] = update_url_query(m3u8_format['url'], query) + formats.extend(m3u8_formats) elif protocol == 'hds': formats.extend(self._extract_f4m_formats( manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0', From 864d5e7231c931d10d3a556224d152c8228270c3 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 24 Apr 2016 02:32:56 +0100 Subject: [PATCH 09/30] [viewster] extract all http formats --- youtube_dl/extractor/viewster.py | 36 +++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index fe94a4793..1fbf9d794 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_HTTPError, @@ -14,6 +16,7 @@ from ..utils import ( parse_iso8601, sanitized_Request, HEADRequest, + url_basename, ) @@ -114,6 +117,7 @@ class ViewsterIE(InfoExtractor): return self.playlist_result(entries, video_id, title, description) formats = [] + manifest_url = None for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): media = self._download_json( 'https://public-api.viewster.com/movies/%s/video?mediaType=%s' @@ -126,29 +130,37 @@ class ViewsterIE(InfoExtractor): continue ext = determine_ext(video_url) if ext == 'f4m': + manifest_url = video_url video_url += '&' if '?' in video_url else '?' video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1' formats.extend(self._extract_f4m_formats( video_url, video_id, f4m_id='hds')) elif ext == 'm3u8': + manifest_url = video_url m3u8_formats = self._extract_m3u8_formats( video_url, video_id, 'mp4', m3u8_id='hls', fatal=False) # m3u8 sometimes fail if m3u8_formats: formats.extend(m3u8_formats) else: - format_id = media.get('Bitrate') - f = { - 'url': video_url, - 'format_id': 'mp4-%s' % format_id, - 'height': int_or_none(media.get('Height')), - 'width': int_or_none(media.get('Width')), - 'preference': 1, - } - if format_id and not f['height']: - f['height'] = int_or_none(self._search_regex( - r'^(\d+)[pP]$', format_id, 'height', default=None)) - formats.append(f) + qualities_basename = self._search_regex( + '/([^/]+)(?:.csmil/manifest.f4m|.csmil/master.m3u8)', + manifest_url, 'qualities basename', default=None) + if qualities_basename: + QUALITIES_RE = r'((,\d+k)+,?)' + qualities = self._search_regex( + QUALITIES_RE, qualities_basename, + 'qualities').strip(',').split(',') + http_template = re.sub(QUALITIES_RE, r'%s', qualities_basename) + http_url_basename = url_basename(video_url) + for q in qualities: + formats.append({ + 'url': video_url.replace(http_url_basename, http_template % q), + 'ext': 'mp4', + 'format_id': 'http-%s' % q, + 'tbr': int_or_none(self._search_regex( + r'(\d+)k', q, 'bitrate', default=None)), + }) if not formats and not info.get('LanguageSets') and not info.get('VODSettings'): self.raise_geo_restricted() From fda9a1ca9e469b97bb28edc00bc6b9974e1fab7a Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 24 Apr 2016 03:06:46 +0100 Subject: [PATCH 10/30] [viewster] simplify qualities_basename regex --- youtube_dl/extractor/viewster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 1fbf9d794..4289a2f31 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -144,7 +144,7 @@ class ViewsterIE(InfoExtractor): formats.extend(m3u8_formats) else: qualities_basename = self._search_regex( - '/([^/]+)(?:.csmil/manifest.f4m|.csmil/master.m3u8)', + '/([^/]+)\.csmil/', manifest_url, 'qualities basename', default=None) if qualities_basename: QUALITIES_RE = r'((,\d+k)+,?)' From 2c21152ca7af4f96fccb225f161862046280a12a Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 24 Apr 2016 12:22:18 +0600 Subject: [PATCH 11/30] [README.md] Document track metafields in output template --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index cd18edd87..7a54d651d 100644 --- a/README.md +++ b/README.md @@ -515,6 +515,18 @@ Available for the video that is an episode of some series or programme: - `episode_number`: Number of the video episode within a season - `episode_id`: Id of the video episode +Available for the media that is a track or a part of a music album: + - `track`: Title of the track + - `track_number`: Number of the track within an album or a disc + - `track_id`: Id of the track + - `artist`: Artist(s) of the track + - `genre`: Genre(s) of the track + - `album`: Title of the album the track belongs to + - `album_type`: Type of the album + - `album_artist`: List of all artists appeared on the album + - `disc_number`: Number of the disc or other physical medium the track belongs to + - `release_year`: Year (YYYY) when the album was released + Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`. For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory. From 949b6497cc2be1f21cd439c6fc6e4047eddb0d66 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 24 Apr 2016 16:23:21 +0800 Subject: [PATCH 12/30] [generic] Unescape the video URL Fixes #9279 --- youtube_dl/extractor/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 95d233259..16c2c60d7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2045,6 +2045,7 @@ class GenericIE(InfoExtractor): entries = [] for video_url in found: + video_url = unescapeHTML(video_url) video_url = video_url.replace('\\/', '/') video_url = compat_urlparse.urljoin(url, video_url) video_id = compat_urllib_parse_unquote(os.path.basename(video_url)) From 2a7c38831cc8f789cdf4ee63f8d4450a46f45017 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 24 Apr 2016 17:01:18 +0800 Subject: [PATCH 13/30] [yahoo] Extend _VALID_URL and fix extraction Closes #9271 --- youtube_dl/extractor/yahoo.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b2d8f4b48..e2613659c 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -24,7 +24,7 @@ from .nbc import NBCSportsVPlayerIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?P(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P.+)?-(?P[0-9]+)(?:-[a-z]+)?\.html)' + _VALID_URL = r'(?P(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P.+)?-(?P[0-9]+)(?:-[a-z]+)?(?:\.html)?)' _TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', @@ -166,6 +166,17 @@ class YahooIE(InfoExtractor): 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.', }, }, + { + # config['models']['applet_model']['data']['sapi'] has no query + 'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016', + 'md5': 'dac0c72d502bc5facda80c9e6d5c98db', + 'info_dict': { + 'id': 'a6015640-e9e5-3efb-bb60-05589a183919', + 'ext': 'mp4', + 'description': 'Galactic', + 'title': 'Dolla Diva (feat. Maggie Koerner)', + }, + }, ] def _real_extract(self, url): @@ -202,7 +213,7 @@ class YahooIE(InfoExtractor): config = self._parse_json(config_json, display_id, fatal=False) if config: sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi') - if sapi: + if sapi and 'query' in sapi: return self._extract_info(display_id, sapi, webpage) items_json = self._search_regex( From 4f549580977ab94364fd404cdebba22575c74b91 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 24 Apr 2016 17:28:18 +0800 Subject: [PATCH 14/30] [yahoo] Update some tests One has new fields as ThePlatformIE changed, and others have changed files. --- youtube_dl/extractor/yahoo.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index e2613659c..e4f3d8937 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -38,7 +38,7 @@ class YahooIE(InfoExtractor): }, { 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', - 'md5': 'd6e6fc6e1313c608f316ddad7b82b306', + 'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23', 'info_dict': { 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9', 'ext': 'mp4', @@ -49,7 +49,7 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', - 'md5': '60e8ac193d8fb71997caa8fce54c6460', + 'md5': '75ffabdb87c16d4ffe8c036dc4d1c136', 'info_dict': { 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', 'ext': 'mp4', @@ -122,7 +122,7 @@ class YahooIE(InfoExtractor): } }, { 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', - 'md5': '989396ae73d20c6f057746fb226aa215', + 'md5': 'b17ac378b1134fa44370fb27db09a744', 'info_dict': { 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', 'ext': 'mp4', @@ -141,6 +141,9 @@ class YahooIE(InfoExtractor): 'ext': 'flv', 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', + 'upload_date': '20150313', + 'uploader': 'NBCU-SPORTS', + 'timestamp': 1426270238, } }, { 'url': 'https://tw.news.yahoo.com/-100120367.html', @@ -148,7 +151,7 @@ class YahooIE(InfoExtractor): }, { # Query result is embedded in webpage, but explicit request to video API fails with geo restriction 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', - 'md5': '4fbafb9c9b6f07aa8f870629f6671b35', + 'md5': '1ddbf7c850777548438e5c4f147c7b8c', 'info_dict': { 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', 'ext': 'mp4', From d9ed362116969362e1c404aea63d9f6f3e833478 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 24 Apr 2016 17:46:25 +0800 Subject: [PATCH 15/30] [yahoo] Extract all