From a0758dfa1afd5b04773ba3b3b17ac71d22054821 Mon Sep 17 00:00:00 2001 From: felix Date: Wed, 5 Aug 2015 22:40:46 +0200 Subject: [PATCH 001/195] [filmon] new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/filmon.py | 144 +++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 youtube_dl/extractor/filmon.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 578359a5e..c9b9ebd23 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,6 +287,7 @@ from .fc2 import ( FC2EmbedIE, ) from .fczenit import FczenitIE +from .filmon import FilmOnIE, FilmOnVODIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE diff --git a/youtube_dl/extractor/filmon.py b/youtube_dl/extractor/filmon.py new file mode 100644 index 000000000..987792fec --- /dev/null +++ b/youtube_dl/extractor/filmon.py @@ -0,0 +1,144 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import qualities +from ..compat import compat_urllib_request + + +_QUALITY = qualities(('low', 'high')) + + +class FilmOnIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P[a-z0-9-]+)' + _TESTS = [{ + 'url': 'https://www.filmon.com/channel/filmon-sports', + 'only_matching': True, + }, { + 'url': 'https://www.filmon.com/tv/2894', + 'only_matching': True, + }] + + def _real_extract(self, url): + channel_id = self._match_id(url) + + request = compat_urllib_request.Request('https://www.filmon.com/channel/%s' % (channel_id)) + request.add_header('X-Requested-With', 'XMLHttpRequest') + channel_info = self._download_json(request, channel_id) + now_playing = channel_info['now_playing'] + + thumbnails = [] + for thumb in now_playing.get('images', ()): + if thumb['type'] != '2': + continue + thumbnails.append({ + 'url': thumb['url'], + 'width': int(thumb['width']), + 
'height': int(thumb['height']), + }) + + formats = [] + + for stream in channel_info['streams']: + formats.append({ + 'format_id': str(stream['id']), + # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats + # because 0) it doesn't have bitrate variants anyway, and 1) the ids generated + # by that method are highly unstable (because the bitrate is variable) + 'url': stream['url'], + 'resolution': stream['name'], + 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), + 'ext': 'mp4', + 'quality': _QUALITY(stream['quality']), + 'preference': int(stream['watch-timeout']), + }) + self._sort_formats(formats) + + return { + 'id': str(channel_info['id']), + 'display_id': channel_info['alias'], + 'formats': formats, + # XXX: use the channel description (channel_info['description'])? + 'uploader_id': channel_info['alias'], + 'uploader': channel_info['title'], # XXX: kinda stretching it... + 'title': now_playing.get('programme_name') or channel_info['title'], + 'description': now_playing.get('programme_description'), + 'thumbnails': thumbnails, + 'is_live': True, + } + + +class FilmOnVODIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?filmon\.com/vod/view/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space', + 'info_dict': { + 'id': '24869', + 'ext': 'mp4', + 'title': 'Plan 9 From Outer Space', + 'description': 'Dead human, zombies and vampires', + }, + }, { + 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1', + 'info_dict': { + 'id': '2825', + 'title': 'Popeye Series 1', + }, + 'playlist_count': 8, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + result = self._download_json('https://www.filmon.com/api/vod/movie?id=%s' % (video_id), video_id) + if result['code'] != 200: + raise ExtractorError('FilmOn said: %s' % (result['reason']), expected=True) + + response = result['response'] + + if response.get('episodes'): + return { + '_type': 
'playlist', + 'id': video_id, + 'title': response['title'], + 'entries': [{ + '_type': 'url', + 'url': 'https://www.filmon.com/vod/view/%s' % (ep), + } for ep in response['episodes']] + } + + formats = [] + for (id, stream) in response['streams'].items(): + formats.append({ + 'format_id': id, + 'url': stream['url'], + 'resolution': stream['name'], + 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), + 'ext': 'mp4', + 'quality': _QUALITY(stream['quality']), + 'preference': int(stream['watch-timeout']), + }) + self._sort_formats(formats) + + poster = response['poster'] + thumbnails = [{ + 'id': 'poster', + 'url': poster['url'], + 'width': poster['width'], + 'height': poster['height'], + }] + for (id, thumb) in poster['thumbs'].items(): + thumbnails.append({ + 'id': id, + 'url': thumb['url'], + 'width': thumb['width'], + 'height': thumb['height'], + }) + + return { + 'id': video_id, + 'title': response['title'], + 'formats': formats, + 'description': response['description'], + 'thumbnails': thumbnails, + } From 06e9363b7a21acf6a592780a706b0fdd6b5a2d4e Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Sun, 8 Jan 2017 22:27:28 +0530 Subject: [PATCH 002/195] [openload] Fix extraction (closes #10408) Just a minor fix for openload --- youtube_dl/extractor/openload.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 2ce9f3826..3d4ad7dca 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -64,16 +64,17 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) ol_id = self._search_regex( - ']+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)', + ']+id="[^"]+"[^>]*>([0-9]+)', webpage, 'openload ID') - first_two_chars = int(float(ol_id[0:][:2])) + first_three_chars = int(float(ol_id[0:][:3])) + fifth_char = int(float(ol_id[3:5])) urlcode = '' - num = 2 + num = 5 while num < len(ol_id): - urlcode += 
compat_chr(int(float(ol_id[num:][:3])) - - first_two_chars * int(float(ol_id[num + 3:][:2]))) + urlcode += compat_chr(int(float(ol_id[num:][:3])) + + first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2]))) num += 5 video_url = 'https://openload.co/stream/' + urlcode From fb6a59205e3dc5bb1d37d50ac1161314c0d66cf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Jan 2017 23:55:55 +0700 Subject: [PATCH 003/195] [mixcloud] Fix extraction (closes #11674) --- youtube_dl/extractor/mixcloud.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 4ba2310fd..a24b3165a 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -16,7 +16,6 @@ from ..utils import ( clean_html, ExtractorError, OnDemandPagedList, - parse_count, str_to_int, ) @@ -36,7 +35,6 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'dholbach', 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, - 'like_count': int, }, }, { 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', @@ -49,7 +47,6 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'gillespeterson', 'thumbnail': 're:https?://.*', 'view_count': int, - 'like_count': int, }, }, { 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', @@ -89,26 +86,18 @@ class MixcloudIE(InfoExtractor): song_url = play_info['stream_url'] - PREFIX = ( - r'm-play-on-spacebar[^>]+' - r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') - title = self._html_search_regex( - PREFIX + r'm-title="([^"]+)"', webpage, 'title') + title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') thumbnail = self._proto_relative_url(self._html_search_regex( - PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', - fatal=False)) + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) uploader = self._html_search_regex( - PREFIX 
+ r'm-owner-name="([^"]+)"', - webpage, 'uploader', fatal=False) + r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) uploader_id = self._search_regex( r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) - like_count = parse_count(self._search_regex( - r'\bbutton-favorite[^>]+>.*?]+class=["\']toggle-number[^>]+>\s*([^<]+)', - webpage, 'like count', default=None)) view_count = str_to_int(self._search_regex( [r'([0-9,.]+)'], + r'/listeners/?">([0-9,.]+)', + r'm-tooltip=["\']([\d,.]+) plays'], webpage, 'play count', default=None)) return { @@ -120,7 +109,6 @@ class MixcloudIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader_id, 'view_count': view_count, - 'like_count': like_count, } From 9837cb7507e0635755082a7fd2e748c4106fefc4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 23:02:50 +0100 Subject: [PATCH 004/195] [ooyala] add support for videos with embedToken(#11684) --- youtube_dl/extractor/generic.py | 9 ++++++++- youtube_dl/extractor/ooyala.py | 14 +++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 86dc79307..ac29ec600 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1939,7 +1939,14 @@ class GenericIE(InfoExtractor): re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P.{32})[\'"]', webpage)) if mobj is not None: - return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url})) + embed_token = self._search_regex( + r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)', + webpage, 'ooyala embed token', default=None) + return OoyalaIE._build_url_result(smuggle_url( + mobj.group('ec'), { + 'domain': url, + 'embed_token': embed_token, + })) # Look for multiple Ooyala embeds on SBN network websites mobj = 
re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index c2807d0f6..f00cf745b 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor): _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' - def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None): + def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] @@ -29,7 +29,8 @@ class OoyalaBaseIE(InfoExtractor): self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + compat_urllib_parse_urlencode({ 'domain': domain, - 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds', + 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth', + 'embedToken': embed_token, }), video_id) cur_auth_data = auth_data['authorization_data'][embed_code] @@ -52,6 +53,12 @@ class OoyalaBaseIE(InfoExtractor): elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif delivery_type == 'hds' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + s_url, embed_code, mpd_id='dash', fatal=False)) + elif delivery_type == 'smooth': + self._extract_ism_formats( + s_url, embed_code, ism_id='mss', fatal=False) elif ext == 'smil': formats.extend(self._extract_smil_formats( s_url, embed_code, fatal=False)) @@ -146,8 +153,9 @@ class OoyalaIE(OoyalaBaseIE): embed_code = self._match_id(url) domain = smuggled_data.get('domain') supportedformats = smuggled_data.get('supportedformats') + embed_token = 
smuggled_data.get('embed_token') content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) - return self._extract(content_tree_url, embed_code, domain, supportedformats) + return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token) class OoyalaExternalIE(OoyalaBaseIE): From 5e8eebb6009ac3e9f7dfc803d8561174d207c1a2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 23:06:07 +0100 Subject: [PATCH 005/195] [mitele] extract dash formats --- youtube_dl/extractor/mitele.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 8984d3b8d..79e0b8ada 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -190,7 +190,7 @@ class MiTeleIE(InfoExtractor): return { '_type': 'url_transparent', # for some reason only HLS is supported - 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}), + 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}), 'id': video_id, 'title': title, 'description': description, From adf063dad1792f0c9c680d13ccd984b4ad60ac29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 06:17:03 +0700 Subject: [PATCH 006/195] [mtv,cc,cmt,spike] Improve and refactor - Eliminate _transform_rtmp_url * Generalize triforce mgid extraction + [cmt] Add support for full-episodes (closes #11623) --- youtube_dl/extractor/cmt.py | 25 ++++++------ youtube_dl/extractor/comedycentral.py | 17 +------- youtube_dl/extractor/mtv.py | 58 ++++++++++++++++++--------- youtube_dl/extractor/spike.py | 2 +- 4 files changed, 54 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index 7d3e9b0c9..6302b8d9c 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -1,13 +1,11 @@ from __future__ import unicode_literals from .mtv import MTVIE -from ..utils import 
ExtractorError class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' - _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', @@ -35,15 +33,16 @@ class CMTIE(MTVIE): 'only_matching': True, }] - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - if 'error_not_available.swf' in rtmp_video_url: - raise ExtractorError( - '%s said: video is not available' % cls.IE_NAME, expected=True) - - return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url) - def _extract_mgid(self, webpage): - return self._search_regex( + mgid = self._search_regex( r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P.+?)\1', - webpage, 'mgid', group='mgid') + webpage, 'mgid', group='mgid', default=None) + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 816e0bfb6..4cac29415 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -48,17 +48,8 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - - feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed') - feed = self._parse_json(feed_json, playlist_id) - zones = feed['manifest']['zones'] - - video_zone = zones['t2_lc_promo1'] - feed = self._download_json(video_zone['feed'], playlist_id) - mgid = 
feed['result']['data']['id'] - + mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1') videos_info = self._get_videos_info(mgid) - return videos_info @@ -94,12 +85,6 @@ class ToshIE(MTVServicesInfoExtractor): 'only_matching': True, }] - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url) - new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm') - return new_urls - class ComedyCentralTVIE(MTVServicesInfoExtractor): _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P[^/?#&]+)' diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5250db212..00a980c7d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -13,11 +13,11 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, - NO_DEFAULT, RegexNotFoundError, sanitized_Request, strip_or_none, timeconvert, + try_get, unescapeHTML, update_url_query, url_basename, @@ -42,15 +42,6 @@ class MTVServicesInfoExtractor(InfoExtractor): # Remove the templates, like &device={device} return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) - # This was originally implemented for ComedyCentral, but it also works here - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - m = re.match(r'^rtmpe?://.*?/(?Pgsp\..+?/.*)$', rtmp_video_url) - if not m: - return {'rtmp': rtmp_video_url} - base = 'http://viacommtvstrmfs.fplive.net/' - return {'http': base + m.group('finalid')} - def _get_feed_url(self, uri): return self._FEED_URL @@ -91,22 +82,28 @@ class MTVServicesInfoExtractor(InfoExtractor): if rendition.get('method') == 'hls': hls_url = rendition.find('./src').text formats.extend(self._extract_m3u8_formats( - hls_url, video_id, ext='mp4', entry_protocol='m3u8_native')) + hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls')) else: # fms try: _, _, ext = rendition.attrib['type'].partition('/') rtmp_video_url = 
rendition.find('./src').text + if 'error_not_available.swf' in rtmp_video_url: + raise ExtractorError( + '%s said: video is not available' % self.IE_NAME, + expected=True) if rtmp_video_url.endswith('siteunavail.png'): continue - new_urls = self._transform_rtmp_url(rtmp_video_url) formats.extend([{ - 'ext': 'flv' if new_url.startswith('rtmp') else ext, - 'url': new_url, - 'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])), + 'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext, + 'url': rtmp_video_url, + 'format_id': '-'.join(filter(None, [ + 'rtmp' if rtmp_video_url.startswith('rtmp') else None, + rendition.get('bitrate')])), 'width': int(rendition.get('width')), 'height': int(rendition.get('height')), - } for kind, new_url in new_urls.items()]) + }]) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') self._sort_formats(formats) @@ -212,7 +209,28 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage, default=NO_DEFAULT): + def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): + triforce_feed = self._parse_json(self._search_regex( + r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, + 'triforce feed', default='{}'), video_id, fatal=False) + + data_zone = self._search_regex( + r'data-zone=(["\'])(?P.+?_lc_promo.*?)\1', webpage, + 'data zone', default=data_zone, group='zone') + + feed_url = try_get( + triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'], + compat_str) + if not feed_url: + return + + feed = self._download_json(feed_url, video_id, fatal=False) + if not feed: + return + + return try_get(feed, lambda x: x['result']['data']['id'], compat_str) + + def _extract_mgid(self, webpage): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -232,7 +250,11 @@ 
class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default) + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None) + + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index abfee3ece..c59896a17 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -46,7 +46,7 @@ class SpikeIE(MTVServicesInfoExtractor): _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') def _extract_mgid(self, webpage): - mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) + mgid = super(SpikeIE, self)._extract_mgid(webpage) if mgid is None: url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') video_type, episode_id = url_parts.split('/', 1) From e54fc0524ebf7e3ec02fbd22f00fce466c952791 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 06:23:24 +0700 Subject: [PATCH 007/195] [cmt] Add support for video-clips --- youtube_dl/extractor/cmt.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index 6302b8d9c..f6b794fb3 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -5,7 +5,7 @@ from .mtv import MTVIE class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', @@ -31,6 +31,12 @@ class CMTIE(MTVIE): }, { 'url': 
'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172', 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501', + 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes', + 'only_matching': True, }] def _extract_mgid(self, webpage): From 4f66c16f337f3b2250d369b56bc31cfd7de06f89 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Sat, 14 Jan 2017 00:26:11 +0100 Subject: [PATCH 008/195] [brightcove:legacy] Fix misplaced backslash in a regexp --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index aa2923ccf..2e56d1df9 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -204,7 +204,7 @@ class BrightcoveLegacyIE(InfoExtractor): # // build Brightcove XML # } m = re.search( - r'''(?x)customBC.\createVideo\( + r'''(?x)customBC\.createVideo\( .*? 
# skipping width and height ["\'](?P\d+)["\']\s*,\s* # playerID ["\'](?PAQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters From 0b94510cd00d50ddda74ba0079f856650f24680e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 07:27:20 +0700 Subject: [PATCH 009/195] [ChangeLog] Actualize --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index f1e234507..0106a7ae8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version + +Core ++ [common] Add ability to customize akamai manifest host ++ [utils] Add more date formats + +Extractors +- [mtv] Eliminate _transform_rtmp_url +* [mtv] Generalize triforce mgid extraction ++ [cmt] Add support for full episodes and video clips (#11623) ++ [mitele] Extract DASH formats ++ [ooyala] Add support for videos with embedToken (#11684) +* [mixcloud] Fix extraction (#11674) +* [openload] Fix extraction (#10408) +* [tv4] Improve extraction (#11698) +* [freesound] Fix and improve extraction (#11602) ++ [nick] Add support for beta.nick.com (#11655) +* [mtv,cc] Use HLS by default with native HLS downloader (#11641) +* [mtv] Fix non-HLS extraction + + version 2017.01.10 Extractors From 5d4c7daa49b8ff83aa6fb13b183f47d4427c6513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 07:31:07 +0700 Subject: [PATCH 010/195] release 2017.01.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6a4c25680..a7bf2b90c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.10*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.10 +[debug] youtube-dl version 2017.01.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0106a7ae8..dba18d39b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.14 Core + [common] Add ability to customize akamai manifest host diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 214124722..17c6f9eb2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.10' +__version__ = '2017.01.14' From abe8cb763fd43ee2db09c73965f38db7db02559e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 08:30:00 +0700 
Subject: [PATCH 011/195] [cbc] Improve playlist support (closes #11704) --- youtube_dl/extractor/cbc.py | 55 +++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 7c76ceac8..a291685bf 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -90,36 +90,49 @@ class CBCIE(InfoExtractor): }, }], 'skip': 'Geo-restricted to Canada', + }, { + # multiple CBC.APP.Caffeine.initInstance(...) + 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', + 'info_dict': { + 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', + 'id': 'dog-indoor-exercise-winter-1.3928238', + }, + 'playlist_mincount': 6, }] @classmethod def suitable(cls, url): return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) + def _extract_player_init(self, player_init, display_id): + player_info = self._parse_json(player_init, display_id, js_to_json) + media_id = player_info.get('mediaId') + if not media_id: + clip_id = player_info['clipId'] + feed = self._download_json( + 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, + clip_id, fatal=False) + if feed: + media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) + if not media_id: + media_id = self._download_json( + 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, + clip_id)['entries'][0]['id'].split('/')[-1] + return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - player_init = self._search_regex( - r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init', - default=None) - if player_init: - player_info = self._parse_json(player_init, display_id, js_to_json) - media_id 
= player_info.get('mediaId') - if not media_id: - clip_id = player_info['clipId'] - feed = self._download_json( - 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, - clip_id, fatal=False) - if feed: - media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) - if not media_id: - media_id = self._download_json( - 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, - clip_id)['entries'][0]['id'].split('/')[-1] - return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - else: - entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)] - return self.playlist_result(entries) + entries = [ + self._extract_player_init(player_init, display_id) + for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] + entries.extend([ + self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)]) + return self.playlist_result( + entries, display_id, + self._og_search_title(webpage, fatal=False), + self._og_search_description(webpage)) class CBCPlayerIE(InfoExtractor): From 8854f3fe782e48f4b145eacf58cca533a9f9b199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 08:30:00 +0700 Subject: [PATCH 012/195] [README.md] Clarify newline format in cookies section (closes #11709) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 905c1b73f..a606346b2 100644 --- a/README.md +++ b/README.md @@ -841,7 +841,7 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. In order to extract cookies from browser use any conforming browser extension for exporting cookies. 
For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox). -Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. +Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). 
From 99d537a5e08499e20c3507c3f84048feacf77522 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 14 Jan 2017 07:12:31 +0100 Subject: [PATCH 013/195] [ooyala] fix typo --- youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index f00cf745b..84be2b1e3 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -53,7 +53,7 @@ class OoyalaBaseIE(InfoExtractor): elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif delivery_type == 'hds' or ext == 'mpd': + elif delivery_type == 'dash' or ext == 'mpd': formats.extend(self._extract_mpd_formats( s_url, embed_code, mpd_id='dash', fatal=False)) elif delivery_type == 'smooth': From b80e2ebc8daa1ec30396cfa69836f1d96d23028f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 18:27:22 +0700 Subject: [PATCH 014/195] [dramafever] Add support for URLs with language code (#11714) --- youtube_dl/extractor/dramafever.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 1edd8e7bd..bcd9fe2a0 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -66,7 +66,7 @@ class DramaFeverBaseIE(AMPIE): class DramaFeverIE(DramaFeverBaseIE): IE_NAME = 'dramafever' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+/[0-9]+)(?:/|$)' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P[0-9]+/[0-9]+)(?:/|$)' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { @@ -103,6 +103,9 @@ class DramaFeverIE(DramaFeverBaseIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/', + 'only_matching': True, }] def 
_real_extract(self, url): @@ -148,7 +151,7 @@ class DramaFeverIE(DramaFeverBaseIE): class DramaFeverSeriesIE(DramaFeverBaseIE): IE_NAME = 'dramafever:series' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/', 'info_dict': { From 621a2800ca259399c0c010a1cbc2c56aee90228c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 04:42:05 +0700 Subject: [PATCH 015/195] [vevo] Improve geo restriction detection --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index d82261e5e..f0a8075fb 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -206,7 +206,7 @@ class VevoIE(VevoBaseIE): note='Retrieving oauth token', errnote='Unable to retrieve oauth token') - if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: + if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage): self.raise_geo_restricted( '%s said: This page is currently unavailable in your region' % self.IE_NAME) From cd55c6ccd7b9cd0c48d475330c40f382eb0bc625 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Wed, 4 Jan 2017 01:51:08 +0300 Subject: [PATCH 016/195] [beam:live] Add extractor --- youtube_dl/extractor/beampro.py | 82 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 83 insertions(+) create mode 100644 youtube_dl/extractor/beampro.py diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py new file mode 100644 index 000000000..dc0a2b4af --- /dev/null +++ b/youtube_dl/extractor/beampro.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + 
ExtractorError, + clean_html, + compat_str, + int_or_none, + parse_iso8601, + try_get, +) + + +class BeamProLiveIE(InfoExtractor): + IE_NAME = 'Beam:live' + _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P[^?]+)$' + _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}' + _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.m3u8' + _RATINGS = {'family': 0, 'teen': 13, '18+': 18} + + _TEST = { + 'url': 'http://www.beam.pro/niterhayven', + 'info_dict': { + 'id': '261562', + 'ext': 'mp4', + 'uploader': 'niterhayven', + 'timestamp': 1483477281, + 'age_limit': 18, + 'title': 'Introducing The Witcher 3 // The Grind Starts Now!', + 'thumbnail': r're:https://.*\.jpg$', + 'upload_date': '20170103', + 'uploader_id': 373396, + 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', + 'is_live': True, + }, + 'skip': 'niterhayven is offline', + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + chan_data = self._download_json(self._API_CHANNEL.format(channel_id), channel_id) + + if not chan_data.get('online'): + raise ExtractorError('{0} is offline'.format(channel_id), expected=True) + + formats = self._extract_m3u8_formats( + self._API_MANIFEST.format(chan_data.get('id')), channel_id, ext='mp4') + + self._sort_formats(formats) + info = {} + info['formats'] = formats + if chan_data: + info.update(self._extract_info(chan_data)) + if not info.get('title'): + info['title'] = self._live_title(channel_id) + if not info.get('id'): # barely possible but just in case + info['id'] = compat_str(abs(hash(channel_id)) % (10 ** 8)) + + return info + + def _extract_info(self, info): + thumbnail = try_get(info, lambda x: x['thumbnail']['url'], compat_str) + username = try_get(info, lambda x: x['user']['url'], compat_str) + video_id = compat_str(info['id']) if info.get('id') else None + rating = info.get('audience') + + return { + 'id': video_id, + 'title': info.get('name'), + 'description': 
clean_html(info.get('description')), + 'age_limit': self._RATINGS[rating] if rating in self._RATINGS else None, + 'is_live': True if info.get('online') else False, + 'timestamp': parse_iso8601(info.get('updatedAt')), + 'uploader': info.get('token') or username, + 'uploader_id': int_or_none(info.get('userId')), + 'view_count': int_or_none(info.get('viewersTotal')), + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5ba8efb0e..9d0610d21 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -88,6 +88,7 @@ from .bbc import ( BBCCoUkPlaylistIE, BBCIE, ) +from .beampro import BeamProLiveIE from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE From af62de104f33ebf8b473b3f7935451077fa56ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 06:07:35 +0700 Subject: [PATCH 017/195] [beam:live] Improve and simplify (#10702, closes #11596) --- youtube_dl/extractor/beampro.py | 73 +++++++++++++++------------------ 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index dc0a2b4af..f3a9e3278 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -14,25 +14,23 @@ from ..utils import ( class BeamProLiveIE(InfoExtractor): IE_NAME = 'Beam:live' - _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P[^?]+)$' - _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}' - _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.m3u8' + _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P[^/?#&]+)' _RATINGS = {'family': 0, 'teen': 13, '18+': 18} - _TEST = { 'url': 'http://www.beam.pro/niterhayven', 'info_dict': { 'id': '261562', 'ext': 'mp4', - 'uploader': 'niterhayven', - 'timestamp': 1483477281, - 'age_limit': 18, 'title': 'Introducing The Witcher 3 // The Grind Starts Now!', - 'thumbnail': 
r're:https://.*\.jpg$', - 'upload_date': '20170103', - 'uploader_id': 373396, 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', + 'thumbnail': r're:https://.*\.jpg$', + 'timestamp': 1483477281, + 'upload_date': '20170103', + 'uploader': 'niterhayven', + 'uploader_id': '373396', + 'age_limit': 18, 'is_live': True, + 'view_count': int, }, 'skip': 'niterhayven is offline', 'params': { @@ -41,42 +39,35 @@ class BeamProLiveIE(InfoExtractor): } def _real_extract(self, url): - channel_id = self._match_id(url) - chan_data = self._download_json(self._API_CHANNEL.format(channel_id), channel_id) + channel_name = self._match_id(url) - if not chan_data.get('online'): - raise ExtractorError('{0} is offline'.format(channel_id), expected=True) + chan = self._download_json( + 'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name) + + if chan.get('online') is False: + raise ExtractorError( + '{0} is offline'.format(channel_name), expected=True) + + channel_id = chan['id'] formats = self._extract_m3u8_formats( - self._API_MANIFEST.format(chan_data.get('id')), channel_id, ext='mp4') - + 'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id, + channel_name, ext='mp4', m3u8_id='hls', fatal=False) self._sort_formats(formats) - info = {} - info['formats'] = formats - if chan_data: - info.update(self._extract_info(chan_data)) - if not info.get('title'): - info['title'] = self._live_title(channel_id) - if not info.get('id'): # barely possible but just in case - info['id'] = compat_str(abs(hash(channel_id)) % (10 ** 8)) - return info - - def _extract_info(self, info): - thumbnail = try_get(info, lambda x: x['thumbnail']['url'], compat_str) - username = try_get(info, lambda x: x['user']['url'], compat_str) - video_id = compat_str(info['id']) if info.get('id') else None - rating = info.get('audience') + user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id']) return { - 'id': video_id, - 'title': info.get('name'), - 'description': 
clean_html(info.get('description')), - 'age_limit': self._RATINGS[rating] if rating in self._RATINGS else None, - 'is_live': True if info.get('online') else False, - 'timestamp': parse_iso8601(info.get('updatedAt')), - 'uploader': info.get('token') or username, - 'uploader_id': int_or_none(info.get('userId')), - 'view_count': int_or_none(info.get('viewersTotal')), - 'thumbnail': thumbnail, + 'id': compat_str(chan.get('id') or channel_name), + 'title': self._live_title(chan.get('name') or channel_name), + 'description': clean_html(chan.get('description')), + 'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str), + 'timestamp': parse_iso8601(chan.get('updatedAt')), + 'uploader': chan.get('token') or try_get( + chan, lambda x: x['user']['username'], compat_str), + 'uploader_id': compat_str(user_id) if user_id else None, + 'age_limit': self._RATINGS.get(chan.get('audience')), + 'is_live': True, + 'view_count': int_or_none(chan.get('viewersTotal')), + 'formats': formats, } From 6f0be937473c5d5f60cd8e712287fcee844093d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 06:09:32 +0700 Subject: [PATCH 018/195] [YoutubeDL] Improve protocol auto determining (closes #11720) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5d654f55f..41d9a63ee 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1363,7 +1363,7 @@ class YoutubeDL(object): format['ext'] = determine_ext(format['url']).lower() # Automatically determine protocol if missing (useful for format # selection purposes) - if 'protocol' not in format: + if format.get('protocol') is None: format['protocol'] = determine_protocol(format) # Add HTTP headers, so that external programs can use them from the # json output From a7acf868a55b3d734bef564e3392020f18c20422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 10:34:39 
+0700 Subject: [PATCH 019/195] [yourupload] Fix extraction (closes #11601) --- youtube_dl/extractor/yourupload.py | 49 +++++++++++++----------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py index 4ce327845..9fa772838 100644 --- a/youtube_dl/extractor/yourupload.py +++ b/youtube_dl/extractor/yourupload.py @@ -2,44 +2,37 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import urljoin class YourUploadIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)? - (?:yourupload\.com/watch| - embed\.yourupload\.com| - embed\.yucache\.net - )/(?P[A-Za-z0-9]+) - ''' - _TESTS = [ - { - 'url': 'http://yourupload.com/watch/14i14h', - 'md5': '5e2c63385454c557f97c4c4131a393cd', - 'info_dict': { - 'id': '14i14h', - 'ext': 'mp4', - 'title': 'BigBuckBunny_320x180.mp4', - 'thumbnail': r're:^https?://.*\.jpe?g', - } - }, - { - 'url': 'http://embed.yourupload.com/14i14h', - 'only_matching': True, - }, - { - 'url': 'http://embed.yucache.net/14i14h?client_file_id=803349', - 'only_matching': True, - }, - ] + _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P[A-Za-z0-9]+)' + _TESTS = [{ + 'url': 'http://yourupload.com/watch/14i14h', + 'md5': '5e2c63385454c557f97c4c4131a393cd', + 'info_dict': { + 'id': '14i14h', + 'ext': 'mp4', + 'title': 'BigBuckBunny_320x180.mp4', + 'thumbnail': r're:^https?://.*\.jpe?g', + } + }, { + 'url': 'http://www.yourupload.com/embed/14i14h', + 'only_matching': True, + }, { + 'url': 'http://embed.yourupload.com/14i14h', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - embed_url = 'http://embed.yucache.net/{0:}'.format(video_id) + embed_url = 'http://www.yourupload.com/embed/%s' % video_id + webpage = self._download_webpage(embed_url, video_id) title = self._og_search_title(webpage) - video_url = self._og_search_video_url(webpage) + 
video_url = urljoin(embed_url, self._og_search_video_url(webpage)) thumbnail = self._og_search_thumbnail(webpage, default=None) return { From 8e4988f1a21184839dcd23d7133c250a43c5ea58 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Jan 2017 22:10:57 +0800 Subject: [PATCH 020/195] [niconico] Remove codes for downloading anonymously Apparently Niconico now blocks playing without an account Closes #11170 --- youtube_dl/extractor/niconico.py | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index a104e33f8..7e6c594c8 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -7,7 +7,6 @@ import datetime from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -40,6 +39,7 @@ class NiconicoIE(InfoExtractor): 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, }, + 'skip': 'Requires an account', }, { # File downloaded with and without credentials are different, so omit # the md5 field @@ -55,6 +55,7 @@ class NiconicoIE(InfoExtractor): 'timestamp': 1304065916, 'duration': 209, }, + 'skip': 'Requires an account', }, { # 'video exists but is marked as "deleted" # md5 is unstable @@ -65,9 +66,10 @@ class NiconicoIE(InfoExtractor): 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', 'upload_date': '20071224', - 'timestamp': 1198527840, # timestamp field has different value if logged in + 'timestamp': int, # timestamp field has different value if logged in 'duration': 304, }, + 'skip': 'Requires an account', }, { 'url': 'http://www.nicovideo.jp/watch/so22543406', 'info_dict': { @@ -79,7 +81,8 @@ class NiconicoIE(InfoExtractor): 'upload_date': '20140104', 'uploader': 'アニメロチャンネル', 'uploader_id': '312', - } + }, + 'skip': 'The viewing period of the video you were searching for has expired.', }] 
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' @@ -134,23 +137,7 @@ class NiconicoIE(InfoExtractor): 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', video_id, 'Downloading flv info') else: - # Get external player info - ext_player_info = self._download_webpage( - 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) - thumb_play_key = self._search_regex( - r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') - - # Get flv info - flv_info_data = compat_urllib_parse_urlencode({ - 'k': thumb_play_key, - 'v': video_id - }) - flv_info_request = sanitized_Request( - 'http://ext.nicovideo.jp/thumb_watch', flv_info_data, - {'Content-Type': 'application/x-www-form-urlencoded'}) - flv_info_webpage = self._download_webpage( - flv_info_request, video_id, - note='Downloading flv info', errnote='Unable to download flv info') + raise ExtractorError('Niconico videos now require logging in', expected=True) flv_info = compat_urlparse.parse_qs(flv_info_webpage) if 'url' not in flv_info: From dcae7b3fdc6e6812e78c8dba96d671ccf0ab068e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Jan 2017 22:51:54 +0800 Subject: [PATCH 021/195] [niconico] Allow login via cookies Some codes are borrowed from #7968, which is by @jlhg Closes #7968 --- ChangeLog | 5 +++++ youtube_dl/extractor/niconico.py | 18 +++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index dba18d39b..029d13426 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version + +Extractors ++ [niconico] Support login via cookies (#7968) + version 2017.01.14 Core diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 7e6c594c8..8baac23e4 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -87,8 +87,6 @@ class NiconicoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' 
_NETRC_MACHINE = 'niconico' - # Determine whether the downloader used authentication to download video - _AUTHENTICATED = False def _real_initialize(self): self._login() @@ -112,8 +110,6 @@ class NiconicoIE(InfoExtractor): if re.search(r'(?i)

Log in error

', login_results) is not None: self._downloader.report_warning('unable to log in: bad username or password') return False - # Successful login - self._AUTHENTICATED = True return True def _real_extract(self, url): @@ -131,19 +127,19 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') - if self._AUTHENTICATED: - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', - video_id, 'Downloading flv info') - else: - raise ExtractorError('Niconico videos now require logging in', expected=True) + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', + video_id, 'Downloading flv info') flv_info = compat_urlparse.parse_qs(flv_info_webpage) if 'url' not in flv_info: if 'deleted' in flv_info: raise ExtractorError('The video has been deleted.', expected=True) + elif 'closed' in flv_info: + raise ExtractorError('Niconico videos now require logging in', + expected=True) else: raise ExtractorError('Unable to find video URL') From 16e2c8f7710bffb462921dbc93adfa6274bd9334 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 16 Jan 2017 00:06:52 +0800 Subject: [PATCH 022/195] [brightcove] Recognize another player ID Closes #11688 --- ChangeLog | 1 + youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/generic.py | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 029d13426..2e0ddd4f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) version 2017.01.14 diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 2e56d1df9..5c6e99da1 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ 
-179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor): params = {} - playerID = find_param('playerID') + playerID = find_param('playerID') or find_param('playerId') if playerID is None: raise ExtractorError('Cannot find player ID') params['playerID'] = playerID diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ac29ec600..a3ac7d26b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -422,6 +422,26 @@ class GenericIE(InfoExtractor): 'skip_download': True, # m3u8 download }, }, + { + # Brightcove with alternative playerID key + 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html', + 'info_dict': { + 'id': 'nmeth.2062_SV1', + 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research', + }, + 'playlist': [{ + 'info_dict': { + 'id': '2228375078001', + 'ext': 'mp4', + 'title': 'nmeth.2062-sv1', + 'description': 'nmeth.2062-sv1', + 'timestamp': 1363357591, + 'upload_date': '20130315', + 'uploader': 'Nature Publishing Group', + 'uploader_id': '1964492299001', + }, + }], + }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', From 906420cae37ee3c2f48d23c3a4fa0543a66947d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 21:54:47 +0700 Subject: [PATCH 023/195] [limelight] Improve and make more robust (closes #11737) + Add support for direct http for videos hosted on video.llnw.net * Check handmade http URLs --- youtube_dl/extractor/limelight.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 905a0e85f..e635f3c4d 100644 --- a/youtube_dl/extractor/limelight.py +++ 
b/youtube_dl/extractor/limelight.py @@ -59,14 +59,26 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:] - urls.append(http_url) - http_fmt = fmt.copy() - http_fmt.update({ - 'url': http_url, - 'format_id': format_id.replace('rtmp', 'http'), - }) - formats.append(http_fmt) + http_format_id = format_id.replace('rtmp', 'http') + + CDN_HOSTS = ( + ('delvenetworks.com', 'cpl.delvenetworks.com'), + ('video.llnw.net', 's2.content.video.llnw.net'), + ) + for cdn_host, http_host in CDN_HOSTS: + if cdn_host not in rtmp.group('host').lower(): + continue + http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:]) + urls.append(http_url) + if self._is_valid_url(http_url, video_id, http_format_id): + http_fmt = fmt.copy() + http_fmt.update({ + 'url': http_url, + 'format_id': http_format_id, + }) + formats.append(http_fmt) + break + fmt.update({ 'url': rtmp.group('url'), 'play_path': rtmp.group('playpath'), From 0ce8c66fb05fefbe51ac1eca8d3ddbd561b38a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 22:07:12 +0700 Subject: [PATCH 024/195] [options] Include custom conf in final argv (closes #11741) --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0eb4924b6..0b8c1671d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -867,7 +867,7 @@ def parseOpts(overrideArguments=None): if '--ignore-config' not in system_conf: user_conf = _readUserConf() - argv = system_conf + user_conf + command_line_conf + argv = system_conf + user_conf + custom_conf + command_line_conf opts, args = parser.parse_args(argv) if opts.verbose: for conf_label, conf in ( From 79fc8496c6ab423d591f9ed1a41358d038242bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 
Jan 2017 23:31:50 +0700 Subject: [PATCH 025/195] [xiami] Improve extraction (closes #11699) * Relax _VALID_URLs * Improve track metadata extraction --- youtube_dl/extractor/xiami.py | 53 +++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index 86abef257..d017e03de 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -16,7 +16,9 @@ class XiamiBaseIE(InfoExtractor): return webpage def _extract_track(self, track, track_id=None): - title = track['title'] + track_name = track.get('songName') or track.get('name') or track['subName'] + artist = track.get('artist') or track.get('artist_name') or track.get('singers') + title = '%s - %s' % (artist, track_name) if artist else track_name track_url = self._decrypt(track['location']) subtitles = {} @@ -31,9 +33,10 @@ class XiamiBaseIE(InfoExtractor): 'thumbnail': track.get('pic') or track.get('album_pic'), 'duration': int_or_none(track.get('length')), 'creator': track.get('artist', '').split(';')[0], - 'track': title, - 'album': track.get('album_name'), - 'artist': track.get('artist'), + 'track': track_name, + 'track_number': int_or_none(track.get('track')), + 'album': track.get('album_name') or track.get('title'), + 'artist': artist, 'subtitles': subtitles, } @@ -68,14 +71,14 @@ class XiamiBaseIE(InfoExtractor): class XiamiSongIE(XiamiBaseIE): IE_NAME = 'xiami:song' IE_DESC = '虾米音乐' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.xiami.com/song/1775610518', 'md5': '521dd6bea40fd5c9c69f913c232cb57e', 'info_dict': { 'id': '1775610518', 'ext': 'mp3', - 'title': 'Woman', + 'title': 'HONNE - Woman', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 265, 'creator': 'HONNE', @@ -95,7 +98,7 @@ class XiamiSongIE(XiamiBaseIE): 'info_dict': { 'id': '1775256504', 
'ext': 'mp3', - 'title': '悟空', + 'title': '戴荃 - 悟空', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 200, 'creator': '戴荃', @@ -109,6 +112,26 @@ class XiamiSongIE(XiamiBaseIE): }, }, 'skip': 'Georestricted', + }, { + 'url': 'http://www.xiami.com/song/1775953850', + 'info_dict': { + 'id': '1775953850', + 'ext': 'mp3', + 'title': 'До Скону - Чума Пожирает Землю', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 683, + 'creator': 'До Скону', + 'track': 'Чума Пожирает Землю', + 'track_number': 7, + 'album': 'Ад', + 'artist': 'До Скону', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.xiami.com/song/xLHGwgd07a1', + 'only_matching': True, }] def _real_extract(self, url): @@ -124,7 +147,7 @@ class XiamiPlaylistBaseIE(XiamiBaseIE): class XiamiAlbumIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:album' IE_DESC = '虾米音乐 - 专辑' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[^/?#&]+)' _TYPE = '1' _TESTS = [{ 'url': 'http://www.xiami.com/album/2100300444', @@ -136,28 +159,34 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE): }, { 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', 'only_matching': True, + }, { + 'url': 'http://www.xiami.com/album/URVDji2a506', + 'only_matching': True, }] class XiamiArtistIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:artist' IE_DESC = '虾米音乐 - 歌手' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[^/?#&]+)' _TYPE = '2' - _TEST = { + _TESTS = [{ 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', 'info_dict': { 'id': '2132', }, 'playlist_count': 20, 'skip': 'Georestricted', - } + }, { + 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99', + 'only_matching': True, + }] class XiamiCollectionIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:collection' IE_DESC = '虾米音乐 - 精选集' - _VALID_URL = 
r'https?://(?:www\.)?xiami\.com/collect/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[^/?#&]+)' _TYPE = '3' _TEST = { 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', From ddd53c392e0b3d3d2c62ba28117a9b07702c5bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:42:04 +0700 Subject: [PATCH 026/195] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2e0ddd4f6..ee59e120c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,22 @@ version +Core +* [options] Apply custom config to final composite configuration (#11741) +* [YoutubeDL] Improve protocol auto determining (#11720) + Extractors +* [xiami] Relax URL regular expressions +* [xiami] Improve track metadata extraction (#11699) ++ [limelight] Check hand-make direct HTTP links ++ [limelight] Add support for direct HTTP links at video.llnw.net (#11737) + [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) +* [yourupload] Fix extraction (#11601) ++ [beam:live] Add support for beam.pro live streams (#10702, #11596) +* [vevo] Improve geo restriction detection ++ [dramafever] Add support for URLs with language code (#11714) +* [cbc] Improve playlist support (#11704) + version 2017.01.14 From c1c2fe2045911c310fd5d2eda7bbb53ad581d250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:44:04 +0700 Subject: [PATCH 027/195] release 2017.01.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a7bf2b90c..c04f6246a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and 
ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.14 +[debug] youtube-dl version 2017.01.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ee59e120c..f6d73f982 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.16 Core * [options] Apply custom config to final composite configuration (#11741) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0f6c4ec0c..a3c76d5db 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -86,6 +86,7 @@ - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:iplayer:playlist** - **bbc.co.uk:playlist** + - **Beam:live** - **Beatport** - **Beeg** - **BehindKink** diff --git 
a/youtube_dl/version.py b/youtube_dl/version.py index 17c6f9eb2..c20718dd6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.14' +__version__ = '2017.01.16' From c0bd51c090d617811f5e405294dce06f5871d717 Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Mon, 16 Jan 2017 22:19:52 +0300 Subject: [PATCH 028/195] [naver] Support tv.naver.com links --- youtube_dl/extractor/naver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 055070ff5..aba0a9a70 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -12,7 +12,7 @@ from ..utils import ( class NaverIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' + _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ 'url': 'http://tvcast.naver.com/v/81652', From 8a5f0a6357746d293f7330e40a3cf5823b1b626d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 21:19:57 +0700 Subject: [PATCH 029/195] [naver] Update tests for #11743 --- youtube_dl/extractor/naver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index aba0a9a70..e8131333f 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -15,7 +15,7 @@ class NaverIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ - 'url': 'http://tvcast.naver.com/v/81652', + 'url': 'http://tv.naver.com/v/81652', 'info_dict': { 'id': '81652', 'ext': 'mp4', @@ -24,7 +24,7 @@ class NaverIE(InfoExtractor): 'upload_date': '20130903', }, }, { - 'url': 'http://tvcast.naver.com/v/395837', + 'url': 'http://tv.naver.com/v/395837', 'md5': '638ed4c12012c458fefcddfd01f173cd', 'info_dict': { 'id': '395837', @@ -34,6 +34,9 @@ class NaverIE(InfoExtractor): 
'upload_date': '20150519', }, 'skip': 'Georestricted', + }, { + 'url': 'http://tvcast.naver.com/v/81652', + 'only_matching': True, }] def _real_extract(self, url): From 136078966b2047b21e9784060cebdc893c643ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 23:14:07 +0700 Subject: [PATCH 030/195] [imdb] Extend _VALID_URL (closes #11744) --- youtube_dl/extractor/imdb.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index f0fc8d49a..f95c00c73 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -13,7 +13,7 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P\d+)' _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', @@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor): }, { 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897', 'only_matching': True, + }, { + 'url': 'http://www.imdb.com/videoplayer/vi1562949145', + 'only_matching': True, }] def _real_extract(self, url): From 4e44598547b02d42aa628506245c40c3d633814e Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Mon, 9 Jan 2017 21:19:55 +0100 Subject: [PATCH 031/195] [20min] Fix extraction --- youtube_dl/extractor/twentymin.py | 37 ++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index b721ecb0a..68d5a0cb5 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -13,10 +13,10 @@ class TwentyMinutenIE(InfoExtractor): _TESTS = [{ # regular video 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', - 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', + 'md5': 
'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', - 'ext': 'flv', + 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' @@ -34,17 +34,29 @@ class TwentyMinutenIE(InfoExtractor): 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' }, 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', + }, { + # news article with video + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'md5': '372917ba85ed969e176d287ae54b2f94', + 'info_dict': { + 'id': '523629', + 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', + 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', + } }, { # YouTube embed 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f', + 'md5': 'e7e237fd98da2a3cc1422ce683df234d', 'info_dict': { 'id': 'ivM7A7SpDOs', 'ext': 'mp4', 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', 'upload_date': '20160424', - 'uploader': 'RTVCM Castilla-La Mancha', + 'uploader': 'CMM Castilla-La Mancha Media', 'uploader_id': 'RTVCM', }, 'add_ie': ['Youtube'], @@ -77,18 +89,31 @@ class TwentyMinutenIE(InfoExtractor): r'^20 [Mm]inuten.*? 
-', '', self._og_search_title(webpage)), ' - News') if not video_id: + params = self._html_search_regex( + r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', + webpage, '20min embed URL') video_id = self._search_regex( - r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id') + r'.*videoId@(\d+)', + params, 'Video Id') description = self._html_search_meta( 'description', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) + formats = [] + format_preferences = [('sd', ''), ('hd', 'h')] + for format_id, url_extension in format_preferences: + format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) + formats.append({ + 'format_id': format_id, + 'url': format_url, + }) + return { 'id': video_id, 'display_id': display_id, - 'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'formats': formats, } From 538b17a09c6546d58babc5eb4a3abc08dcff2d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:05:11 +0700 Subject: [PATCH 032/195] [20min] Improve --- youtube_dl/extractor/twentymin.py | 122 ++++++++++++------------------ 1 file changed, 47 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index 68d5a0cb5..4fd1aa4bf 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -4,116 +4,88 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( + int_or_none, + try_get, +) class TwentyMinutenIE(InfoExtractor): IE_NAME = '20min' - _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P\d+)|(?:[^/]+/)*(?P[^/#?]+))' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?20min\.ch/ + (?: + videotv/*\?.*?\bvid=| + videoplayer/videoplayer\.html\?.*?\bvideoId@ + ) + (?P\d+) + ''' _TESTS = [{ - # regular 
video 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', 'md5': 'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', - 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', - 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' - } - }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'md5': 'cd4cbb99b94130cff423e967cd275e5e', - 'info_dict': { - 'id': '469408', - 'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'ext': 'flv', - 'title': '«Wir müssen mutig nach vorne schauen»', - 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', - 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' + 'thumbnail': r're:https?://.*\.jpg$', }, - 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', - 'md5': '372917ba85ed969e176d287ae54b2f94', + 'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629', 'info_dict': { 'id': '523629', - 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', 'ext': 'mp4', 'title': 'So kommen Sie bei Eis und Schnee sicher an', - 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. 
Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', - 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', - } - }, { - # YouTube embed - 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'e7e237fd98da2a3cc1422ce683df234d', - 'info_dict': { - 'id': 'ivM7A7SpDOs', - 'ext': 'mp4', - 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', - 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', - 'upload_date': '20160424', - 'uploader': 'CMM Castilla-La Mancha Media', - 'uploader_id': 'RTVCM', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + 'thumbnail': r're:https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, }, - 'add_ie': ['Youtube'], }, { 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', 'only_matching': True, - }, { - 'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411', - 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r']+src=(["\'])(?P(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', + webpage)] + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + video_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video = self._download_json( + 'http://api.20min.ch/video/%s/show' % video_id, + video_id)['content'] - youtube_url = self._html_search_regex( - r']+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', - webpage, 'YouTube embed URL', default=None) - if youtube_url is not None: - return self.url_result(youtube_url, 'Youtube') + title = video['title'] - title = self._html_search_regex( - r'

.*?(.+?)

', - webpage, 'title', default=None) - if not title: - title = remove_end(re.sub( - r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') + formats = [{ + 'format_id': format_id, + 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p), + 'quality': quality, + } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])] + self._sort_formats(formats) - if not video_id: - params = self._html_search_regex( - r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', - webpage, '20min embed URL') - video_id = self._search_regex( - r'.*videoId@(\d+)', - params, 'Video Id') + description = video.get('lead') + thumbnail = video.get('thumbnail') - description = self._html_search_meta( - 'description', webpage, 'description') - thumbnail = self._og_search_thumbnail(webpage) + def extract_count(kind): + return try_get( + video, + lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind])) - formats = [] - format_preferences = [('sd', ''), ('hd', 'h')] - for format_id, url_extension in format_preferences: - format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) - formats.append({ - 'format_id': format_id, - 'url': format_url, - }) + like_count = extract_count('up') + dislike_count = extract_count('down') return { 'id': video_id, - 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'like_count': like_count, + 'dislike_count': dislike_count, 'formats': formats, } From b687c85eab942553e925256ad10de693227ba553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:08:31 +0700 Subject: [PATCH 033/195] [extractor/generic] Add support for 20 minuten embeds (closes #11683, closes #11751) --- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a3ac7d26b..154545df7 
100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -78,6 +78,7 @@ from .vbox7 import Vbox7IE from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE +from .twentymin import TwentyMinutenIE class GenericIE(InfoExtractor): @@ -1468,6 +1469,20 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 2, }, + { + # 20 minuten embed + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'info_dict': { + 'id': '523629', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TwentyMinutenIE.ie_key()], + } # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2421,6 +2436,12 @@ class GenericIE(InfoExtractor): if videa_urls: return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + # Look for 20 minuten embeds + twentymin_urls = TwentyMinutenIE._extract_urls(webpage) + if twentymin_urls: + return _playlist_from_matches( + twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') From aaf2b7c57a3d2dc9ba12f1aa401cba088e114916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:20:11 +0700 Subject: [PATCH 034/195] [canalplus] Add fallback for video id (closes #11764) --- youtube_dl/extractor/canalplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 10cf165bc..b3f76a7b1 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -107,7 +107,7 @@ class CanalplusIE(InfoExtractor): [r']+?videoId=(["\'])(?P\d+)', r'id=["\']canal_video_player(?P\d+)', r'data-video=["\'](?P\d+)'], - webpage, 'video id', group='id') + webpage, 'video 
id', default=mobj.group('vid'), group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) video_data = self._download_json(info_url, video_id, 'Downloading video JSON') From baa3e1845b26d9756642325bbb0d58e22025b2ec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 18 Jan 2017 17:00:15 +0100 Subject: [PATCH 035/195] [bilibili] fix extraction(closes #11077) --- youtube_dl/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 5051934ef..85ea5e6ee 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -34,8 +34,8 @@ class BiliBiliIE(InfoExtractor): }, } - _APP_KEY = '6f90a59ac58a4123' - _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' + _APP_KEY = '84956560bc028eb7' + _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' def _real_extract(self, url): video_id = self._match_id(url) From 460f61fac42592eb273b7d58efc314cc83687b8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:06:46 +0700 Subject: [PATCH 036/195] [ChangeLog] Actualize --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index f6d73f982..994895edc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +version + +Extractors +* [bilibili] Fix extraction (#11077) ++ [canalplus] Add fallback for video id (#11764) +* [20min] Fix extraction (#11683, #11751) +* [imdb] Extend URL regular expression (#11744) ++ [naver] Add support for tv.naver.com links (#11743) + + version 2017.01.16 Core From 1560baacc677c43c1007acfc89b8190f81a59684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:10:00 +0700 Subject: [PATCH 037/195] release 2017.01.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md 
b/.github/ISSUE_TEMPLATE.md index c04f6246a..38cb13a33 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.16 +[debug] youtube-dl version 2017.01.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 994895edc..5aa4e3c6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.18 Extractors * [bilibili] Fix extraction (#11077) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c20718dd6..669f60f65 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ 
from __future__ import unicode_literals -__version__ = '2017.01.16' +__version__ = '2017.01.18' From f1e70fc2ff6f1536873ed73ffc9bff63653fd5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:34:11 +0700 Subject: [PATCH 038/195] [mtv] Relax triforce feed regex (closes #11766) --- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 00a980c7d..e48ea2481 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -211,7 +211,7 @@ class MTVServicesInfoExtractor(InfoExtractor): def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): triforce_feed = self._parse_json(self._search_regex( - r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, + r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage, 'triforce feed', default='{}'), video_id, fatal=False) data_zone = self._search_regex( From eb3f008c9e686f38c50511004d5c9a51b2e8cdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 19 Jan 2017 04:49:31 +0700 Subject: [PATCH 039/195] [uol] Fix extraction (closes #11770) --- youtube_dl/extractor/uol.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py index c27c64387..e67083004 100644 --- a/youtube_dl/extractor/uol.py +++ b/youtube_dl/extractor/uol.py @@ -84,12 +84,27 @@ class UOLIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - if not video_id.isdigit(): - embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id) - video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id') + media_id = None + + if video_id.isdigit(): + media_id = video_id + + if not media_id: + embed_page = self._download_webpage( + 'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, + 
video_id, 'Downloading embed page', fatal=False) + if embed_page: + media_id = self._search_regex( + (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'), + embed_page, 'media id', default=None) + + if not media_id: + webpage = self._download_webpage(url, video_id) + media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id') + video_data = self._download_json( - 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id, - video_id)['item'] + 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id, + media_id)['item'] title = video_data['title'] query = { @@ -118,7 +133,7 @@ class UOLIE(InfoExtractor): tags.append(tag_description) return { - 'id': video_id, + 'id': media_id, 'title': title, 'description': clean_html(video_data.get('desMedia')), 'thumbnail': video_data.get('thumbnail'), From cccd70a2752ad079ed560e42ff085adcabebaac2 Mon Sep 17 00:00:00 2001 From: james mike dupont Date: Thu, 19 Jan 2017 04:18:13 -0500 Subject: [PATCH 040/195] untie --- youtube_dl/extractor/flipagram.py | 2 +- youtube_dl/extractor/vimeo.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py index 1902a2393..b7be40f1b 100644 --- a/youtube_dl/extractor/flipagram.py +++ b/youtube_dl/extractor/flipagram.py @@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor): 'filesize': int_or_none(cover.get('size')), } for cover in flipagram.get('covers', []) if cover.get('url')] - # Note that this only retrieves comments that are initally loaded. + # Note that this only retrieves comments that are initially loaded. # For videos with large amounts of comments, most won't be retrieved. 
comments = [] for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []): diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 2e98b0e6f..add753635 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -338,7 +338,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'expected_warnings': ['Unable to download JSON metadata'], }, { - # redirects to ondemand extractor and should be passed throught it + # redirects to ondemand extractor and should be passed through it # for successful extraction 'url': 'https://vimeo.com/73445910', 'info_dict': { From 1fe84be0f3b36822af804db6cf7c06a1ac5ac688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Jan 2017 00:47:04 +0700 Subject: [PATCH 041/195] [1tv] Add support for hls (closes #11786) --- youtube_dl/extractor/firsttv.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index c6fb67057..081c71842 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -86,18 +86,43 @@ class FirstTVIE(InfoExtractor): title = item['title'] quality = qualities(QUALITIES) formats = [] + path = None for f in item.get('mbr', []): src = f.get('src') if not src or not isinstance(src, compat_str): continue tbr = int_or_none(self._search_regex( r'_(\d{3,})\.mp4', src, 'tbr', default=None)) + if not path: + path = self._search_regex( + r'//[^/]+/(.+?)_\d+\.mp4', src, + 'm3u8 path', default=None) formats.append({ 'url': src, 'format_id': f.get('name'), 'tbr': tbr, - 'quality': quality(f.get('name')), + 'source_preference': quality(f.get('name')), }) + # m3u8 URL format is reverse engineered from [1] (search for + # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru) + # is taken from [2]. + # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted + # 2. 
http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834 + if not path and len(formats) == 1: + path = self._search_regex( + r'//[^/]+/(.+?$)', formats[0]['url'], + 'm3u8 path', default=None) + if path: + if len(formats) == 1: + m3u8_path = ',' + else: + tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)] + m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4') + formats.extend(self._extract_m3u8_formats( + 'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8' + % (path, m3u8_path), + display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) From d77ac737900eede5e1508b9822e71c8595fe0879 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 21:59:24 +0800 Subject: [PATCH 042/195] [ustream] Add UstreamIE._extract_url() Ref: #11547 --- youtube_dl/extractor/generic.py | 8 ++++---- youtube_dl/extractor/ustream.py | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 154545df7..a7c104845 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -79,6 +79,7 @@ from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE +from .ustream import UstreamIE class GenericIE(InfoExtractor): @@ -2112,10 +2113,9 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'TED') # Look for embedded Ustream videos - mobj = re.search( - r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'Ustream') + ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) # Look for embedded arte.tv player mobj = re.search( diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 
0c06bf36b..5737d4d16 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -69,6 +69,13 @@ class UstreamIE(InfoExtractor): }, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) + if mobj is not None: + return mobj.group('url') + def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): def num_to_hex(n): return hex(n)[2:] From 4447fb23320b9214ab3188717794d00b18887617 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:11:43 +0800 Subject: [PATCH 043/195] [cspan] Support Ustream embedded videos Closes #11547 --- ChangeLog | 6 ++++++ youtube_dl/extractor/cspan.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5aa4e3c6b..217971ec6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [cspan] Support Ustream embedded videos (#11547) + + version 2017.01.18 Extractors diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7e5d4f227..92a827a4b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, ) from .senateisvp import SenateISVPIE +from .ustream import UstreamIE class CSpanIE(InfoExtractor): @@ -57,12 +58,30 @@ class CSpanIE(InfoExtractor): 'params': { 'skip_download': True, # m3u8 downloads } + }, { + # Ustream embedded video + 'url': 'https://www.c-span.org/video/?114917-1/armed-services', + 'info_dict': { + 'id': '58428542', + 'ext': 'flv', + 'title': 'USHR07 Armed Services Committee', + 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee', + 'timestamp': 1423060374, + 'upload_date': '20150204', + 'uploader': 'HouseCommittee', + 'uploader_id': '12987475', + }, }] def _real_extract(self, url): video_id = self._match_id(url) video_type = None webpage = self._download_webpage(url, video_id) + + 
ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) + # We first look for clipid, because clipprog always appears before patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) From 972efe60c3fdaff83f9b8e7a637ee81f4c27bb64 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:13:54 +0800 Subject: [PATCH 044/195] [generic] Remove a dead test The web page does not contain a video anymore Ref: #2694, #2696 --- youtube_dl/extractor/generic.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a7c104845..40201f311 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -589,17 +589,6 @@ class GenericIE(InfoExtractor): 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', } }, - # Embedded Ustream video - { - 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', - 'md5': '27b99cdb639c9b12a79bca876a073417', - 'info_dict': { - 'id': '45734260', - 'ext': 'flv', - 'uploader': 'AU SPA: The NSA and Privacy', - 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman' - } - }, # nowvideo embed hidden behind percent encoding { 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', From f3c21cb7a7e2d8685f466368e3142739077498cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:25:20 +0800 Subject: [PATCH 045/195] [cspan] Fix _TESTS --- youtube_dl/extractor/cspan.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 92a827a4b..d4576160b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -23,14 +23,13 @@ class CSpanIE(InfoExtractor): 'md5': '94b29a4f131ff03d23471dd6f60b6a1d', 
'info_dict': { 'id': '315139', - 'ext': 'mp4', 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', - 'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.', }, + 'playlist_mincount': 2, 'skip': 'Regularly fails on travis, for unknown reasons', }, { 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', - 'md5': '8e5fbfabe6ad0f89f3012a7943c1287b', + # md5 is unstable 'info_dict': { 'id': 'c4486943', 'ext': 'mp4', @@ -39,14 +38,11 @@ class CSpanIE(InfoExtractor): } }, { 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', - 'md5': '2ae5051559169baadba13fc35345ae74', 'info_dict': { 'id': '342759', - 'ext': 'mp4', 'title': 'General Motors Ignition Switch Recall', - 'duration': 14848, - 'description': 'md5:118081aedd24bf1d3b68b3803344e7f3' }, + 'playlist_mincount': 6, }, { # Video from senate.gov 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', From f4ec8dce481564589419e4dffc45437211daa13f Mon Sep 17 00:00:00 2001 From: Iulian Onofrei Date: Fri, 20 Jan 2017 18:25:04 +0200 Subject: [PATCH 046/195] Update README.md (#11787) Add audio format argument dependency warning --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0b8c1671d..0d2ce8d15 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -751,7 +751,7 @@ def parseOpts(overrideArguments=None): help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') + help='Specify 
audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x') postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', From 12afdc2ad617dedfd7d60654b8c57b99604332ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Jan 2017 18:10:32 +0700 Subject: [PATCH 047/195] [youtube] Extract episode metadata (closes #9695, closes #11774) --- youtube_dl/extractor/youtube.py | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e6b840735..63597dd16 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -864,6 +864,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + # YouTube Red video with episode data + 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4', + 'info_dict': { + 'id': 'iqKdEhx-dD4', + 'ext': 'mp4', + 'title': 'Isolation - Mind Field (Ep 1)', + 'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822', + 'upload_date': '20170118', + 'uploader': 'Vsauce', + 'uploader_id': 'Vsauce', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', + 'license': 'Standard YouTube License', + 'series': 'Mind Field', + 'season_number': 1, + 'episode_number': 1, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': [ + 'Skipping DASH manifest', + ], + }, { # itag 212 'url': '1t24XAntNCY', @@ -1454,6 +1478,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: video_alt_title = video_creator = None + m_episode = re.search( + r']+id="watch7-headline"[^>]*>\s*]*>.*?>(?P[^<]+)\s*S(?P\d+)\s*•\s*E(?P\d+)', + video_webpage) + if m_episode: + series = m_episode.group('series') + season_number = int(m_episode.group('season')) + episode_number = int(m_episode.group('episode')) + else: + series = season_number = episode_number = None + m_cat_container = self._search_regex( 
r'(?s)]*>\s*Category\s*\s*]*>(.*?)', video_webpage, 'categories', default=None) @@ -1743,6 +1777,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'is_live': is_live, 'start_time': start_time, 'end_time': end_time, + 'series': series, + 'season_number': season_number, + 'episode_number': episode_number, } From 04a3d4d23472ffa4a482d8ebf2d8fdbb3e974327 Mon Sep 17 00:00:00 2001 From: ha shao Date: Sat, 21 Jan 2017 15:47:39 +0800 Subject: [PATCH 048/195] [vimeo:channel] Extract videos' titles for playlist entries --- youtube_dl/extractor/vimeo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index add753635..a6bbd4c05 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -730,12 +730,12 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): # Try extracting href first since not all videos are available via # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) clips = re.findall( - r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)', webpage) + r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage) if clips: - for video_id, video_url in clips: + for video_id, video_url, video_title in clips: yield self.url_result( compat_urlparse.urljoin(base_url, video_url), - VimeoIE.ie_key(), video_id=video_id) + VimeoIE.ie_key(), video_id=video_id, video_title=video_title) # More relaxed fallback else: for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): From 7c20b7484cc91a4818a98ca8d5b7ef94d5c38fb8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 22 Jan 2017 02:06:34 +0800 Subject: [PATCH 049/195] [nextmedia] Support redirected URLs --- ChangeLog | 1 + youtube_dl/extractor/nextmedia.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 217971ec6..00c8a063f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ 
[nextmedia] Support redirected URLs + [cspan] Support Ustream embedded videos (#11547) diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index c900f232a..626ed8b49 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import parse_iso8601 @@ -30,6 +31,12 @@ class NextMediaIE(InfoExtractor): return self._extract_from_nextmedia_page(news_id, url, page) def _extract_from_nextmedia_page(self, news_id, url, page): + redirection_url = self._search_regex( + r'window\.location\.href\s*=\s*([\'"])(?P(?!\1).+)\1', + page, 'redirection URL', default=None, group='url') + if redirection_url: + return self.url_result(compat_urlparse.urljoin(url, redirection_url)) + title = self._fetch_title(page) video_url = self._search_regex(self._URL_PATTERN, page, 'video url') @@ -93,7 +100,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' 
_TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -157,6 +164,10 @@ class AppleDailyIE(NextMediaIE): }, { 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/', 'only_matching': True, + }, { + # Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694 + 'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694', + 'only_matching': True, }] _URL_PATTERN = r'\{url: \'(.+)\'\}' From e84495cd8d7bdb89bbfe233263bd8ad0b448f8cc Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Sat, 21 Jan 2017 15:23:26 +0100 Subject: [PATCH 050/195] [azmedien] Add extractor (closes #11785) --- youtube_dl/extractor/azmedientv.py | 87 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 ++ 2 files changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/azmedientv.py diff --git a/youtube_dl/extractor/azmedientv.py b/youtube_dl/extractor/azmedientv.py new file mode 100644 index 000000000..51d46fb94 --- /dev/null +++ b/youtube_dl/extractor/azmedientv.py @@ -0,0 +1,87 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import get_element_by_class + + +class AZMedienTVIE(InfoExtractor): + IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch videos' + _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+(?:/[0-9]+-segment-(?:[^/#]+#)?|#)|#)(?P[^#]+)' + + _TESTS = [{ + # URL with 'segment' + 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'md5': 'fda85ada1299cee517a622bfbc5f6b66', + 'info_dict': { + 'id': '1_2444peh4', + 'ext': 'mov', + 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', + 'description': 
'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', + 'uploader_id': 'TeleZ?ri', + 'upload_date': '20161218', + 'timestamp': 1482084490, + } + }, { + # URL with 'segment' and fragment: + 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', + 'only_matching': True + }, { + # URL with 'episode' and fragment: + 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', + 'only_matching': True + }, { + # URL with 'show' and fragment: + 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + kaltura_partner_id = self._html_search_regex( + r']+src=["\']https?://www\.kaltura\.com/.*/partner_id/([0-9]+)', + webpage, 'Kaltura partner ID') + kaltura_entry_id = self._html_search_regex( + r']+data-id=["\'](.*?)["\'][^>]+data-slug=["\']%s' % video_id, + webpage, 'Kaltura entry ID') + + return self.url_result( + 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id), + ie=KalturaIE.ie_key()) + + +class AZMedienTVShowIE(InfoExtractor): + IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch shows' + _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + + _TESTS = [{ + # URL with 'episode': + 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'info_dict': { + 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'title': 'News', + }, + 'playlist_count': 9, + }, { + # URL with 'show' only: + 'url': 'http://www.telezueri.ch/86-show-talktaeglich', + 'only_matching': True + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, 
show_id) + + title = get_element_by_class('title-block-cell', webpage) + if title: + title = title.strip() + + entries = [self.url_result(m.group('url'), ie=AZMedienTVIE.ie_key()) for m in re.finditer( + r']+data-real=["\'](?P.+?)["\']', webpage)] + + return self.playlist_result( + entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9d0610d21..4cfb3c70f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -77,6 +77,10 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) +from .azmedientv import ( + AZMedienTVIE, + AZMedienTVShowIE, +) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE From 94629e537f2f6ed80b19e3863456f9ba8073af36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Jan 2017 02:15:20 +0700 Subject: [PATCH 051/195] [azmedien] Improve (closes #11784) --- youtube_dl/extractor/azmedien.py | 132 +++++++++++++++++++++++++++++ youtube_dl/extractor/azmedientv.py | 87 ------------------- youtube_dl/extractor/extractors.py | 6 +- 3 files changed, 135 insertions(+), 90 deletions(-) create mode 100644 youtube_dl/extractor/azmedien.py delete mode 100644 youtube_dl/extractor/azmedientv.py diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py new file mode 100644 index 000000000..059dc6e4b --- /dev/null +++ b/youtube_dl/extractor/azmedien.py @@ -0,0 +1,132 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import ( + get_element_by_class, + strip_or_none, +) + + +class AZMedienBaseIE(InfoExtractor): + def _kaltura_video(self, partner_id, entry_id): + return self.url_result( + 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), + video_id=entry_id) + + +class AZMedienIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien videos' + _VALID_URL = r'''(?x) 
+ https?:// + (?:www\.)? + (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + [0-9]+-show-[^/\#]+ + (?: + /[0-9]+-episode-[^/\#]+ + (?: + /[0-9]+-segment-(?:[^/\#]+\#)?| + \# + )| + \# + ) + (?P[^\#]+) + ''' + + _TESTS = [{ + # URL with 'segment' + 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'info_dict': { + 'id': '1_2444peh4', + 'ext': 'mov', + 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', + 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', + 'uploader_id': 'TeleZ?ri', + 'upload_date': '20161218', + 'timestamp': 1482084490, + }, + 'params': { + 'skip_download': True, + }, + }, { + # URL with 'segment' and fragment: + 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', + 'only_matching': True + }, { + # URL with 'episode' and fragment: + 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', + 'only_matching': True + }, { + # URL with 'show' and fragment: + 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + partner_id = self._search_regex( + r']+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)', + webpage, 'kaltura partner id') + entry_id = self._html_search_regex( + r']+data-id=(["\'])(?P(?:(?!\1).)+)\1[^>]+data-slug=["\']%s' + % re.escape(video_id), webpage, 'kaltura entry id', group='id') + + return self._kaltura_video(partner_id, entry_id) + + +class AZMedienShowIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien shows' + _VALID_URL = 
r'https?://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + + _TESTS = [{ + # URL with 'episode' + 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'info_dict': { + 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'title': 'News - Donnerstag, 15. Dezember 2016', + }, + 'playlist_count': 9, + }, { + # URL with 'show' only + 'url': 'http://www.telezueri.ch/86-show-talktaeglich', + 'only_matching': True + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + + entries = [] + + partner_id = self._search_regex( + r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', + webpage, 'kaltura partner id', default=None) + + if partner_id: + entries = [ + self._kaltura_video(partner_id, m.group('id')) + for m in re.finditer( + r'data-id=(["\'])(?P(?:(?!\1).)+)\1', webpage)] + + if not entries: + entries = [ + self.url_result(m.group('url'), ie=AZMedienIE.ie_key()) + for m in re.finditer( + r']+data-real=(["\'])(?Phttp.+?)\1', webpage)] + + title = self._search_regex( + r'episodeShareTitle\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'title', + default=strip_or_none(get_element_by_class( + 'title-block-cell', webpage)), group='title') + + return self.playlist_result(entries, show_id, title) diff --git a/youtube_dl/extractor/azmedientv.py b/youtube_dl/extractor/azmedientv.py deleted file mode 100644 index 51d46fb94..000000000 --- a/youtube_dl/extractor/azmedientv.py +++ /dev/null @@ -1,87 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .kaltura import KalturaIE -from ..utils import get_element_by_class - - -class AZMedienTVIE(InfoExtractor): - IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch videos' - _VALID_URL = 
r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+(?:/[0-9]+-segment-(?:[^/#]+#)?|#)|#)(?P<id>[^#]+)' - - _TESTS = [{ - # URL with 'segment' - 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', - 'md5': 'fda85ada1299cee517a622bfbc5f6b66', - 'info_dict': { - 'id': '1_2444peh4', - 'ext': 'mov', - 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', - 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', - 'uploader_id': 'TeleZ?ri', - 'upload_date': '20161218', - 'timestamp': 1482084490, - } - }, { - # URL with 'segment' and fragment: - 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', - 'only_matching': True - }, { - # URL with 'episode' and fragment: - 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', - 'only_matching': True - }, { - # URL with 'show' and fragment: - 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', - 'only_matching': True - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - kaltura_partner_id = self._html_search_regex( - r'<script[^>]+src=["\']https?://www\.kaltura\.com/.*/partner_id/([0-9]+)', - webpage, 'Kaltura partner ID') - kaltura_entry_id = self._html_search_regex( - r'<a[^>]+data-id=["\'](.*?)["\'][^>]+data-slug=["\']%s' % video_id, - webpage, 'Kaltura entry ID') - - return self.url_result( - 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id), - ie=KalturaIE.ie_key()) - - -class AZMedienTVShowIE(InfoExtractor): - IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch shows' - _VALID_URL = 
r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' - - _TESTS = [{ - # URL with 'episode': - 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'info_dict': { - 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'title': 'News', - }, - 'playlist_count': 9, - }, { - # URL with 'show' only: - 'url': 'http://www.telezueri.ch/86-show-talktaeglich', - 'only_matching': True - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id) - - title = get_element_by_class('title-block-cell', webpage) - if title: - title = title.strip() - - entries = [self.url_result(m.group('url'), ie=AZMedienTVIE.ie_key()) for m in re.finditer( - r'<a href=["\']#["\'][^>]+data-real=["\'](?P<url>.+?)["\']', webpage)] - - return self.playlist_result( - entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4cfb3c70f..de5f94738 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -77,9 +77,9 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) -from .azmedientv import ( - AZMedienTVIE, - AZMedienTVShowIE, +from .azmedien import ( + AZMedienIE, + AZMedienShowIE, ) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE From 42697bab3c4d65a232054d5d5482cc177da12c72 Mon Sep 17 00:00:00 2001 From: einstein95 <einstein95@users.noreply.github.com> Date: Sun, 22 Jan 2017 02:00:38 +1300 Subject: [PATCH 052/195] [chaturbate] Fix extraction --- youtube_dl/extractor/chaturbate.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 29a8820d5..1c2f065df 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import 
re + from .common import InfoExtractor from ..utils import ExtractorError @@ -31,30 +33,32 @@ class ChaturbateIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_url = self._search_regex( - r'src=(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage, - 'playlist', default=None, group='url') + m3u8_urls = re.findall( + r'var hlsSource.+? = (["\'])(?P<url>http.+?\.m3u8)', webpage) - if not m3u8_url: + if not m3u8_urls: error = self._search_regex( [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>', r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'], webpage, 'error', group='error', default=None) if not error: - if any(p not in webpage for p in ( + if any(p in webpage for p in ( self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): error = self._ROOM_OFFLINE if error: raise ExtractorError(error, expected=True) raise ExtractorError('Unable to find stream URL') - formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + formats = [] + for m3u8_url in m3u8_urls: + formats.append(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')[0]) + self._sort_formats(formats) return { 'id': video_id, 'title': self._live_title(video_id), - 'thumbnail': 'https://cdn-s.highwebmedia.com/uHK3McUtGCG3SMFcd4ZJsRv8/roomimage/%s.jpg' % video_id, + 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, 'age_limit': self._rta_search(webpage), 'is_live': True, 'formats': formats, From a243abb80d5fdaacc502bc5a2b5cb20d0766e93a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 03:00:10 +0700 Subject: [PATCH 053/195] [chaturbate] Improve (closes #11797) --- youtube_dl/extractor/chaturbate.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 1c2f065df..8fbc91c1f 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py 
@@ -33,10 +33,10 @@ class ChaturbateIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_urls = re.findall( - r'var hlsSource.+? = (["\'])(?P<url>http.+?\.m3u8)', webpage) + m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer( + r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)] - if not m3u8_urls: + if not m3u8_formats: error = self._search_regex( [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>', r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'], @@ -50,9 +50,12 @@ class ChaturbateIE(InfoExtractor): raise ExtractorError('Unable to find stream URL') formats = [] - for m3u8_url in m3u8_urls: - formats.append(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')[0]) - + for m3u8_id, m3u8_url in m3u8_formats: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', + # ffmpeg skips segments for fast m3u8 + preference=-10 if m3u8_id == 'fast' else None, + m3u8_id=m3u8_id, fatal=False, live=True)) self._sort_formats(formats) return { From 8d1fbe0cb20fdfab8487bb478c2a002f12c1a5d9 Mon Sep 17 00:00:00 2001 From: einstein95 <einstein95@users.noreply.github.com> Date: Sat, 21 Jan 2017 20:02:55 +1300 Subject: [PATCH 054/195] [pornflip] Add extractor (closes #11556) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pornflip.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/pornflip.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de5f94738..cfddf5b92 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -724,6 +724,7 @@ from .polskieradio import ( ) from .porn91 import Porn91IE from .porncom import PornComIE +from .pornflip import PornFlipIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py 
new file mode 100644 index 000000000..b6077f7cb --- /dev/null +++ b/youtube_dl/extractor/pornflip.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, +) +from ..utils import ( + int_or_none, + try_get, + RegexNotFoundError, +) + + +class PornFlipIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P<id>[0-9A-Za-z]{11})' + _TEST = { + 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', + 'md5': '98c46639849145ae1fd77af532a9278c', + 'info_dict': { + 'id': 'wz7DfNhMmep', + 'ext': 'mp4', + 'title': '2 Amateurs swallow make his dream cumshots true', + 'uploader': 'figifoto', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + uploader = self._html_search_regex( + r'<span class="name">\s+<a class="ajax" href=".+>\s+<strong>([^<]+)<', webpage, 'uploader', fatal=False) + flashvars = compat_parse_qs(self._html_search_regex( + r'<embed.+?flashvars="([^"]+)"', + webpage, 'flashvars')) + title = flashvars['video_vars[title]'][0] + thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0]) + formats = [] + for k, v in flashvars.items(): + height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None) + if height: + url = v[0] + formats.append({ + 'height': int_or_none(height), + 'url': url + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'uploader': uploader, + 'thumbnail': thumbnail, + 'age_limit': 18, + } From 271808b6b2bd75ec9bdf943a55dbc4737bfa6f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 03:43:27 +0700 Subject: [PATCH 055/195] [pornflip] Improve and extract dash formats (closes #11795) --- youtube_dl/extractor/pornflip.py | 79 
++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py index b6077f7cb..a4a5d390e 100644 --- a/youtube_dl/extractor/pornflip.py +++ b/youtube_dl/extractor/pornflip.py @@ -4,56 +4,89 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_str, ) from ..utils import ( int_or_none, try_get, - RegexNotFoundError, + unified_timestamp, ) class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P<id>[0-9A-Za-z]{11})' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})' + _TESTS = [{ 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', 'md5': '98c46639849145ae1fd77af532a9278c', 'info_dict': { 'id': 'wz7DfNhMmep', 'ext': 'mp4', 'title': '2 Amateurs swallow make his dream cumshots true', - 'uploader': 'figifoto', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 112, + 'timestamp': 1481655502, + 'upload_date': '20161213', + 'uploader_id': '106786', + 'uploader': 'figifoto', + 'view_count': int, 'age_limit': 18, } - } + }, { + 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - uploader = self._html_search_regex( - r'<span class="name">\s+<a class="ajax" href=".+>\s+<strong>([^<]+)<', webpage, 'uploader', fatal=False) - flashvars = compat_parse_qs(self._html_search_regex( - r'<embed.+?flashvars="([^"]+)"', - webpage, 'flashvars')) - title = flashvars['video_vars[title]'][0] - thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0]) - formats = [] - for k, v in flashvars.items(): - height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None) - if height: - url = v[0] - formats.append({ - 'height': int_or_none(height), - 'url': url - }) + 
webpage = self._download_webpage( + 'https://www.pornflip.com/v/%s' % video_id, video_id) + + flashvars = compat_parse_qs(self._search_regex( + r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1', + webpage, 'flashvars', group='flashvars')) + + title = flashvars['video_vars[title]'][0] + + def flashvar(kind): + return try_get( + flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str) + + formats = [] + for key, value in flashvars.items(): + if not (value and isinstance(value, list)): + continue + format_url = value[0] + if key == 'video_vars[hds_manifest]': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + continue + height = self._search_regex( + r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None) + if not height: + continue + formats.append({ + 'url': format_url, + 'format_id': 'http-%s' % height, + 'height': int_or_none(height), + }) self._sort_formats(formats) + uploader = self._html_search_regex( + (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)', + r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'), + webpage, 'uploader', fatal=False, group='uploader') + return { 'id': video_id, 'formats': formats, 'title': title, + 'thumbnail': flashvar('big_thumb'), + 'duration': int_or_none(flashvar('duration')), + 'timestamp': unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp')), + 'uploader_id': flashvar('author_id'), 'uploader': uploader, - 'thumbnail': thumbnail, + 'view_count': int_or_none(flashvar('views')), 'age_limit': 18, } From 6c031a35f31717cc1a535d5d808b94967b841a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 18:57:15 +0700 Subject: [PATCH 056/195] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 00c8a063f..a814b934c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,16 @@ version 
<unreleased> Extractors ++ [pornflip] Add support for pornflip.com (#11556, #11795) +* [chaturbate] Fix extraction (#11797, #11802) ++ [azmedien] Add support for AZ Medien sites (#11784, #11785) + [nextmedia] Support redirected URLs ++ [vimeo:channel] Extract videos' titles for playlist entries (#11796) ++ [youtube] Extract episode metadata (#9695, #11774) + [cspan] Support Ustream embedded videos (#11547) ++ [1tv] Add support for HLS videos (#11786) +* [uol] Fix extraction (#11770) +* [mtv] Relax triforce feed regular expression (#11766) version 2017.01.18 From 9d5b29c881f679b1d4270326af4ba6f657807011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 18:59:04 +0700 Subject: [PATCH 057/195] release 2017.01.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 38cb13a33..30cc27c7b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.18 +[debug] youtube-dl version 2017.01.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a814b934c..beea17e54 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.22 Extractors + [pornflip] Add support for pornflip.com (#11556, #11795) diff --git a/README.md b/README.md index a606346b2..4f677d0cc 100644 --- a/README.md +++ b/README.md @@ -374,7 +374,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo avprobe) --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; - "best" by default + "best" by default; No effect without -x --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a3c76d5db..b906d443a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -74,6 +74,8 @@ - **awaan:live** - **awaan:season** - **awaan:video** + - **AZMedien**: AZ Medien videos + - **AZMedienShow**: AZ Medien shows - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -572,6 +574,7 @@ - **PolskieRadio** - 
**PolskieRadioCategory** - **PornCom** + - **PornFlip** - **PornHd** - **PornHub**: PornHub and Thumbzilla - **PornHubPlaylist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 669f60f65..9466c9637 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.18' +__version__ = '2017.01.22' From 30dda24de304dd53fc63dfb5bf4672c2ec747014 Mon Sep 17 00:00:00 2001 From: Gaetan Gilbert <gaetan.gilbert@ens-lyon.fr> Date: Sun, 22 Jan 2017 20:27:38 +0100 Subject: [PATCH 058/195] [chirbit] Extract uploader --- youtube_dl/extractor/chirbit.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index f35df143a..4815b34be 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -19,6 +19,7 @@ class ChirbitIE(InfoExtractor): 'title': 'md5:f542ea253f5255240be4da375c6a5d7e', 'description': 'md5:f24a4e22a71763e32da5fed59e47c770', 'duration': 306, + 'uploader': 'Gerryaudio', }, 'params': { 'skip_download': True, @@ -54,6 +55,9 @@ class ChirbitIE(InfoExtractor): duration = parse_duration(self._search_regex( r'class=["\']c-length["\'][^>]*>([^<]+)', webpage, 'duration', fatal=False)) + uploader = self._search_regex( + r'id=["\']chirbit-username["\'][^>]*>([^<]+)', + webpage, 'uploader', fatal=False) return { 'id': audio_id, @@ -61,6 +65,7 @@ class ChirbitIE(InfoExtractor): 'title': title, 'description': description, 'duration': duration, + 'uploader': uploader, } From a089545e036619a798aa19f33085f2b0b87a1b0a Mon Sep 17 00:00:00 2001 From: Alex Seiler <seileralex@gmail.com> Date: Sun, 22 Jan 2017 20:30:29 +0100 Subject: [PATCH 059/195] [azmedien:show] Improve _VALID_URL --- youtube_dl/extractor/azmedien.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 059dc6e4b..a89f71c20 100644 --- 
a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -85,7 +85,20 @@ class AZMedienIE(AZMedienBaseIE): class AZMedienShowIE(AZMedienBaseIE): IE_DESC = 'AZ Medien shows' - _VALID_URL = r'https?://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + (?P<id>[0-9]+-show-[^/\#]+ + (?: + /[0-9]+-episode-[^/\#]+ + )? + )$ + ''' _TESTS = [{ # URL with 'episode' From 8bc0800d7cf24b17204f0fb3c6e76327ed8d527f Mon Sep 17 00:00:00 2001 From: Grzegorz P <Grzechooo@users.noreply.github.com> Date: Sun, 22 Jan 2017 20:35:38 +0100 Subject: [PATCH 060/195] [youtube:playlist] Fix nonexistent/private playlist detection (closes #11604) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 63597dd16..644653357 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1998,7 +1998,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) - for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page): + # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604) + for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match): From 4201ba13e674788c36ae69fbfbffc4b246717d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Jan 2017 02:49:56 +0700 Subject: [PATCH 061/195] [youtube:playlist] Fix nonexistent/private playlist detection and skip private tests --- youtube_dl/extractor/youtube.py | 15 
++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 644653357..5202beb3e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1856,6 +1856,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'title': 'YDL_Empty_List', }, 'playlist_count': 0, + 'skip': 'This playlist is private', }, { 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', @@ -1887,6 +1888,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', }, 'playlist_count': 2, + 'skip': 'This playlist is private', }, { 'note': 'embedded', 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', @@ -2002,11 +2004,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private - if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match): - raise ExtractorError( - 'The playlist doesn\'t exist or is private, use --username or ' - '--netrc to access it.', - expected=True) + mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match) + if mobj: + reason = mobj.group('reason') + message = 'This playlist %s' % reason + if 'private' in reason: + message += ', use --username or --netrc to access it' + message += '.' + raise ExtractorError(message, expected=True) elif re.match(r'[^<]*Invalid parameters[^<]*', match): raise ExtractorError( 'Invalid parameters. 
Maybe URL is incorrect.', From 6d119c2a6bdd2a987ef2e7553b357bd4a3f18690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Jan 2017 03:50:39 +0700 Subject: [PATCH 062/195] [24video] Fix extraction (closes #11811) --- youtube_dl/extractor/twentyfourvideo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 1093a3829..a983ebf05 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -12,7 +12,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -43,7 +43,7 @@ class TwentyFourVideoIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.24video.net/video/view/%s' % video_id, video_id) + 'http://www.24video.sex/video/view/%s' % video_id, video_id) title = self._og_search_title(webpage) description = self._html_search_regex( @@ -69,11 +69,11 @@ class TwentyFourVideoIE(InfoExtractor): # Sets some cookies self._download_xml( - r'http://www.24video.net/video/xml/%s?mode=init' % video_id, + r'http://www.24video.sex/video/xml/%s?mode=init' % video_id, video_id, 'Downloading init XML') video_xml = self._download_xml( - 'http://www.24video.net/video/xml/%s?mode=play' % video_id, + 'http://www.24video.sex/video/xml/%s?mode=play' % video_id, video_id, 'Downloading video XML') video = xpath_element(video_xml, './/video', 'video', fatal=True) From 0c1c6f4b9f97375ffc68cbc9c7276838f7bf8514 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 Jan 2017 23:31:43 +0800 
Subject: [PATCH 063/195] [utils] Add another date format seen in NextTV --- youtube_dl/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 12863e74a..98acc2b45 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -143,6 +143,7 @@ DATE_FORMATS = ( '%Y/%m/%d', '%Y/%m/%d %H:%M', '%Y/%m/%d %H:%M:%S', + '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f', '%d.%m.%Y %H:%M', From bc35ed3fb6fcae88d59fd440b505b9e1a7cf112e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 Jan 2017 23:33:30 +0800 Subject: [PATCH 064/195] =?UTF-8?q?[nextmedia]=20Add=20support=20for=20Nex?= =?UTF-8?q?tTV=20(=E5=A3=B9=E9=9B=BB=E8=A6=96)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog | 6 ++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nextmedia.py | 54 +++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index beea17e54..ba2f5cffc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [nextmedia] Add support for NextTV (壹電視) + + version 2017.01.22 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cfddf5b92..e23b5d0f6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -598,6 +598,7 @@ from .nextmedia import ( NextMediaIE, NextMediaActionNewsIE, AppleDailyIE, + NextTVIE, ) from .nfb import NFBIE from .nfl import NFLIE diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index 626ed8b49..680f03aad 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -3,7 +3,14 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urlparse -from ..utils import parse_iso8601 +from ..utils import ( + clean_html, + 
get_element_by_class, + int_or_none, + parse_iso8601, + remove_start, + unified_timestamp, +) class NextMediaIE(InfoExtractor): @@ -184,3 +191,48 @@ class AppleDailyIE(NextMediaIE): def _fetch_description(self, page): return self._html_search_meta('description', page, 'news description') + + +class NextTVIE(InfoExtractor): + IE_DESC = '壹電視' + _VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671', + 'info_dict': { + 'id': '11779671', + 'ext': 'mp4', + 'title': '「超收稅」近4千億! 藍議員籲發消費券', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1484825400, + 'upload_date': '20170119', + 'view_count': int, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'<h1[^>]*>([^<]+)</h1>', webpage, 'title') + + data = self._hidden_inputs(webpage) + + video_url = data['ntt-vod-src-detailview'] + + date_str = get_element_by_class('date', webpage) + timestamp = unified_timestamp(date_str + '+0800') if date_str else None + + view_count = int_or_none(remove_start( + clean_html(get_element_by_class('click', webpage)), '點閱:')) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'thumbnail': data.get('ntt-vod-img-src'), + 'timestamp': timestamp, + 'view_count': view_count, + } From b494d6856c55bd351107fd7266f8ac2eeaee341f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 02:50:49 +0700 Subject: [PATCH 065/195] [pluralsight] Fix extraction (closes #11820) --- youtube_dl/extractor/pluralsight.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 0ffd41ecd..5c798e874 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -157,13 +157,10 @@ class 
PluralsightIE(PluralsightBaseIE): display_id = '%s-%s' % (name, clip_id) - parsed_url = compat_urlparse.urlparse(url) - - payload_url = compat_urlparse.urlunparse(parsed_url._replace( - netloc='app.pluralsight.com', path='player/api/v1/payload')) - course = self._download_json( - payload_url, display_id, headers={'Referer': url})['payload']['course'] + 'https://app.pluralsight.com/player/user/api/v1/player/payload', + display_id, data=urlencode_postdata({'courseId': course_name}), + headers={'Referer': url}) collection = course['modules'] From ee4c091ce5bb3732c3016410230f45f2283e5055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 02:56:19 +0700 Subject: [PATCH 066/195] [ChangeLog] Actualize --- ChangeLog | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog b/ChangeLog index ba2f5cffc..406301549 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,11 @@ version <unreleased> Extractors +* [pluralsight] Fix extraction (#11820) + [nextmedia] Add support for NextTV (壹電視) +* [24video] Fix extraction (#11811) +* [youtube:playlist] Fix nonexistent and private playlist detection (#11604) ++ [chirbit] Extract uploader (#11809) version 2017.01.22 From c3a65c3de0667b8de4af8fdc8c1eb04a1498e104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 02:58:37 +0700 Subject: [PATCH 067/195] release 2017.01.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 30cc27c7b..f771d72c0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.22*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.22 +[debug] youtube-dl version 2017.01.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 406301549..4bc30cff7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.24 Extractors * [pluralsight] Fix extraction (#11820) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b906d443a..2d28b3f72 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -485,6 +485,7 @@ - **Newstube** - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 + - **NextTV**: 壹電視 - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** diff --git a/youtube_dl/version.py 
b/youtube_dl/version.py index 9466c9637..8a66c2fb9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.22' +__version__ = '2017.01.24' From d61aa5eb37244a04caa09f1f238a4f81366c109b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 22:46:40 +0700 Subject: [PATCH 068/195] [vimeo:review] Fix config URL extraction (closes #11821) --- youtube_dl/extractor/vimeo.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a6bbd4c05..c12eeadd4 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -884,10 +884,14 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _get_config_url(self, webpage_url, video_id, video_password_verified=False): webpage = self._download_webpage(webpage_url, video_id) - data = self._parse_json(self._search_regex( - r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', - default=NO_DEFAULT if video_password_verified else '{}'), video_id) - config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') + config_url = self._html_search_regex( + r'data-config-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'config URL', default=None, group='url') + if not config_url: + data = self._parse_json(self._search_regex( + r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', + default=NO_DEFAULT if video_password_verified else '{}'), video_id) + config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( From 74af9c700d308e3638db0ff2e4510770f9daf31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 22:55:49 +0700 Subject: [PATCH 069/195] [konserthusetplay] Add support for hls formats 
(closes #11823) --- youtube_dl/extractor/konserthusetplay.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index 55291c66f..7e6ea9696 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + determine_ext, float_or_none, int_or_none, ) @@ -42,12 +43,18 @@ class KonserthusetPlayIE(InfoExtractor): player_config = media['playerconfig'] playlist = player_config['playlist'] - source = next(f for f in playlist if f.get('bitrates')) + source = next(f for f in playlist if f.get('bitrates') or f.get('provider')) FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4' formats = [] + m3u8_url = source.get('url') + if m3u8_url and determine_ext(m3u8_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + fallback_url = source.get('fallbackUrl') fallback_format_id = None if fallback_url: From 23b35a634e06d9b92c9650b0d66a3d5d7eb03a54 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 24 Jan 2017 16:55:07 +0100 Subject: [PATCH 070/195] [crackle] improve extraction - extract vtt subtitles - extract multiple resolutions for thumbnails - pass geo verification proxy headers - add support for mobile urls --- youtube_dl/extractor/crackle.py | 53 ++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 25c5e7d04..377fb45e9 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -6,7 +6,7 @@ from ..utils import int_or_none class CrackleIE(InfoExtractor): - _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' + _VALID_URL = 
r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' _TEST = { 'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', 'info_dict': { @@ -31,8 +31,32 @@ class CrackleIE(InfoExtractor): } } + _THUMBNAIL_RES = [ + (120, 90), + (208, 156), + (220, 124), + (220, 220), + (240, 180), + (250, 141), + (315, 236), + (320, 180), + (360, 203), + (400, 300), + (421, 316), + (460, 330), + (460, 460), + (462, 260), + (480, 270), + (587, 330), + (640, 480), + (700, 330), + (700, 394), + (854, 480), + (1024, 1024), + (1920, 1080), + ] + # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx - _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' _MEDIA_FILE_SLOTS = { 'c544.flv': { 'width': 544, @@ -61,17 +85,25 @@ class CrackleIE(InfoExtractor): item = self._download_xml( 'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, - video_id).find('i') + video_id, headers=self.geo_verification_headers()).find('i') title = item.attrib['t'] subtitles = {} formats = self._extract_m3u8_formats( 'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), video_id, 'mp4', m3u8_id='hls', fatal=None) - thumbnail = None + thumbnails = [] path = item.attrib.get('p') if path: - thumbnail = self._THUMBNAIL_TEMPLATE % path + for width, height in self._THUMBNAIL_RES: + res = '%dx%d' % (width, height) + thumbnails.append({ + 'id': res, + 'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res), + 'width': width, + 'height': height, + 'resolution': res, + }) http_base_url = 'http://ahttp.crackle.com/' + path for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): formats.append({ @@ -86,10 +118,11 @@ class CrackleIE(InfoExtractor): if locale and v: if locale not in subtitles: subtitles[locale] = [] - subtitles[locale] = [{ - 'url': '%s/%s%s_%s.xml' % 
(config_doc.attrib['strSubtitleServer'], path, locale, v), - 'ext': 'ttml', - }] + for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')): + subtitles.setdefault(locale, []).append({ + 'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext), + 'ext': ext, + }) self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) return { @@ -100,7 +133,7 @@ class CrackleIE(InfoExtractor): 'series': item.attrib.get('sn'), 'season_number': int_or_none(item.attrib.get('se')), 'episode_number': int_or_none(item.attrib.get('ep')), - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'subtitles': subtitles, 'formats': formats, } From af59bddc4e4a6c260e7966fe75d9d687c3b13b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 23:02:20 +0700 Subject: [PATCH 071/195] [konserthusetplay] Extract subtitles (#11823) --- youtube_dl/extractor/konserthusetplay.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index 7e6ea9696..3ae2aa317 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( determine_ext, float_or_none, @@ -104,6 +105,13 @@ class KonserthusetPlayIE(InfoExtractor): thumbnail = media.get('image') duration = float_or_none(media.get('duration'), 1000) + subtitles = {} + captions = source.get('captionsAvailableLanguages') + if isinstance(captions, dict): + for lang, subtitle_url in captions.items(): + if lang != 'none' and isinstance(subtitle_url, compat_str): + subtitles.setdefault(lang, []).append({'url': subtitle_url}) + return { 'id': video_id, 'title': title, @@ -111,4 +119,5 @@ class KonserthusetPlayIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'subtitles': 
subtitles, } From c60089c0222433775dcc1305d85b42fc6158c8df Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 25 Jan 2017 07:38:17 +0100 Subject: [PATCH 072/195] [afreecatv:global] Add new extractor(closes #11807) --- youtube_dl/extractor/afreecatv.py | 92 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 75b366993..4f6cdb8a2 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -18,6 +18,7 @@ from ..utils import ( class AfreecaTVIE(InfoExtractor): + IE_NAME = 'afreecatv' IE_DESC = 'afreecatv.com' _VALID_URL = r'''(?x) https?:// @@ -143,3 +144,94 @@ class AfreecaTVIE(InfoExtractor): expected=True) return info + + +class AfreecaTVGlobalIE(AfreecaTVIE): + IE_NAME = 'afreecatv:global' + _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?' 
+ _TESTS = [{ + 'url': 'http://afreeca.tv/36853014/v/58301', + 'info_dict': { + 'id': '58301', + 'title': 'tryhard top100', + 'uploader_id': '36853014', + 'uploader': 'makgi Hearthstone Live!', + }, + 'playlist_count': 3, + }] + + def _real_extract(self, url): + channel_id, video_id = re.match(self._VALID_URL, url).groups() + video_type = 'video' if video_id else 'live' + query = { + 'pt': 'view', + 'bid': channel_id, + } + if video_id: + query['vno'] = video_id + video_data = self._download_json( + 'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type), + video_id or channel_id, query=query)['channel'] + + if video_data.get('result') != 1: + raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg'])) + + title = video_data['title'] + + info = { + 'thumbnail': video_data.get('thumb'), + 'view_count': int_or_none(video_data.get('vcnt')), + 'age_limit': int_or_none(video_data.get('grade')), + 'uploader_id': channel_id, + 'uploader': video_data.get('cname'), + } + + if video_id: + entries = [] + for i, f in enumerate(video_data.get('flist', [])): + video_key = self.parse_video_key(f.get('key', '')) + f_url = f.get('file') + if not video_key or not f_url: + continue + entries.append({ + 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), + 'title': title, + 'upload_date': video_key.get('upload_date'), + 'duration': int_or_none(f.get('length')), + 'url': f_url, + 'protocol': 'm3u8_native', + 'ext': 'mp4', + }) + + info.update({ + 'id': video_id, + 'title': title, + 'duration': int_or_none(video_data.get('length')), + }) + if len(entries) > 1: + info['_type'] = 'multi_video' + info['entries'] = entries + elif len(entries) == 1: + i = entries[0].copy() + i.update(info) + info = i + else: + formats = [] + for s in video_data.get('strm', []): + s_url = s.get('purl') + if not s_url: + continue + # TODO: extract rtmp formats + if s.get('stype') == 'HLS': + formats.extend(self._extract_m3u8_formats( + s_url, channel_id, 'mp4', fatal=False)) + 
self._sort_formats(formats) + + info.update({ + 'id': channel_id, + 'title': self._live_title(title), + 'is_live': True, + 'formats': formats, + }) + + return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e23b5d0f6..f09b4cf2c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -30,7 +30,10 @@ from .aenetworks import ( AENetworksIE, HistoryTopicIE, ) -from .afreecatv import AfreecaTVIE +from .afreecatv import ( + AfreecaTVIE, + AfreecaTVGlobalIE, +) from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE From b8a03b66601f6af9e6b4009cba634dac6e0d30e6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 25 Jan 2017 07:39:11 +0100 Subject: [PATCH 073/195] [srgssr] fix rts video extraction(closes #11831) --- youtube_dl/extractor/srgssr.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 47aa887cc..319a48a7a 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -48,9 +48,6 @@ class SRGSSRIE(InfoExtractor): def _real_extract(self, url): bu, media_type, media_id = re.match(self._VALID_URL, url).groups() - if bu == 'rts': - return self.url_result('rts:%s' % media_id, 'RTS') - media_data = self.get_media_data(bu, media_type, media_id) metadata = media_data['AssetMetadatas']['AssetMetadata'][0] From 17f8deeb481a7aa3079d7e11da2c255f893b9e8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:27:22 +0700 Subject: [PATCH 074/195] [extractor/generic] Add support for openload embeds (closes #11536, closes #11812) --- youtube_dl/extractor/generic.py | 7 +++++++ youtube_dl/extractor/openload.py | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40201f311..a23486620 100644 --- 
a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -80,6 +80,7 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE +from .openload import OpenloadIE class GenericIE(InfoExtractor): @@ -2431,6 +2432,12 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Look for Openload embeds + openload_urls = OpenloadIE._extract_urls(webpage) + if openload_urls: + return _playlist_from_matches( + openload_urls, ie=OpenloadIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 3d4ad7dca..4893ade5d 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_chr from ..utils import ( @@ -56,6 +58,12 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) From c1fa3f46727ccbbb75389ce82753f2e63449ece6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:28:45 +0700 Subject: [PATCH 075/195] [openload] Fallback video extension to mp4 --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 4893ade5d..32289d897 100644 --- a/youtube_dl/extractor/openload.py +++ 
b/youtube_dl/extractor/openload.py @@ -101,7 +101,7 @@ class OpenloadIE(InfoExtractor): 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, # Seems all videos have extensions in their titles - 'ext': determine_ext(title), + 'ext': determine_ext(title, 'mp4'), 'subtitles': subtitles, } return info_dict From 2c302cf66b235aed6be5786489f259c0fa993fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:33:46 +0700 Subject: [PATCH 076/195] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4bc30cff7..e0af3f671 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version <unreleased> + +Extractors ++ [openload] Fallback video extension to mp4 ++ [extractor/generic] Add support for Openload embeds (#11536, #11812) +* [srgssr] Fix rts video extraction (#11831) ++ [afreecatv:global] Add support for afreeca.tv (#11807) ++ [crackle] Extract vtt subtitles ++ [crackle] Extract multiple resolutions for thumbnails ++ [crackle] Add support for mobile URLs ++ [konserthusetplay] Extract subtitles (#11823) ++ [konserthusetplay] Add support for HLS videos (#11823) +* [vimeo:review] Fix config URL extraction (#11821) + + version 2017.01.24 Extractors From 2417d41535a907a2da05a8b6490198916279d2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:36:03 +0700 Subject: [PATCH 077/195] release 2017.01.25 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f771d72c0..4d409f785 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.24*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.25** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.24 +[debug] youtube-dl version 2017.01.25 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e0af3f671..ff305d7e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.25 Extractors + [openload] Fallback video extension to mp4 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2d28b3f72..f640cfcaa 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -33,7 +33,8 @@ - **AdobeTVVideo** - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network - - **AfreecaTV**: afreecatv.com + - **afreecatv**: afreecatv.com + - **afreecatv:global**: afreecatv.com - 
**AirMozilla** - **AlJazeera** - **Allocine** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8a66c2fb9..c23fe85de 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.24' +__version__ = '2017.01.25' From 556dbe7fe35667cb061dbf0ee84d3a065ad11055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Jan 2017 21:43:14 +0700 Subject: [PATCH 078/195] [youtube] Add fallback for duration extraction (closes #11841) --- youtube_dl/extractor/youtube.py | 36 ++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5202beb3e..630586796 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -40,6 +40,7 @@ from ..utils import ( sanitized_Request, smuggle_url, str_to_int, + try_get, unescapeHTML, unified_strdate, unsmuggle_url, @@ -383,6 +384,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], + 'duration': 10, 'like_count': int, 'dislike_count': int, 'start_time': 1, @@ -402,6 +404,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', 'iconic ep', 'iconic', 'love', 'it'], + 'duration': 180, 'uploader': 'Icona Pop', 'uploader_id': 'IconaPop', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop', @@ -419,6 +422,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 'alt_title': 'Tunnel Vision', 'description': 
'md5:64249768eec3bc4276236606ea996373', + 'duration': 419, 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', @@ -458,6 +462,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], + 'duration': 10, 'like_count': int, 'dislike_count': int, }, @@ -493,6 +498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'm4a', 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson', 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d', + 'duration': 244, 'uploader': 'AfrojackVEVO', 'uploader_id': 'AfrojackVEVO', 'upload_date': '20131011', @@ -512,6 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Taylor Swift - Shake It Off', 'alt_title': 'Shake It Off', 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3', + 'duration': 242, 'uploader': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO', 'upload_date': '20140818', @@ -529,6 +536,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'T4XJQO3qol8', 'ext': 'mp4', + 'duration': 219, 'upload_date': '20100909', 'uploader': 'The Amazing Atheist', 'uploader_id': 'TheAmazingAtheist', @@ -546,6 +554,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', + 'duration': 142, 'uploader': 'The Witcher', 'uploader_id': 'WitcherGame', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', @@ -562,6 +571,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'Dedication To My Ex (Miss That) (Lyric Video)', 'description': 'md5:33765bb339e1b47e7e72b5490139bb41', + 
'duration': 247, 'uploader': 'LloydVEVO', 'uploader_id': 'LloydVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO', @@ -576,6 +586,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': '__2ABJjxzNo', 'ext': 'mp4', + 'duration': 266, 'upload_date': '20100430', 'uploader_id': 'deadmau5', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', @@ -596,6 +607,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'lqQg6PlCWgI', 'ext': 'mp4', + 'duration': 6085, 'upload_date': '20150827', 'uploader_id': 'olympic', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', @@ -615,6 +627,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': '_b-2C3KPAM0', 'ext': 'mp4', 'stretched_ratio': 16 / 9., + 'duration': 85, 'upload_date': '20110310', 'uploader_id': 'AllenMeow', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', @@ -649,6 +662,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'md5:7b81415841e02ecd4313668cde88737a', 'description': 'md5:116377fd2963b81ec4ce64b542173306', + 'duration': 220, 'upload_date': '20150625', 'uploader_id': 'dorappi2000', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', @@ -691,6 +705,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7335, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -703,6 +718,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7337, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -715,6 +731,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (grizzle)', 'description': 
'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7337, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -727,6 +744,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (zim)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7334, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -768,6 +786,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', 'alt_title': 'Dark Walk', 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', + 'duration': 133, 'upload_date': '20151119', 'uploader_id': 'IronSoulElf', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', @@ -809,10 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'md5:e41008789470fc2533a3252216f1c1d1', 'description': 'md5:a677553cf0840649b731a3024aeff4cc', + 'duration': 721, 'upload_date': '20150127', 'uploader_id': 'BerkmanCenter', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', - 'uploader': 'BerkmanCenter', + 'uploader': 'The Berkman Klein Center for Internet & Society', 'license': 'Creative Commons Attribution license (reuse allowed)', }, 'params': { @@ -827,6 +847,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', 'description': 'md5:dda0d780d5a6e120758d1711d062a867', + 'duration': 4060, 'upload_date': '20151119', 'uploader': 'Bernie 2016', 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', @@ -871,7 +892,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'iqKdEhx-dD4', 'ext': 'mp4', 'title': 'Isolation - Mind Field (Ep 1)', - 'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822', + 'description': 'md5:8013b7ddea787342608f63a13ddc9492', + 'duration': 2085, 'upload_date': '20170118', 'uploader': 'Vsauce', 'uploader_id': 'Vsauce', @@ -1516,11 
+1538,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_subtitles = self.extract_subtitles(video_id, video_webpage) automatic_captions = self.extract_automatic_captions(video_id, video_webpage) - if 'length_seconds' not in video_info: - self._downloader.report_warning('unable to extract video duration') - video_duration = None - else: - video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0])) + video_duration = try_get( + video_info, lambda x: int_or_none(x['length_seconds'][0])) + if not video_duration: + video_duration = parse_duration(self._html_search_meta( + 'duration', video_webpage, 'video duration')) # annotations video_annotations = None From cf0cabbe5011228c78a3d88c1a1b179b10333d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Jan 2017 21:49:34 +0700 Subject: [PATCH 079/195] [cmt,mtv,southpark] Add support for episode URLs (closes #11837) --- youtube_dl/extractor/cmt.py | 2 +- youtube_dl/extractor/mtv.py | 5 ++++- youtube_dl/extractor/southpark.py | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index f6b794fb3..e701fbeab 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -5,7 +5,7 @@ from .mtv import MTVIE class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e48ea2481..855c3996f 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -304,7 +304,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): IE_NAME = 
'mtv' - _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P<id>[^/?#.]+)' + _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.mtv.com/feeds/mrss/' _TESTS = [{ @@ -321,6 +321,9 @@ class MTVIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101', 'only_matching': True, + }, { + 'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713', + 'only_matching': True, }] diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 08f8c5744..d8ce416fc 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' @@ -75,7 +75,7 @@ class SouthParkDeIE(SouthParkIE): class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' _TESTS = [{ From 9bccdc7004f48963da9a51b6fe24a398d59da725 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 26 Jan 2017 16:06:01 +0100 Subject: [PATCH 080/195] [vevo] remove request to old api and catch apiv2 errors --- youtube_dl/extractor/vevo.py | 267 +++++++++++------------------------ 1 file changed, 79 insertions(+), 188 deletions(-) diff --git a/youtube_dl/extractor/vevo.py 
b/youtube_dl/extractor/vevo.py index f0a8075fb..c4e37f694 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -4,9 +4,9 @@ import re from .common import InfoExtractor from ..compat import ( - compat_etree_fromstring, compat_str, compat_urlparse, + compat_HTTPError, ) from ..utils import ( ExtractorError, @@ -140,21 +140,6 @@ class VevoIE(VevoBaseIE): 'url': 'http://www.vevo.com/watch/INS171400764', 'only_matching': True, }] - _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com' - _SOURCE_TYPES = { - 0: 'youtube', - 1: 'brightcove', - 2: 'http', - 3: 'hls_ios', - 4: 'hls', - 5: 'smil', # http - 7: 'f4m_cc', - 8: 'f4m_ak', - 9: 'f4m_l3', - 10: 'ism', - 13: 'smil', # rtmp - 18: 'dash', - } _VERSIONS = { 0: 'youtube', # only in AuthenticateVideo videoVersions 1: 'level3', @@ -163,41 +148,6 @@ class VevoIE(VevoBaseIE): 4: 'amazon', } - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): - formats = [] - els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') - for el in els: - src = el.attrib['src'] - m = re.match(r'''(?xi) - (?P<ext>[a-z0-9]+): - (?P<path> - [/a-z0-9]+ # The directory and main part of the URL - _(?P<tbr>[0-9]+)k - _(?P<width>[0-9]+)x(?P<height>[0-9]+) - _(?P<vcodec>[a-z0-9]+) - _(?P<vbr>[0-9]+) - _(?P<acodec>[a-z0-9]+) - _(?P<abr>[0-9]+) - \.[a-z0-9]+ # File extension - )''', src) - if not m: - continue - - format_url = self._SMIL_BASE_URL + m.group('path') - formats.append({ - 'url': format_url, - 'format_id': 'smil_' + m.group('tbr'), - 'vcodec': m.group('vcodec'), - 'acodec': m.group('acodec'), - 'tbr': int(m.group('tbr')), - 'vbr': int(m.group('vbr')), - 'abr': int(m.group('abr')), - 'ext': m.group('ext'), - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - return formats - def _initialize_api(self, video_id): req = sanitized_Request( 'http://www.vevo.com/auth', data=b'') @@ -214,148 +164,91 @@ class VevoIE(VevoBaseIE): 
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token'] def _call_api(self, path, *args, **kwargs): - return self._download_json(self._api_url_template % path, *args, **kwargs) + try: + data = self._download_json(self._api_url_template % path, *args, **kwargs) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + errors = self._parse_json(e.cause.read().decode(), None)['errors'] + error_message = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) + raise + return data def _real_extract(self, url): video_id = self._match_id(url) - json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id - response = self._download_json( - json_url, video_id, 'Downloading video info', - 'Unable to download info', fatal=False) or {} - video_info = response.get('video') or {} + self._initialize_api(video_id) + + video_info = self._call_api( + 'video/%s' % video_id, video_id, 'Downloading api video info', + 'Failed to download video info') + + video_versions = self._call_api( + 'video/%s/streams' % video_id, video_id, + 'Downloading video versions info', + 'Failed to download video versions info', + fatal=False) + + # Some videos are only available via webpage (e.g. 
+ # https://github.com/rg3/youtube-dl/issues/9366) + if not video_versions: + webpage = self._download_webpage(url, video_id) + video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0] + + uploader = None artist = None featured_artist = None - uploader = None - view_count = None + artists = video_info.get('artists') + for curr_artist in artists: + if curr_artist.get('role') == 'Featured': + featured_artist = curr_artist['name'] + else: + artist = uploader = curr_artist['name'] + formats = [] + for video_version in video_versions: + version = self._VERSIONS.get(video_version['version']) + version_url = video_version.get('url') + if not version_url: + continue - if not video_info: - try: - self._initialize_api(video_id) - except ExtractorError: - ytid = response.get('errorInfo', {}).get('ytid') - if ytid: - self.report_warning( - 'Video is geoblocked, trying with the YouTube video %s' % ytid) - return self.url_result(ytid, 'Youtube', ytid) - - raise - - video_info = self._call_api( - 'video/%s' % video_id, video_id, 'Downloading api video info', - 'Failed to download video info') - - video_versions = self._call_api( - 'video/%s/streams' % video_id, video_id, - 'Downloading video versions info', - 'Failed to download video versions info', - fatal=False) - - # Some videos are only available via webpage (e.g. 
- # https://github.com/rg3/youtube-dl/issues/9366) - if not video_versions: - webpage = self._download_webpage(url, video_id) - video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0] - - timestamp = parse_iso8601(video_info.get('releaseDate')) - artists = video_info.get('artists') - for curr_artist in artists: - if curr_artist.get('role') == 'Featured': - featured_artist = curr_artist['name'] - else: - artist = uploader = curr_artist['name'] - view_count = int_or_none(video_info.get('views', {}).get('total')) - - for video_version in video_versions: - version = self._VERSIONS.get(video_version['version']) - version_url = video_version.get('url') - if not version_url: + if '.ism' in version_url: + continue + elif '.mpd' in version_url: + formats.extend(self._extract_mpd_formats( + version_url, video_id, mpd_id='dash-%s' % version, + note='Downloading %s MPD information' % version, + errnote='Failed to download %s MPD information' % version, + fatal=False)) + elif '.m3u8' in version_url: + formats.extend(self._extract_m3u8_formats( + version_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls-%s' % version, + note='Downloading %s m3u8 information' % version, + errnote='Failed to download %s m3u8 information' % version, + fatal=False)) + else: + m = re.search(r'''(?xi) + _(?P<width>[0-9]+)x(?P<height>[0-9]+) + _(?P<vcodec>[a-z0-9]+) + _(?P<vbr>[0-9]+) + _(?P<acodec>[a-z0-9]+) + _(?P<abr>[0-9]+) + \.(?P<ext>[a-z0-9]+)''', version_url) + if not m: continue - if '.ism' in version_url: - continue - elif '.mpd' in version_url: - formats.extend(self._extract_mpd_formats( - version_url, video_id, mpd_id='dash-%s' % version, - note='Downloading %s MPD information' % version, - errnote='Failed to download %s MPD information' % version, - fatal=False)) - elif '.m3u8' in version_url: - formats.extend(self._extract_m3u8_formats( - version_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls-%s' % version, - note='Downloading %s m3u8 information' % version, 
- errnote='Failed to download %s m3u8 information' % version, - fatal=False)) - else: - m = re.search(r'''(?xi) - _(?P<width>[0-9]+)x(?P<height>[0-9]+) - _(?P<vcodec>[a-z0-9]+) - _(?P<vbr>[0-9]+) - _(?P<acodec>[a-z0-9]+) - _(?P<abr>[0-9]+) - \.(?P<ext>[a-z0-9]+)''', version_url) - if not m: - continue - - formats.append({ - 'url': version_url, - 'format_id': 'http-%s-%s' % (version, video_version['quality']), - 'vcodec': m.group('vcodec'), - 'acodec': m.group('acodec'), - 'vbr': int(m.group('vbr')), - 'abr': int(m.group('abr')), - 'ext': m.group('ext'), - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - else: - timestamp = int_or_none(self._search_regex( - r'/Date\((\d+)\)/', - video_info['releaseDate'], 'release date', fatal=False), - scale=1000) - artists = video_info.get('mainArtists') - if artists: - artist = uploader = artists[0]['artistName'] - - featured_artists = video_info.get('featuredArtists') - if featured_artists: - featured_artist = featured_artists[0]['artistName'] - - smil_parsed = False - for video_version in video_info['videoVersions']: - version = self._VERSIONS.get(video_version['version']) - if version == 'youtube': - continue - else: - source_type = self._SOURCE_TYPES.get(video_version['sourceType']) - renditions = compat_etree_fromstring(video_version['data']) - if source_type == 'http': - for rend in renditions.findall('rendition'): - attr = rend.attrib - formats.append({ - 'url': attr['url'], - 'format_id': 'http-%s-%s' % (version, attr['name']), - 'height': int_or_none(attr.get('frameheight')), - 'width': int_or_none(attr.get('frameWidth')), - 'tbr': int_or_none(attr.get('totalBitrate')), - 'vbr': int_or_none(attr.get('videoBitrate')), - 'abr': int_or_none(attr.get('audioBitrate')), - 'vcodec': attr.get('videoCodec'), - 'acodec': attr.get('audioCodec'), - }) - elif source_type == 'hls': - formats.extend(self._extract_m3u8_formats( - renditions.find('rendition').attrib['url'], video_id, - 'mp4', 'm3u8_native', 
m3u8_id='hls-%s' % version, - note='Downloading %s m3u8 information' % version, - errnote='Failed to download %s m3u8 information' % version, - fatal=False)) - elif source_type == 'smil' and version == 'level3' and not smil_parsed: - formats.extend(self._extract_smil_formats( - renditions.find('rendition').attrib['url'], video_id, False)) - smil_parsed = True + formats.append({ + 'url': version_url, + 'format_id': 'http-%s-%s' % (version, video_version['quality']), + 'vcodec': m.group('vcodec'), + 'acodec': m.group('acodec'), + 'vbr': int(m.group('vbr')), + 'abr': int(m.group('abr')), + 'ext': m.group('ext'), + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) self._sort_formats(formats) track = video_info['title'] @@ -376,17 +269,15 @@ class VevoIE(VevoBaseIE): else: age_limit = None - duration = video_info.get('duration') - return { 'id': video_id, 'title': title, 'formats': formats, 'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'), - 'timestamp': timestamp, + 'timestamp': parse_iso8601(video_info.get('releaseDate')), 'uploader': uploader, - 'duration': duration, - 'view_count': view_count, + 'duration': int_or_none(video_info.get('duration')), + 'view_count': int_or_none(video_info.get('views', {}).get('total')), 'age_limit': age_limit, 'track': track, 'artist': uploader, From b3277115a192b88df34692e42f62f39bd4a65bac Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 26 Jan 2017 16:14:42 +0100 Subject: [PATCH 081/195] [disney] Add new extractor(closes #7409)(closes #11801)(#4975)(#11000) --- youtube_dl/extractor/disney.py | 115 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 116 insertions(+) create mode 100644 youtube_dl/extractor/disney.py diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py new file mode 100644 index 000000000..396873c6d --- /dev/null +++ b/youtube_dl/extractor/disney.py @@ -0,0 +1,115 @@ +# coding: utf-8 
+from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unified_strdate, + compat_str, + determine_ext, +) + + +class DisneyIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})''' + _TESTS = [{ + 'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', + 'info_dict': { + 'id': '545ed1857afee5a0ec239977', + 'ext': 'mp4', + 'title': 'Moana - Trailer', + 'description': 'A fun adventure for the entire Family! Bring home Moana on Digital HD Feb 21 & Blu-ray March 7', + 'upload_date': '20170112', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', + 'only_matching': True, + }, { + 'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114', + 'only_matching': True, + }, { + 'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2', + 'only_matching': True, + }, { + 'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d', + 'only_matching': True, + }, { + 'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097', + 'only_matching': True, + }] + + def _real_extract(self, url): + domain, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage( + 'http://%s/embed/%s' % (domain, video_id), video_id) + video_data = self._parse_json(self._search_regex( + r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video'] + + for external in video_data.get('externals', []): + if external.get('source') == 'vevo': + return self.url_result('vevo:' + external['data_id'], 'Vevo') + + title = video_data['title'] + + formats = [] + for flavor in 
video_data.get('flavors', []): + flavor_format = flavor.get('format') + flavor_url = flavor.get('url') + if not flavor_url or not re.match(r'https?://', flavor_url): + continue + tbr = int_or_none(flavor.get('bitrate')) + if tbr == 99999: + formats.extend(self._extract_m3u8_formats( + flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False)) + continue + format_id = [] + if flavor_format: + format_id.append(flavor_format) + if tbr: + format_id.append(compat_str(tbr)) + ext = determine_ext(flavor_url) + if flavor_format == 'applehttp' or ext == 'm3u8': + ext = 'mp4' + width = int_or_none(flavor.get('width')) + height = int_or_none(flavor.get('height')) + formats.append({ + 'format_id': '-'.join(format_id), + 'url': flavor_url, + 'width': width, + 'height': height, + 'tbr': tbr, + 'ext': ext, + 'vcodec': 'none' if (width == 0 and height == 0) else None, + }) + self._sort_formats(formats) + + subtitles = {} + for caption in video_data.get('captions', []): + caption_url = caption.get('url') + caption_format = caption.get('format') + if not caption_url or caption_format.startswith('unknown'): + continue + subtitles.setdefault(caption.get('language', 'en'), []).append({ + 'url': caption_url, + 'ext': { + 'webvtt': 'vtt', + }.get(caption_format, caption_format), + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description') or video_data.get('short_desc'), + 'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'), + 'duration': int_or_none(video_data.get('duration_sec')), + 'upload_date': unified_strdate(video_data.get('publish_date')), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f09b4cf2c..0c3e081ad 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -251,6 +251,7 @@ from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from 
.discoverygo import DiscoveryGoIE +from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE from .dw import ( From c19ef77c3138ecf1ce5c988de2d94031f58b4f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20P=C3=B6schel?= <github@basicmaster.de> Date: Wed, 25 Jan 2017 20:44:03 +0100 Subject: [PATCH 082/195] [jamendo] Extract full title --- youtube_dl/extractor/jamendo.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 51d19e67d..3db07e79f 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -16,7 +16,7 @@ class JamendoIE(InfoExtractor): 'id': '196219', 'display_id': 'stories-from-emona-i', 'ext': 'flac', - 'title': 'Stories from Emona I', + 'title': 'Maya Filipič - Stories from Emona I', 'thumbnail': r're:^https?://.*\.jpg' } } @@ -28,7 +28,7 @@ class JamendoIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - title = self._html_search_meta('name', webpage, 'title') + title = self._search_regex(r'<title>(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -62,21 +62,21 @@ class JamendoAlbumIE(InfoExtractor): 'url': 'https://www.jamendo.com/album/121486/duck-on-cover', 'info_dict': { 'id': '121486', - 'title': 'Duck On Cover' + 'title': 'Shearer - Duck On Cover' }, 'playlist': [{ 'md5': 'e1a2fcb42bda30dfac990212924149a8', 'info_dict': { 'id': '1032333', 'ext': 'flac', - 'title': 'Warmachine' + 'title': 'Shearer - Warmachine' } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { 'id': '1032330', 'ext': 'flac', - 'title': 'Without Your Ghost' + 'title': 'Shearer - Without Your Ghost' } }], 'params': { @@ -90,7 +90,7 @@ class JamendoAlbumIE(InfoExtractor): webpage = self._download_webpage(url, mobj.group('display_id')) - title = self._html_search_meta('name', webpage, 'title') + 
title = self._search_regex(r'<title>(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') entries = [ self.url_result( From 15846398ca0af9154b88a69f594557568c6a4782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Jan 2017 23:23:08 +0700 Subject: [PATCH 083/195] [utils] Improve parse_duration --- test/test_utils.py | 1 + youtube_dl/utils.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index e99bf794e..a74d59f34 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -510,6 +510,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('1 hour 3 minutes'), 3780) self.assertEqual(parse_duration('87 Min.'), 5220) self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) + self.assertEqual(parse_duration('PT00H03M30SZ'), 210) def test_fix_xml_ampersands(self): self.assertEqual( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 98acc2b45..cf46711b9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1773,7 +1773,7 @@ def parse_duration(s): s = s.strip() days, hours, mins, secs, ms = [None] * 5 - m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s) + m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s) if m: days, hours, mins, secs, ms = m.groups() else: @@ -1790,11 +1790,11 @@ def parse_duration(s): )?
(?: (?P[0-9]+)(?P\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* - )?$''', s) + )?Z?$''', s) if m: days, hours, mins, secs, ms = m.groups() else: - m = re.match(r'(?i)(?:(?P[0-9.]+)\s*(?:hours?)|(?P[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s) + m = re.match(r'(?i)(?:(?P[0-9.]+)\s*(?:hours?)|(?P[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s) if m: hours, mins = m.groups() else: From 3cbecdd11121b9c7ff0284e481992f7230806399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 26 Jan 2017 23:25:40 +0700 Subject: [PATCH 084/195] [jamendo] Improve and extract more metadata (closes #11836) --- youtube_dl/extractor/jamendo.py | 65 ++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 3db07e79f..595d7a5b7 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -5,9 +5,27 @@ import re from ..compat import compat_urlparse from .common import InfoExtractor +from ..utils import parse_duration -class JamendoIE(InfoExtractor): +class JamendoBaseIE(InfoExtractor): + def _extract_meta(self, webpage, fatal=True): + title = self._og_search_title( + webpage, default=None) or self._search_regex( + r'([^<]+)', webpage, + 'title', default=None) + if title: + title = self._search_regex( + r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None) + if not title: + title = self._html_search_meta( + 'name', webpage, 'title', fatal=fatal) + mobj = re.search(r'(.+) - (.+)', title or '') + artist, second = mobj.groups() if mobj else [None] * 2 + return title, artist, second + + +class JamendoIE(JamendoBaseIE): _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)' _TEST = { 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', @@ -17,6 +35,9 @@ class JamendoIE(InfoExtractor): 'display_id': 'stories-from-emona-i', 'ext': 'flac', 'title': 'Maya Filipič - Stories from Emona I', + 'artist': 'Maya 
Filipič', + 'track': 'Stories from Emona I', + 'duration': 210, 'thumbnail': r're:^https?://.*\.jpg' } } @@ -28,7 +49,7 @@ class JamendoIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - title = self._search_regex(r'<title>(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') + title, artist, track = self._extract_meta(webpage) formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -46,17 +67,23 @@ class JamendoIE(InfoExtractor): thumbnail = self._html_search_meta( 'image', webpage, 'thumbnail', fatal=False) + duration = parse_duration(self._search_regex( + r']+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']', + webpage, 'duration', fatal=False)) return { 'id': track_id, 'display_id': display_id, 'thumbnail': thumbnail, 'title': title, + 'duration': duration, + 'artist': artist, + 'track': track, 'formats': formats } -class JamendoAlbumIE(InfoExtractor): +class JamendoAlbumIE(JamendoBaseIE): _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P[0-9]+)/(?P[\w-]+)' _TEST = { 'url': 'https://www.jamendo.com/album/121486/duck-on-cover', @@ -69,14 +96,18 @@ class JamendoAlbumIE(InfoExtractor): 'info_dict': { 'id': '1032333', 'ext': 'flac', - 'title': 'Shearer - Warmachine' + 'title': 'Shearer - Warmachine', + 'artist': 'Shearer', + 'track': 'Warmachine', } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { 'id': '1032330', 'ext': 'flac', - 'title': 'Shearer - Without Your Ghost' + 'title': 'Shearer - Without Your Ghost', + 'artist': 'Shearer', + 'track': 'Without Your Ghost', } }], 'params': { @@ -90,18 +121,18 @@ class JamendoAlbumIE(InfoExtractor): webpage = self._download_webpage(url, mobj.group('display_id')) - title = self._search_regex(r'(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') + title, artist, album = self._extract_meta(webpage, fatal=False) - entries = [ - self.url_result( - compat_urlparse.urljoin(url, m.group('path')), - ie=JamendoIE.ie_key(), - video_id=self._search_regex( - 
r'/track/(\d+)', m.group('path'), - 'track id', default=None)) - for m in re.finditer( - r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', - webpage) - ] + entries = [{ + '_type': 'url_transparent', + 'url': compat_urlparse.urljoin(url, m.group('path')), + 'ie_key': JamendoIE.ie_key(), + 'id': self._search_regex( + r'/track/(\d+)', m.group('path'), 'track id', default=None), + 'artist': artist, + 'album': album, + } for m in re.finditer( + r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', + webpage)] return self.playlist_result(entries, album_id, title) From 9463637887ba784e3499410ab0945dcd68002bc1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 26 Jan 2017 18:36:28 +0100 Subject: [PATCH 085/195] [tva] Add new extractor(closes #11842) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tva.py | 54 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/tva.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0c3e081ad..81366f933 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -985,6 +985,7 @@ from .tv2 import ( ) from .tv3 import TV3IE from .tv4 import TV4IE +from .tva import TVAIE from .tvanouvelles import ( TVANouvellesIE, TVANouvellesArticleIE, diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py new file mode 100644 index 000000000..3ced098f9 --- /dev/null +++ b/youtube_dl/extractor/tva.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + smuggle_url, +) + + +class TVAIE(InfoExtractor): + _VALID_URL = r'https?://videos\.tva\.ca/episode/(?P\d+)' + _TEST = { + 'url': 'http://videos.tva.ca/episode/85538', + 'info_dict': { + 'id': '85538', + 'ext': 'mp4', + 'title': 'Épisode du 25 janvier 2017', + 'description': 
'md5:e9e7fb5532ab37984d2dc87229cadf98', + 'upload_date': '20170126', + 'timestamp': 1485442329, + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + "https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id, + video_id, query={ + '$expand': 'Metadata,CustomId', + '$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName', + '$format': 'json', + }) + metadata = video_data.get('Metadata', {}) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'title': video_data['Title'], + 'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}), + 'description': video_data.get('LongDescription') or video_data.get('ShortDescription'), + 'series': video_data.get('ShowName'), + 'episode': metadata.get('EpisodeTitle'), + 'episode_number': int_or_none(metadata.get('EpisodeNumber')), + 'categories': video_data.get('Categories'), + 'average_rating': video_data.get('AverageUserRating'), + 'timestamp': parse_iso8601(video_data.get('CreatedDate')), + 'ie_key': 'Ooyala', + } From b51a4ebed45a3944c02bb3c36778630fd9306de7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 26 Jan 2017 19:15:43 +0100 Subject: [PATCH 086/195] [aenetworks] fix season episodes extraction(fixes #11669) --- youtube_dl/extractor/aenetworks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index c5e079a40..c97317400 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -87,7 +87,7 @@ class AENetworksIE(AENetworksBaseIE): self._html_search_meta('aetn:SeriesTitle', webpage)) elif url_parts_len == 2: entries = [] - for episode_item in re.findall(r'(?s)]+class="[^"]*episode-item[^"]*"[^>]*>', webpage): + for episode_item in 
re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage): episode_attributes = extract_attributes(episode_item) episode_url = compat_urlparse.urljoin( url, episode_attributes['data-canonical']) From 0b23c222ba099d73c287d024f45f90714c15f289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 21:31:26 +0700 Subject: [PATCH 087/195] [twitch:vod] Expand _VALID_URL (closes #11846) --- youtube_dl/extractor/twitch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 6d67bda86..1ca159a4d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -209,7 +209,7 @@ class TwitchVodIE(TwitchItemBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?twitch\.tv/[^/]+/v/| + (?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/| player\.twitch\.tv/\?.*?\bvideo=v ) (?P\d+) @@ -259,6 +259,9 @@ class TwitchVodIE(TwitchItemBaseIE): }, { 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877', 'only_matching': True, + }, { + 'url': 'https://www.twitch.tv/videos/6528877', + 'only_matching': True, }] def _real_extract(self, url): From 489ffc118232056537e86bd0281488e217fce7d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 22:55:42 +0700 Subject: [PATCH 088/195] [soundcloud] Fix track URL extraction (closes #11852) --- youtube_dl/extractor/soundcloud.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5a201eaa8..96bebeec5 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -173,11 +173,12 @@ class SoundcloudIE(InfoExtractor): }) # We have to retrieve the url - streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?' 
- 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token)) format_dict = self._download_json( - streams_url, - track_id, 'Downloading track url') + 'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id, + track_id, 'Downloading track url', query={ + 'client_id': self._CLIENT_ID, + 'secret_token': secret_token, + }) for key, stream_url in format_dict.items(): if key.startswith('http'): From 9b73471801d24cec678226c82cce9e9ece92732e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 23:08:32 +0700 Subject: [PATCH 089/195] [soundcloud] Extract hls formats --- youtube_dl/extractor/soundcloud.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 96bebeec5..55c80e1cc 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -198,6 +198,13 @@ class SoundcloudIE(InfoExtractor): 'ext': 'flv', 'vcodec': 'none', }) + elif key.startswith('hls'): + m3u8_formats = self._extract_m3u8_formats( + stream_url, track_id, 'mp3', entry_protocol='m3u8_native', + m3u8_id=key, fatal=False) + for f in m3u8_formats: + f['vcodec'] = 'none' + formats.extend(m3u8_formats) if not formats: # We fallback to the stream_url in the original info, this From 3a194cb4ecfa8c2590f22236dffc84e1b1565196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 23:16:30 +0700 Subject: [PATCH 090/195] [soundcloud] Improve formats extraction and extract audio bitrate --- youtube_dl/extractor/soundcloud.py | 48 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 55c80e1cc..b3aa4ce26 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -181,46 +181,46 @@ class SoundcloudIE(InfoExtractor): }) for key, stream_url in format_dict.items(): + abr = 
int_or_none(self._search_regex( + r'_(\d+)_url', key, 'audio bitrate', default=None)) if key.startswith('http'): - formats.append({ + stream_formats = [{ 'format_id': key, 'ext': ext, 'url': stream_url, - 'vcodec': 'none', - }) + }] elif key.startswith('rtmp'): # The url doesn't have an rtmp app, we have to extract the playpath url, path = stream_url.split('mp3:', 1) - formats.append({ + stream_formats = [{ 'format_id': key, 'url': url, 'play_path': 'mp3:' + path, 'ext': 'flv', - 'vcodec': 'none', - }) + }] elif key.startswith('hls'): - m3u8_formats = self._extract_m3u8_formats( + stream_formats = self._extract_m3u8_formats( stream_url, track_id, 'mp3', entry_protocol='m3u8_native', m3u8_id=key, fatal=False) - for f in m3u8_formats: - f['vcodec'] = 'none' - formats.extend(m3u8_formats) + else: + continue - if not formats: - # We fallback to the stream_url in the original info, this - # cannot be always used, sometimes it can give an HTTP 404 error - formats.append({ - 'format_id': 'fallback', - 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, - 'ext': ext, - 'vcodec': 'none', - }) + for f in stream_formats: + f['abr'] = abr - for f in formats: - if f['format_id'].startswith('http'): - f['protocol'] = 'http' - if f['format_id'].startswith('rtmp'): - f['protocol'] = 'rtmp' + formats.extend(stream_formats) + + if not formats: + # We fallback to the stream_url in the original info, this + # cannot be always used, sometimes it can give an HTTP 404 error + formats.append({ + 'format_id': 'fallback', + 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, + 'ext': ext, + }) + + for f in formats: + f['vcodec'] = 'none' self._check_formats(formats, track_id) self._sort_formats(formats) From e0b6e50ccd124c6f618bf25bc94361d83cbc8b86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 23:55:55 +0700 Subject: [PATCH 091/195] [crunchyroll] Improve series and season metadata extraction (closes #11832) --- 
youtube_dl/extractor/crunchyroll.py | 38 ++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 559044352..f811c7f33 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -166,6 +166,25 @@ class CrunchyrollIE(CrunchyrollBaseIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589', + 'info_dict': { + 'id': '727589', + 'ext': 'mp4', + 'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!", + 'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Kadokawa Pictures Inc.', + 'upload_date': '20170118', + 'series': "KONOSUBA -God's blessing on this wonderful world!", + 'season_number': 2, + 'episode': 'Give Me Deliverance from this Judicial Injustice!', + 'episode_number': 1, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697', 'only_matching': True, @@ -439,6 +458,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text subtitles = self.extract_subtitles(video_id, webpage) + # webpage provide more accurate data than series_title from XML + series = self._html_search_regex( + r'id=["\']showmedia_about_episode_num[^>]+>\s*]+>([^<]+)', + webpage, 'series', default=xpath_text(metadata, 'series_title')) + + episode = xpath_text(metadata, 'episode_title') + episode_number = int_or_none(xpath_text(metadata, 'episode_number')) + + season_number = int_or_none(self._search_regex( + r'(?s)]+id=["\']showmedia_about_episode_num[^>]+>.+?\s*

\s*Season (\d+)', + webpage, 'season number', default=None)) + return { 'id': video_id, 'title': video_title, @@ -446,9 +477,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'thumbnail': xpath_text(metadata, 'episode_image_url'), 'uploader': video_uploader, 'upload_date': video_upload_date, - 'series': xpath_text(metadata, 'series_title'), - 'episode': xpath_text(metadata, 'episode_title'), - 'episode_number': int_or_none(xpath_text(metadata, 'episode_number')), + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, 'subtitles': subtitles, 'formats': formats, } From 815d2a36d81c4cc6181d0536ce811b0e2e4a5021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 00:03:21 +0700 Subject: [PATCH 092/195] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index ff305d7e8..2c670c62e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version + +Core +* [utils] Improve parse_duration + +Extractors +* [crunchyroll] Improve series and season metadata extraction (#11832) +* [soundcloud] Improve formats extraction and extract audio bitrate ++ [soundcloud] Extract HLS formats +* [soundcloud] Fix track URL extraction (#11852) ++ [twitch:vod] Expand URL regular expressions (#11846) +* [aenetworks] Fix season episodes extraction (#11669) ++ [tva] Add support for videos.tva.ca (#11842) +* [jamendo] Improve and extract more metadata (#11836) ++ [disney] Add support for Disney sites (#7409, #11801, #4975, #11000) +* [vevo] Remove request to old API and catch API v2 errors ++ [cmt,mtv,southpark] Add support for episode URLs (#11837) ++ [youtube] Add fallback for duration extraction (#11841) + + version 2017.01.25 Extractors From d41ed6d243c2079db123963a7f65e91f24b390f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 00:33:55 +0700 Subject: 
[PATCH 093/195] release 2017.01.28 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4d409f785..693f3b745 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.25** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.28** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.25 +[debug] youtube-dl version 2017.01.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 2c670c62e..8e5a04b42 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.28 Core * [utils] Improve parse_duration diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f640cfcaa..6318a862f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -202,6 +202,7 @@ - **Digiteka** - **Discovery** - **DiscoveryGo** + - **Disney** - **Dotsub** - **DouyuTV**: 斗鱼 - **DPlay** @@ -785,6 +786,7 @@ - **TV2Article** - **TV3** - **TV4**: tv4.se and tv4play.se + - **TVA** - **TVANouvelles** - **TVANouvellesArticle** - **TVC** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c23fe85de..c22c410a8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.25' +__version__ = '2017.01.28' From 99a0baf370c7652f6103cff71f878872229b4129 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Tue, 24 Jan 2017 17:42:00 +0100 Subject: [PATCH 094/195] [konserthusetplay] Add support for rspoplay.se --- youtube_dl/extractor/konserthusetplay.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 
deletions(-) diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index 3ae2aa317..c11cbcf47 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -11,22 +11,22 @@ from ..utils import ( class KonserthusetPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P[^&]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P[^&]+)' + _TESTS = [{ 'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A', + 'md5': 'e3fd47bf44e864bd23c08e487abe1967', 'info_dict': { 'id': 'CKDDnlCY-dhWAAqiMERd-A', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Orkesterns instrument: Valthornen', 'description': 'md5:f10e1f0030202020396a4d712d2fa827', 'thumbnail': 're:^https?://.*$', - 'duration': 398.8, + 'duration': 398.76, }, - 'params': { - # rtmp download - 'skip_download': True, - }, - } + }, { + 'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 26e40542dd730b1a18f9d7eebe241972b77810cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 17:50:56 +0700 Subject: [PATCH 095/195] [kaltura] Improve uploader_id extraction --- youtube_dl/extractor/kaltura.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index c0ddad6f9..a57d913af 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -319,6 +319,6 @@ class KalturaIE(InfoExtractor): 'thumbnail': info.get('thumbnailUrl'), 'duration': info.get('duration'), 'timestamp': info.get('createdAt'), - 'uploader_id': info.get('userId'), + 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None, 'view_count': info.get('plays'), } From ab6f6aee78fc4757fcb65bd8f4699aaf9feac3a6 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 18:27:42 +0700 Subject: [PATCH 096/195] [kaltura] Add fallback for fileExt --- youtube_dl/extractor/kaltura.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index a57d913af..5ef382f9f 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -266,9 +266,12 @@ class KalturaIE(InfoExtractor): # skip for now. if f.get('fileExt') == 'chun': continue - if not f.get('fileExt') and f.get('containerFormat') == 'qt': + if not f.get('fileExt'): # QT indicates QuickTime; some videos have broken fileExt - f['fileExt'] = 'mov' + if f.get('containerFormat') == 'qt': + f['fileExt'] = 'mov' + else: + f['fileExt'] = 'mp4' video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g From b92d3c5343536eb0a865afa79e3787fc384ec0ec Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 24 Jan 2017 13:52:17 +0800 Subject: [PATCH 097/195] [vlive] Add support for channels --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/vlive.py | 68 ++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 81366f933..c781c9b87 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1129,7 +1129,10 @@ from .vk import ( VKUserVideosIE, VKWallPostIE, ) -from .vlive import VLiveIE +from .vlive import ( + VLiveIE, + VLiveChannelIE +) from .vodlocker import VodlockerIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 540246c79..70bab1f04 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals import re +import time +import 
itertools from .common import InfoExtractor from ..utils import ( @@ -169,3 +171,69 @@ class VLiveIE(InfoExtractor): 'subtitles': subtitles, }) return info + + +class VLiveChannelIE(InfoExtractor): + IE_NAME = 'vlive:channel' + _VALID_URL = r'https?://channels\.vlive\.tv/(?P[0-9A-Z]+)/video' + _TEST = { + 'url': 'http://channels.vlive.tv/FCD4B/video', + 'info_dict': { + 'id': 'FCD4B', + 'title': 'MAMAMOO', + }, + 'playlist_mincount': 110 + } + _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b' + + def _real_extract(self, url): + channel_code = self._match_id(url) + + webpage = self._download_webpage( + 'http://channels.vlive.tv/%s/video' % channel_code, channel_code) + app_js_url = self._search_regex( + r'(http[^\'\"\s]+app\.js)', webpage, 'app js', default='') + + if app_js_url: + app_js = self._download_webpage(app_js_url, channel_code, 'app js') + app_id = self._search_regex( + r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]', + app_js, 'app id', default=self._APP_ID) + else: + app_id = self._APP_ID + + channel_info = self._download_json( + 'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode', + channel_code, note='decode channel code', + query={'app_id': app_id, 'channelCode': channel_code, '_': int(time.time())}) + + channel_seq = channel_info['result']['channelSeq'] + channel_name = None + entries = [] + + for page_num in itertools.count(1): + video_list = self._download_json( + 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList', + channel_code, note='channel list %d' % page_num, + query={ + 'app_id': app_id, + 'channelSeq': channel_seq, + 'maxNumOfRows': 1000, + '_': int(time.time()), + 'pageNo': page_num + } + ) + if not channel_name: + channel_name = video_list['result']['channelInfo']['channelName'] + + if not video_list['result'].get('videoList'): + break + + for video in video_list['result']['videoList']: + video_id = str(video['videoSeq']) + entries.append( + self.url_result( + 'http://www.vlive.tv/video/%s' % video_id, 'Vlive', 
video_id)) + + return self.playlist_result( + entries, channel_code, channel_name) From 661cc229d2e885dd303d26535477c8905805ddf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 19:08:01 +0700 Subject: [PATCH 098/195] [vlive:channel] Improve --- youtube_dl/extractor/vlive.py | 62 ++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 70bab1f04..b9718901b 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -6,15 +6,19 @@ import time import itertools from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlencode, + compat_str, +) from ..utils import ( dict_get, ExtractorError, float_or_none, int_or_none, remove_start, + try_get, urlencode_postdata, ) -from ..compat import compat_urllib_parse_urlencode class VLiveIE(InfoExtractor): @@ -175,9 +179,9 @@ class VLiveIE(InfoExtractor): class VLiveChannelIE(InfoExtractor): IE_NAME = 'vlive:channel' - _VALID_URL = r'https?://channels\.vlive\.tv/(?P[0-9A-Z]+)/video' + _VALID_URL = r'https?://channels\.vlive\.tv/(?P[0-9A-Z]+)' _TEST = { - 'url': 'http://channels.vlive.tv/FCD4B/video', + 'url': 'http://channels.vlive.tv/FCD4B', 'info_dict': { 'id': 'FCD4B', 'title': 'MAMAMOO', @@ -191,21 +195,31 @@ class VLiveChannelIE(InfoExtractor): webpage = self._download_webpage( 'http://channels.vlive.tv/%s/video' % channel_code, channel_code) + + app_id = None + app_js_url = self._search_regex( - r'(http[^\'\"\s]+app\.js)', webpage, 'app js', default='') + r']+src=(["\'])(?Phttp.+?/app\.js.*?)\1', + webpage, 'app js', default=None, group='url') if app_js_url: - app_js = self._download_webpage(app_js_url, channel_code, 'app js') - app_id = self._search_regex( - r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]', - app_js, 'app id', default=self._APP_ID) - else: - app_id = self._APP_ID + app_js = self._download_webpage( + app_js_url, 
channel_code, 'Downloading app JS', fatal=False) + if app_js: + app_id = self._search_regex( + r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]', + app_js, 'app id', default=None) + + app_id = app_id or self._APP_ID channel_info = self._download_json( 'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode', - channel_code, note='decode channel code', - query={'app_id': app_id, 'channelCode': channel_code, '_': int(time.time())}) + channel_code, note='Downloading decode channel code', + query={ + 'app_id': app_id, + 'channelCode': channel_code, + '_': int(time.time()) + }) channel_seq = channel_info['result']['channelSeq'] channel_name = None @@ -214,7 +228,7 @@ class VLiveChannelIE(InfoExtractor): for page_num in itertools.count(1): video_list = self._download_json( 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList', - channel_code, note='channel list %d' % page_num, + channel_code, note='Downloading channel list page #%d' % page_num, query={ 'app_id': app_id, 'channelSeq': channel_seq, @@ -223,17 +237,27 @@ class VLiveChannelIE(InfoExtractor): 'pageNo': page_num } ) - if not channel_name: - channel_name = video_list['result']['channelInfo']['channelName'] - if not video_list['result'].get('videoList'): + if not channel_name: + channel_name = try_get( + video_list, + lambda x: x['result']['channelInfo']['channelName'], + compat_str) + + videos = try_get( + video_list, lambda x: x['result']['videoList'], list) + if not videos: break - for video in video_list['result']['videoList']: - video_id = str(video['videoSeq']) + for video in videos: + video_id = video.get('videoSeq') + if not video_id: + continue + video_id = compat_str(video_id) entries.append( self.url_result( - 'http://www.vlive.tv/video/%s' % video_id, 'Vlive', video_id)) + 'http://www.vlive.tv/video/%s' % video_id, + ie=VLiveIE.ie_key(), video_id=video_id)) return self.playlist_result( entries, channel_code, channel_name) From 008f247077027f10c947060d8f3bb886c9af6aa7 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 20:29:22 +0700 Subject: [PATCH 099/195] [mtv81] Add extractor (closes #7619) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/mtv.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c781c9b87..915291f74 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -553,6 +553,7 @@ from .mtv import ( MTVVideoIE, MTVServicesEmbeddedIE, MTVDEIE, + MTV81IE, ) from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 855c3996f..8acea1461 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -327,6 +327,35 @@ class MTVIE(MTVServicesInfoExtractor): }] +class MTV81IE(InfoExtractor): + IE_NAME = 'mtv81' + _VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P[^/?#.]+)' + + _TEST = { + 'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/', + 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b', + 'info_dict': { + 'id': '5e14040d-18a4-47c4-a582-43ff602de88e', + 'ext': 'mp4', + 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', + 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', + 'timestamp': 1468846800, + 'upload_date': '20160718', + }, + } + + def _extract_mgid(self, webpage): + return self._search_regex( + r'getTheVideo\((["\'])(?Pmgid:.+?)\1', webpage, + 'mgid', group='id') + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) + + class MTVVideoIE(MTVServicesInfoExtractor): IE_NAME = 'mtv:video' _VALID_URL = r'''(?x)^https?:// From 732fb3f8be6cca47c60b3befee83ee9b5002984d Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 21:06:22 +0700 Subject: [PATCH 100/195] [options] Move --abort-on-unavailable-fragment to download section --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0d2ce8d15..5e2936555 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -450,7 +450,7 @@ def parseOpts(overrideArguments=None): '--skip-unavailable-fragments', action='store_true', dest='skip_unavailable_fragments', default=True, help='Skip unavailable fragments (DASH and hlsnative only)') - general.add_option( + downloader.add_option( '--abort-on-unavailable-fragment', action='store_false', dest='skip_unavailable_fragments', help='Abort downloading when some fragment is not available') From a71b8d3b3bb399acb82f3ccfbd8a19d411848db4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 28 Jan 2017 15:51:52 +0100 Subject: [PATCH 101/195] [itv] Add new extractor(closes #9240) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/itv.py | 181 +++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+) create mode 100644 youtube_dl/extractor/itv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 915291f74..086a2296d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -415,6 +415,7 @@ from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE from .iqiyi import IqiyiIE from .ir90tv import Ir90TvIE +from .itv import ITVIE from .ivi import ( IviIE, IviCompilationIE diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py new file mode 100644 index 000000000..d029609c3 --- /dev/null +++ b/youtube_dl/extractor/itv.py @@ -0,0 +1,181 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import uuid +import xml.etree.ElementTree as etree +import json + +from .common import InfoExtractor +from ..compat 
import compat_str +from ..utils import ( + extract_attributes, + xpath_with_ns, + xpath_element, + xpath_text, + int_or_none, + parse_duration, + ExtractorError, + determine_ext, +) + + +class ITVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-z]+)' + _TEST = { + 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', + 'info_dict': { + 'id': '2a2936a0053', + 'ext': 'flv', + 'title': 'Home Movie', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + params = extract_attributes(self._search_regex( + r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params')) + + ns_map = { + 'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/', + 'tem': 'http://tempuri.org/', + 'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types', + 'com': 'http://schemas.itv.com/2009/05/Common', + } + for ns, full_ns in ns_map.items(): + etree.register_namespace(ns, full_ns) + + def _add_ns(name): + return xpath_with_ns(name, ns_map) + + def _add_sub_element(element, name): + return etree.SubElement(element, _add_ns(name)) + + req_env = etree.Element(_add_ns('soapenv:Envelope')) + _add_sub_element(req_env, 'soapenv:Header') + body = _add_sub_element(req_env, 'soapenv:Body') + get_playlist = _add_sub_element(body, ('tem:GetPlaylist')) + request = _add_sub_element(get_playlist, 'tem:request') + _add_sub_element(request, 'itv:ProductionId').text = params['data-video-id'] + _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper() + vodcrid = _add_sub_element(request, 'itv:Vodcrid') + _add_sub_element(vodcrid, 'com:Id') + _add_sub_element(request, 'itv:Partition') + user_info = _add_sub_element(get_playlist, 'tem:userInfo') + _add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv' + _add_sub_element(user_info, 'itv:DM') + _add_sub_element(user_info,
'itv:RevenueScienceValue') + _add_sub_element(user_info, 'itv:SessionId') + _add_sub_element(user_info, 'itv:SsoToken') + _add_sub_element(user_info, 'itv:UserToken') + site_info = _add_sub_element(get_playlist, 'tem:siteInfo') + _add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None' + _add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV' + _add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any' + _add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO' + _add_sub_element(site_info, 'itv:Category') + _add_sub_element(site_info, 'itv:Platform').text = 'DotCom' + _add_sub_element(site_info, 'itv:Site').text = 'ItvCom' + device_info = _add_sub_element(get_playlist, 'tem:deviceInfo') + _add_sub_element(device_info, 'itv:ScreenSize').text = 'Big' + player_info = _add_sub_element(get_playlist, 'tem:playerInfo') + _add_sub_element(player_info, 'itv:Version').text = '2' + + headers = self.geo_verification_headers() + headers.update({ + 'Content-Type': 'text/xml; charset=utf-8', + 'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist', + }) + resp_env = self._download_xml( + params['data-playlist-url'], video_id, + headers=headers, data=etree.tostring(req_env)) + playlist = xpath_element(resp_env, './/Playlist') + if playlist is None: + fault_string = xpath_text(resp_env, './/faultstring') + raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) + title = xpath_text(playlist, 'EpisodeTitle', fatal=True) + media_files = xpath_element(playlist, 'VideoEntries/Video/MediaFiles', fatal=True) + rtmp_url = media_files.attrib['base'] + + formats = [] + for media_file in media_files.findall('MediaFile'): + play_path = xpath_text(media_file, 'URL') + if not play_path: + continue + tbr = int_or_none(media_file.get('bitrate'), 1000) + formats.append({ + 'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), + 'url': rtmp_url, + 'play_path': play_path, + 'tbr': tbr, + 'ext': 'flv', + }) + + ios_playlist_url = 
params.get('data-video-playlist') + hmac = params.get('data-video-hmac') + if ios_playlist_url and hmac: + headers = self.geo_verification_headers() + headers.update({ + 'Accept': 'application/vnd.itv.vod.playlist.v2+json', + 'Content-Type': 'application/json', + 'hmac': hmac.upper(), + }) + ios_playlist = self._download_json( + ios_playlist_url, video_id, data=json.dumps({ + 'user': { + 'itvUserId': '', + 'entitlements': [], + 'token': '' + }, + 'device': { + 'manufacturer': 'Apple', + 'model': 'iPad', + 'os': { + 'name': 'iPhone OS', + 'version': '9.3', + 'type': 'ios' + } + }, + 'client': { + 'version': '4.1', + 'id': 'browser' + }, + 'variantAvailability': { + 'featureset': { + 'min': ['hls', 'aes'], + 'max': ['hls', 'aes'] + }, + 'platformTag': 'mobile' + } + }).encode(), headers=headers, fatal=False) + if ios_playlist: + video_data = ios_playlist.get('Playlist', {}).get('Video', {}) + ios_base_url = video_data.get('Base') + for media_file in video_data.get('MediaFiles', []): + href = media_file.get('Href') + if not href: + continue + if ios_base_url: + href = ios_base_url + href + ext = determine_ext(href) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': href, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'episode_title': title, + 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), + 'series': xpath_text(playlist, 'ProgrammeTitle'), + 'duration': parse_duration(xpath_text(playlist, 'Duration')), + } From 24ee6b9721770b7066f10f6a6773f1ce15f82ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 22:40:07 +0700 Subject: [PATCH 102/195] [options] Remove experimental mark from some options --- youtube_dl/options.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index
5e2936555..09c9387ca 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -216,23 +216,23 @@ def parseOpts(overrideArguments=None): network.add_option( '--source-address', metavar='IP', dest='source_address', default=None, - help='Client-side IP address to bind to (experimental)', + help='Client-side IP address to bind to', ) network.add_option( '-4', '--force-ipv4', action='store_const', const='0.0.0.0', dest='source_address', - help='Make all connections via IPv4 (experimental)', + help='Make all connections via IPv4', ) network.add_option( '-6', '--force-ipv6', action='store_const', const='::', dest='source_address', - help='Make all connections via IPv6 (experimental)', + help='Make all connections via IPv6', ) network.add_option( '--geo-verification-proxy', dest='geo_verification_proxy', default=None, metavar='URL', help='Use this proxy to verify the IP address for some geo-restricted sites. ' - 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)' + 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.' ) network.add_option( '--cn-verification-proxy', @@ -297,7 +297,7 @@ def parseOpts(overrideArguments=None): '--match-filter', metavar='FILTER', dest='match_filter', default=None, help=( - 'Generic video filter (experimental). ' + 'Generic video filter. 
' 'Specify any key (see help for -o for a list of available keys) to' ' match if the key is present, ' '!key to check if the key is not present,' From f592ff98683794e0f79c96cbec67b737ae8da00c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 28 Jan 2017 17:25:15 +0100 Subject: [PATCH 103/195] [itv] extract subtitles --- youtube_dl/extractor/itv.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index d029609c3..d65cdc6af 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -20,7 +20,7 @@ from ..utils import ( class ITVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-z]+)' + _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' _TEST = { 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 'info_dict': { @@ -98,7 +98,8 @@ class ITVIE(InfoExtractor): fault_string = xpath_text(resp_env, './/faultstring') raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) title = xpath_text(playlist, 'EpisodeTitle', fatal=True) - media_files = xpath_element(playlist, 'VideoEntries/Video/MediaFiles', fatal=True) + video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) + media_files = xpath_element(video_element, 'MediaFiles', fatal=True) rtmp_url = media_files.attrib['base'] formats = [] @@ -170,10 +171,21 @@ class ITVIE(InfoExtractor): }) self._sort_formats(formats) + subtitles = {} + for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): + if not caption_url.text: + continue + ext = determine_ext(caption_url.text, 'ttml') + subtitles.setdefault('en', []).append({ + 'url': caption_url, + 'ext': 'ttml' if ext == 'xml' else ext, + }) + return { 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, 'episode_title': title, 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), 'series': xpath_text(playlist,
'ProgrammeTitle'), From 4edeac5bfae76966fd14f636bd68850ea0403ece Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 28 Jan 2017 17:28:18 +0100 Subject: [PATCH 104/195] [itv] fix subtitle extraction --- youtube_dl/extractor/itv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index d65cdc6af..0328c7093 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -177,7 +177,7 @@ class ITVIE(InfoExtractor): continue ext = determine_ext(caption_url.text, 'ttml') subtitles.setdefault('en', []).append({ - 'url': caption_url, + 'url': caption_url.text, 'ext': 'ttml' if ext == 'xml' else ext, }) From acbb2374bce27eda16764b80832f88cf833a51e5 Mon Sep 17 00:00:00 2001 From: Costy Petrisor Date: Sun, 1 May 2016 12:34:11 +0000 Subject: [PATCH 105/195] added --autonumber-start NUMBER as a command line option to be able to offset the index at which autonumber formats filenames --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 1 + youtube_dl/options.py | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 41d9a63ee..c71e94518 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -584,7 +584,7 @@ class YoutubeDL(object): if autonumber_size is None: autonumber_size = 5 autonumber_templ = '%0' + str(autonumber_size) + 'd' - template_dict['autonumber'] = autonumber_templ % self._num_downloads + template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads) if template_dict.get('playlist_index') is not None: template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index']) if template_dict.get('resolution') is None: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index dfa4ae839..577bc880f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -321,6 +321,7 @@ def 
_real_main(argv=None): 'listformats': opts.listformats, 'outtmpl': outtmpl, 'autonumber_size': opts.autonumber_size, + 'autonumber_start': opts.autonumber_start, 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'force_generic_extractor': opts.force_generic_extractor, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 09c9387ca..571525434 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -663,6 +663,10 @@ def parseOpts(overrideArguments=None): '--autonumber-size', dest='autonumber_size', metavar='NUMBER', help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') + filesystem.add_option( + '--autonumber-start', + dest='autonumber_start', metavar='NUMBER', type="int", default=1, + help='Specify the start value for the %(autonumber)s counter. Defaults to 1.') filesystem.add_option( '--restrict-filenames', action='store_true', dest='restrictfilenames', default=False, From 1a241a2d02e2507219e81d7b18c18f10937ae6e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 23:57:56 +0700 Subject: [PATCH 106/195] [options] Refactor autonumber options and add validation (closes #727, closes #2702, closes #9362) --- youtube_dl/__init__.py | 6 ++++++ youtube_dl/options.py | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 577bc880f..2b156342a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -133,6 +133,12 @@ def _real_main(argv=None): parser.error('TV Provider account username missing\n') if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): parser.error('using output template conflicts with using title, video ID or auto number') + if opts.autonumber_size is not None: + if opts.autonumber_size <= 0: + parser.error('auto number size must be positive') + if opts.autonumber_start is not None: + 
if opts.autonumber_start < 0: + parser.error('auto number start must be positive or 0') if opts.usetitle and opts.useid: parser.error('using title conflicts with using video ID') if opts.username is not None and opts.password is None: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 571525434..3abf621c0 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -661,12 +661,12 @@ def parseOpts(overrideArguments=None): help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info')) filesystem.add_option( '--autonumber-size', - dest='autonumber_size', metavar='NUMBER', - help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') + dest='autonumber_size', metavar='NUMBER', default=5, type=int, + help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given (default is %default)') filesystem.add_option( '--autonumber-start', - dest='autonumber_start', metavar='NUMBER', type="int", default=1, - help='Specify the start value for the %(autonumber)s counter. 
Defaults to 1.') + dest='autonumber_start', metavar='NUMBER', default=1, type=int, + help='Specify the start value for %(autonumber)s (default is %default)') filesystem.add_option( '--restrict-filenames', action='store_true', dest='restrictfilenames', default=False, From c0af11abeeaad75f4387ad77adc751715dfc0cf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 00:52:23 +0700 Subject: [PATCH 107/195] Credit @AVerwer for showroomlive (#11458) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 9e092cccc..90ff4d789 100644 --- a/AUTHORS +++ b/AUTHORS @@ -191,3 +191,4 @@ Rich Leeper Zhong Jianxin Thor77 Mattias Wadman +Arjan Verwer From ffcfb7e3e01cec5f5468e4639b2e4d44a0c7bfba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 00:54:31 +0700 Subject: [PATCH 108/195] Credit @costypetrisor for autonumber start (#9362) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 90ff4d789..6b6e38613 100644 --- a/AUTHORS +++ b/AUTHORS @@ -192,3 +192,4 @@ Zhong Jianxin Thor77 Mattias Wadman Arjan Verwer +Costy Petrisor From 34cea6137e6df158c99d83fd1c1af55f94ee4a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 00:57:15 +0700 Subject: [PATCH 109/195] Credit @einstein95 for pornflip (#11795) and chaturbate fix (#11797) --- AUTHORS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index 6b6e38613..600e2c55b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -193,3 +193,5 @@ Thor77 Mattias Wadman Arjan Verwer Costy Petrisor +Logan B + From 186f4abe938e0f631b63c5dc1aaa4d622513a366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 00:59:17 +0700 Subject: [PATCH 110/195] Credit @goggle for 20min (#11683) and azmedien (#11805) --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 600e2c55b..b3193f7da 100644 --- a/AUTHORS +++ 
b/AUTHORS @@ -194,4 +194,4 @@ Mattias Wadman Arjan Verwer Costy Petrisor Logan B - +Alex Seiler From f5169501d2749503e5d19f9c51937aedcce357e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 01:00:17 +0700 Subject: [PATCH 111/195] Credit @sudovijay for openload fix (#11646) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index b3193f7da..434217abb 100644 --- a/AUTHORS +++ b/AUTHORS @@ -195,3 +195,4 @@ Arjan Verwer Costy Petrisor Logan B Alex Seiler +Vijay Singh From 4d07b748c2e8057fa6417ab5422cb19be313d7b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 01:01:39 +0700 Subject: [PATCH 112/195] Credit @bastik for zdf fix (#11063) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 434217abb..49ffc99aa 100644 --- a/AUTHORS +++ b/AUTHORS @@ -196,3 +196,4 @@ Costy Petrisor Logan B Alex Seiler Vijay Singh +Paul Hartmann From 59c307891ac2cca2b2db42a534a3f4de61820450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 01:02:28 +0700 Subject: [PATCH 113/195] Credit @RPing for cntv (#8541) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 49ffc99aa..78a7a5291 100644 --- a/AUTHORS +++ b/AUTHORS @@ -197,3 +197,4 @@ Logan B Alex Seiler Vijay Singh Paul Hartmann +Stephen Chen From 0842b8241d9f8984dd70266b59aa68241259401f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 01:03:59 +0700 Subject: [PATCH 114/195] Credit @fast90 for config location (#10648) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 78a7a5291..022a5de84 100644 --- a/AUTHORS +++ b/AUTHORS @@ -198,3 +198,4 @@ Alex Seiler Vijay Singh Paul Hartmann Stephen Chen +Fabian Stahl From 56fc078da84a7f26d8290b2b425cc2da66a5975a Mon Sep 17 00:00:00 2001 From: Andre Walker Date: Sat, 28 Jan 2017 16:19:38 +0100 Subject: [PATCH 115/195] [npo] 
Update subtitles url NPO websites changed the domain they used for subtitles, from e.omroep.nl to tt888.omroep.nl. --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index c91f58461..962437145 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -241,7 +241,7 @@ class NPOIE(NPOBaseIE): if metadata.get('tt888') == 'ja': subtitles['nl'] = [{ 'ext': 'vtt', - 'url': 'http://e.omroep.nl/tt888/%s' % video_id, + 'url': 'http://tt888.omroep.nl/tt888/%s' % video_id, }] return { From 76aaf1faaed613569cb71e4f9aa7bd218f27c54b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 03:43:46 +0700 Subject: [PATCH 116/195] Credit @BagiraHun for videa (#11133) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 022a5de84..3ef2800c9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -199,3 +199,4 @@ Vijay Singh Paul Hartmann Stephen Chen Fabian Stahl +Bagira From d04621daf451d601dba80dc0f2baa29e404e4ca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 05:36:53 +0700 Subject: [PATCH 117/195] [extractor/common] Fix duration per dash segment (closes #11868) --- youtube_dl/extractor/common.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dce8c7d0d..a3048fb59 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1751,14 +1751,16 @@ class InfoExtractor(object): # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI # or any YouTube dashsegments video fragments = [] - s_num = 0 - for segment_url in representation_ms_info['segment_urls']: - s = representation_ms_info['s'][s_num] + segment_index = 0 + timescale = representation_ms_info['timescale'] + for s in representation_ms_info['s']: + duration = float_or_none(s['d'], timescale) for r in 
range(s.get('r', 0) + 1): fragments.append({ - 'url': segment_url, - 'duration': float_or_none(s['d'], representation_ms_info['timescale']), + 'url': representation_ms_info['segment_urls'][segment_index], + 'duration': duration, }) + segment_index += 1 representation_ms_info['fragments'] = fragments # NB: MPD manifest may contain direct URLs to unfragmented media. # No fragments key is present in this case. From c58c2d63cbde07af66885829b7c3dbcdfbc096dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 05:56:43 +0700 Subject: [PATCH 118/195] [extractor/common] Document forgotten fragment base and path interfaces --- youtube_dl/extractor/common.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a3048fb59..fb484b6f2 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -121,9 +121,19 @@ class InfoExtractor(object): download, lower-case. "http", "https", "rtsp", "rtmp", "rtmpe", "m3u8", "m3u8_native" or "http_dash_segments". - * fragments A list of fragments of the fragmented media, - with the following entries: - * "url" (mandatory) - fragment's URL + * fragment_base_url + Base URL for fragments. Each fragment's path + value (if present) will be relative to + this URL. + * fragments A list of fragments of a fragmented media. + Each fragment entry must contain either an url + or a path. If an url is present it should be + considered by a client. Otherwise both path and + fragment_base_url must be present. Here is + the list of all potential fields: + * "url" - fragment's URL + * "path" - fragment's path relative to + fragment_base_url * "duration" (optional, int or float) * "filesize" (optional, int) * preference Order number of this format. 
If this field is From e228616c6e73561f0c6d32d6b681bbba321c06aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 06:57:39 +0700 Subject: [PATCH 119/195] [extractor/common] Fix initialization template (closes #11605, closes #11825) --- youtube_dl/extractor/common.py | 48 ++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index fb484b6f2..5a15a9536 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1637,12 +1637,12 @@ class InfoExtractor(object): segment_template = element.find(_add_ns('SegmentTemplate')) if segment_template is not None: extract_common(segment_template) - media_template = segment_template.get('media') - if media_template: - ms_info['media_template'] = media_template + media = segment_template.get('media') + if media: + ms_info['media'] = media initialization = segment_template.get('initialization') if initialization: - ms_info['initialization_url'] = initialization + ms_info['initialization'] = initialization else: extract_Initialization(segment_template) return ms_info @@ -1686,6 +1686,7 @@ class InfoExtractor(object): lang = representation_attrib.get('lang') url_el = representation.find(_add_ns('BaseURL')) filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) + bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'url': base_url, @@ -1693,7 +1694,7 @@ class InfoExtractor(object): 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), 'height': int_or_none(representation_attrib.get('height')), - 'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000), + 'tbr': int_or_none(bandwidth, 1000), 'asr': int_or_none(representation_attrib.get('audioSamplingRate')), 
'fps': int_or_none(representation_attrib.get('frameRate')), 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None, @@ -1702,13 +1703,32 @@ class InfoExtractor(object): } f.update(parse_codecs(representation_attrib.get('codecs'))) representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) - if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: - media_template = representation_ms_info['media_template'] - media_template = media_template.replace('$RepresentationID$', representation_id) - media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template) - media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template) - media_template.replace('$$', '$') + def prepare_template(template_name, identifiers): + t = representation_ms_info[template_name] + t = t.replace('$RepresentationID$', representation_id) + t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t) + t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t) + t = t.replace('$$', '$') + return t + + # @initialization is a regular template like @media one + # so it should be handled just the same way (see + # https://github.com/rg3/youtube-dl/issues/11605) + if 'initialization' in representation_ms_info: + initialization_template = prepare_template( + 'initialization', + # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and + # $Time$ shall not be included for @initialization thus + # only $Bandwidth$ remains + ('Bandwidth', )) + representation_ms_info['initialization_url'] = initialization_template % { + 'Bandwidth': bandwidth, + } + + if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: + + media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ # can't be used at the same time @@ -1720,7 +1740,7 @@ class InfoExtractor(object):
representation_ms_info['fragments'] = [{ 'url': media_template % { 'Number': segment_number, - 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), + 'Bandwidth': bandwidth, }, 'duration': segment_duration, } for segment_number in range( @@ -1738,7 +1758,7 @@ class InfoExtractor(object): def add_segment_url(): segment_url = media_template % { 'Time': segment_time, - 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), + 'Bandwidth': bandwidth, 'Number': segment_number, } representation_ms_info['fragments'].append({ @@ -1780,7 +1800,7 @@ class InfoExtractor(object): 'protocol': 'http_dash_segments', }) if 'initialization_url' in representation_ms_info: - initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id) + initialization_url = representation_ms_info['initialization_url'] if not f.get('url'): f['url'] = initialization_url f['fragments'].append({'url': initialization_url}) From f13da8af289d7d9365e34ef705a53ac62aa3b570 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Sat, 28 Jan 2017 17:52:07 +0100 Subject: [PATCH 120/195] [azmedien:playlist] Add support for topic and themen playlists --- youtube_dl/extractor/azmedien.py | 39 +++++++++++++++++++++++++----- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index a89f71c20..cbc3ed564 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -5,8 +5,9 @@ import re from .common import InfoExtractor from .kaltura import KalturaIE from ..utils import ( - get_element_by_class, + get_element_by_id, strip_or_none, + urljoin, ) @@ -83,8 +84,8 @@ class AZMedienIE(AZMedienBaseIE): return self._kaltura_video(partner_id, entry_id) -class AZMedienShowIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien shows' +class AZMedienPlaylistIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien playlists' _VALID_URL = 
r'''(?x) https?:// (?:www\.)? @@ -93,7 +94,12 @@ class AZMedienShowIE(AZMedienBaseIE): telebaern\.tv| telem1\.ch )/ - (?P<id>[0-9]+-show-[^/\#]+ + (?P<id>[0-9]+- + (?: + show| + topic| + themen + )-[^/\#]+ (?: /[0-9]+-episode-[^/\#]+ )? @@ -108,6 +114,18 @@ class AZMedienShowIE(AZMedienBaseIE): 'title': 'News - Donnerstag, 15. Dezember 2016', }, 'playlist_count': 9, + }, { + # URL with 'themen' + 'url': 'http://www.telem1.ch/258-themen-tele-m1-classics', + 'info_dict': { + 'id': '258-themen-tele-m1-classics', + 'title': 'Tele M1 Classics', + }, + 'playlist_mincount': 15, + }, { + # URL with 'topic', contains nested playlists + 'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen', + 'only_matching': True, }, { # URL with 'show' only 'url': 'http://www.telezueri.ch/86-show-talktaeglich', @@ -136,10 +154,19 @@ class AZMedienShowIE(AZMedienBaseIE): for m in re.finditer( r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)] + if not entries: + entries = [ + # May contain nested playlists (e.g. [1]) thus no explicit + # ie_key + # 1.
http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen + self.url_result(urljoin(url, m.group('url'))) + for m in re.finditer( + r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)] + title = self._search_regex( r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage, 'title', - default=strip_or_none(get_element_by_class( - 'title-block-cell', webpage)), group='title') + default=strip_or_none(get_element_by_id( + 'video-title', webpage)), group='title') return self.playlist_result(entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 086a2296d..2590b5e1b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -82,7 +82,7 @@ from .awaan import ( ) from .azmedien import ( AZMedienIE, - AZMedienShowIE, + AZMedienPlaylistIE, ) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE From fe323a4800d67d0ad2fecebcc3b627a7a22be427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Jan 2017 21:21:26 +0700 Subject: [PATCH 121/195] [ChangeLog] Actualize --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index 8e5a04b42..ab2818f9e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version <unreleased> + +Core +* [extractor/common] Fix initialization template (#11605, #11825) ++ [extractor/common] Document fragment_base_url and fragment's path fields +* [extractor/common] Fix duration per DASH segment (#11868) ++ Introduce --autonumber-start option for initial value of %(autonumber)s + template (#727, #2702, #9362, #10457, #10529, #11862) + +Extractors ++ [azmedien:playlist] Add support for topic and themen playlists (#11817) +* [npo] Fix subtitles extraction ++ [itv] Extract subtitles ++ [itv] Add support for itv.com (#9240) ++ [mtv81] Add support for mtv81.com (#7619) ++ [vlive] Add support for channels (#11826) ++ [kaltura] Add fallback for
fileExt ++ [kaltura] Improve uploader_id extraction ++ [konserthusetplay] Add support for rspoplay.se (#11828) + + version 2017.01.28 Core From 4d2fdb07c47e2d9f96d58f5fbf3da8665a1144a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Jan 2017 13:21:42 +0700 Subject: [PATCH 122/195] release 2017.01.29 --- .github/ISSUE_TEMPLATE.md | 6 ++--- ChangeLog | 2 +- README.md | 47 +++++++++++++++++++-------------------- docs/supportedsites.md | 5 ++++- youtube_dl/version.py | 2 +- 5 files changed, 32 insertions(+), 30 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 693f3b745..10c982fd0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.28** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.29*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.29** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.28 +[debug] youtube-dl version 2017.01.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ab2818f9e..cd7017f6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.29 Core * [extractor/common] Fix initialization template (#11605, #11825) diff --git a/README.md b/README.md index 4f677d0cc..2ee00f515 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,6 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --mark-watched Mark videos watched (YouTube only) --no-mark-watched Do not mark videos watched (YouTube only) --no-color Do not emit color codes in output - --abort-on-unavailable-fragment Abort downloading when some fragment is not - available ## Network Options: --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. 
@@ -99,16 +97,13 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo string (--proxy "") for direct connection --socket-timeout SECONDS Time to wait before giving up, in seconds --source-address IP Client-side IP address to bind to - (experimental) -4, --force-ipv4 Make all connections via IPv4 - (experimental) -6, --force-ipv6 Make all connections via IPv6 - (experimental) --geo-verification-proxy URL Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the - actual downloading. (experimental) + actual downloading. ## Video Selection: --playlist-start NUMBER Playlist video to start at (default is 1) @@ -139,23 +134,23 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo COUNT views --max-views COUNT Do not download any videos with more than COUNT views - --match-filter FILTER Generic video filter (experimental). - Specify any key (see help for -o for a list - of available keys) to match if the key is - present, !key to check if the key is not - present,key > NUMBER (like "comment_count > - 12", also works with >=, <, <=, !=, =) to - compare against a number, and & to require - multiple matches. Values which are not - known are excluded unless you put a - question mark (?) after the operator.For - example, to only match videos that have - been liked more than 100 times and disliked - less than 50 times (or the dislike - functionality is not available at the given - service), but who also have a description, - use --match-filter "like_count > 100 & - dislike_count <? 50 & description" . + --match-filter FILTER Generic video filter. 
Specify any key (see + help for -o for a list of available keys) + to match if the key is present, !key to + check if the key is not present,key > + NUMBER (like "comment_count > 12", also + works with >=, <, <=, !=, =) to compare + against a number, and & to require multiple + matches. Values which are not known are + excluded unless you put a question mark (?) + after the operator.For example, to only + match videos that have been liked more than + 100 times and disliked less than 50 times + (or the dislike functionality is not + available at the given service), but who + also have a description, use --match-filter + "like_count > 100 & dislike_count <? 50 & + description" . --no-playlist Download only the video, if the URL refers to a video and a playlist. --yes-playlist Download the playlist, if the URL refers to @@ -178,6 +173,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo only) --skip-unavailable-fragments Skip unavailable fragments (DASH and hlsnative only) + --abort-on-unavailable-fragment Abort downloading when some fragment is not + available --buffer-size SIZE Size of download buffer (e.g. 
1024 or 16K) (default is 1024) --no-resize-buffer Do not automatically adjust the buffer @@ -210,7 +207,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option - is given + is given (default is 5) + --autonumber-start NUMBER Specify the start value for %(autonumber)s + (default is 1) --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6318a862f..d4231577b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -76,7 +76,7 @@ - **awaan:season** - **awaan:video** - **AZMedien**: AZ Medien videos - - **AZMedienShow**: AZ Medien shows + - **AZMedienPlaylist**: AZ Medien playlists - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -337,6 +337,7 @@ - **IPrima** - **iqiyi**: 爱奇艺 - **Ir90Tv** + - **ITV** - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV @@ -445,6 +446,7 @@ - **mtg**: MTG services - **mtv** - **mtv.de** + - **mtv81** - **mtv:video** - **mtvservices:embedded** - **MuenchenTV**: münchen.tv @@ -887,6 +889,7 @@ - **vk:uservideos**: VK - User's Videos - **vk:wallpost** - **vlive** + - **vlive:channel** - **Vodlocker** - **VODPlatform** - **VoiceRepublic** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c22c410a8..a37a65db9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.28' +__version__ = '2017.01.29' From c2d9c25f818da2e0e622b475ffc714f35df0887c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 29 Jan 2017 16:03:39 +0100 Subject: [PATCH 123/195] [compat] add compat_etree_register_namespace --- youtube_dl/compat.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git 
a/youtube_dl/compat.py b/youtube_dl/compat.py index 02abf8c1e..49e3c90e2 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2529,6 +2529,24 @@ else: el.text = el.text.decode('utf-8') return doc +if hasattr(etree, 'register_namespace'): + compat_etree_register_namespace = etree.register_namespace +else: + def compat_etree_register_namespace(prefix, uri): + """Register a namespace prefix. + The registry is global, and any existing mapping for either the + given prefix or the namespace URI will be removed. + *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and + attributes in this namespace will be serialized with prefix if possible. + ValueError is raised if prefix is reserved or is invalid. + """ + if re.match(r"ns\d+$", prefix): + raise ValueError("Prefix format reserved for internal use") + for k, v in list(etree._namespace_map.items()): + if k == uri or v == prefix: + del etree._namespace_map[k] + etree._namespace_map[uri] = prefix + if sys.version_info < (2, 7): # Here comes the crazy part: In 2.6, if the xpath is a unicode, # .//node does not match if a node is a direct child of . ! 
From 4719419951ced20e42cddb26b437908ba636debb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 29 Jan 2017 16:04:15 +0100 Subject: [PATCH 124/195] [itv] fix extraction in python 2.6 --- youtube_dl/extractor/itv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index 0328c7093..b0d860452 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -6,7 +6,10 @@ import xml.etree.ElementTree as etree import json from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_etree_register_namespace, +) from ..utils import ( extract_attributes, xpath_with_ns, @@ -47,7 +50,7 @@ class ITVIE(InfoExtractor): 'com': 'http://schemas.itv.com/2009/05/Common', } for ns, full_ns in ns_map.items(): - etree.register_namespace(ns, full_ns) + compat_etree_register_namespace(ns, full_ns) def _add_ns(name): return xpath_with_ns(name, ns_map) From dadb836139f070da9364439bf3b148eec8bc0b11 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 30 Jan 2017 09:32:31 +0100 Subject: [PATCH 125/195] [ruutu] extract dash formats --- youtube_dl/extractor/ruutu.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index f12bc5614..20d01754a 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -81,6 +81,9 @@ class RuutuIE(InfoExtractor): elif ext == 'f4m': formats.extend(self._extract_f4m_formats( video_url, video_id, f4m_id='hds', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) else: proto = compat_urllib_parse_urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': From 75822ca7909d7f7e15694f73b05b2bf0f1fa61f3 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb <thomaschristlieb@hotmail.com> Date: 
Tue, 31 Jan 2017 10:03:31 +0100 Subject: [PATCH 126/195] New parameter --playlist-random to randomize playlist download order. Fixes #11889 --- youtube_dl/YoutubeDL.py | 5 +++++ youtube_dl/__init__.py | 1 + youtube_dl/options.py | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c71e94518..a7bf5a1b0 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -24,6 +24,7 @@ import sys import time import tokenize import traceback +import random from .compat import ( compat_basestring, @@ -159,6 +160,7 @@ class YoutubeDL(object): playlistend: Playlist item to end at. playlist_items: Specific indices of playlist to download. playlistreverse: Download playlist items in reverse order. + playlistrandom: Download playlist items in random order. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. @@ -842,6 +844,9 @@ class YoutubeDL(object): if self.params.get('playlistreverse', False): entries = entries[::-1] + if self.params.get('playlistrandom', False): + random.shuffle(entries) + for i, entry in enumerate(entries, 1): self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) extra = { diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2b156342a..5c5b8094b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -344,6 +344,7 @@ def _real_main(argv=None): 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'playlistreverse': opts.playlist_reverse, + 'playlistrandom': opts.playlist_random, 'noplaylist': opts.noplaylist, 'logtostderr': opts.outtmpl == '-', 'consoletitle': opts.consoletitle, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 3abf621c0..349f44778 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -470,6 +470,10 @@ def parseOpts(overrideArguments=None): '--playlist-reverse', action='store_true', help='Download 
playlist videos in reverse order') + downloader.add_option( + '--playlist-random', + action='store_true', + help='Download playlist videos in random order') downloader.add_option( '--xattr-set-filesize', dest='xattr_set_filesize', action='store_true', From ae9a173b6421a3fdf70dd50d2dc0386f8861fe71 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 31 Jan 2017 14:47:56 +0100 Subject: [PATCH 127/195] [vimeo] extract both mixed and separated dash formats --- youtube_dl/extractor/vimeo.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index c12eeadd4..8b6a5cc3c 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -142,10 +142,19 @@ class VimeoBaseInfoExtractor(InfoExtractor): note='Downloading %s m3u8 information' % cdn_name, fatal=False)) elif files_type == 'dash': - formats.extend(self._extract_mpd_formats( - manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id, - 'Downloading %s MPD information' % cdn_name, - fatal=False)) + mpd_pattern = r'/%s/(?:sep/)?video/' % video_id + mpd_manifest_urls = [] + if re.search(mpd_pattern, manifest_url): + for suffix, repl in (('', 'video'), ('_sep', 'sep/video')): + mpd_manifest_urls.append((format_id + suffix, re.sub( + mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url))) + else: + mpd_manifest_urls = [(format_id, manifest_url)] + for f_id, m_url in mpd_manifest_urls: + formats.extend(self._extract_mpd_formats( + m_url.replace('/master.json', '/master.mpd'), video_id, f_id, + 'Downloading %s MPD information' % cdn_name, + fatal=False)) subtitles = {} text_tracks = config['request'].get('text_tracks') From 3c90cc8b6fc069930264b41f5505dc34c1077442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Jan 2017 22:19:29 +0700 Subject: [PATCH 128/195] [youtube] Fix extraction for domainless player URLs Closes #11890 
Closes #11891 Closes #11892 Closes #11894 Closes #11895 Closes #11897 Closes #11900 Closes #11903 Closes #11904 Closes #11906 Closes #11907 Closes #11909 Closes #11913 Closes #11914 Closes #11915 Closes #11916 Closes #11917 Closes #11918 Closes #11919 --- youtube_dl/extractor/youtube.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 630586796..ea398bcc8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1028,8 +1028,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode, - 'Initial JS player signature function name') + (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('), + jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) initial_function = jsi.extract_function(funcname) @@ -1050,6 +1051,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if player_url.startswith('//'): player_url = 'https:' + player_url + elif not re.match(r'https?://', player_url): + player_url = compat_urlparse.urljoin( + 'https://www.youtube.com', player_url) try: player_id = (player_url, self._signature_cache_id(s)) if player_id not in self._player_cache: From 3a528ffd8944417c99b139da18d0dff907ade517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Jan 2017 22:21:54 +0700 Subject: [PATCH 129/195] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index cd7017f6d..e331acacc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +version <unreleased> + +Core ++ [compat] Add compat_etree_register_namespace + +Extractors +* [youtube] Fix extraction for domainless player URLs (#11890, #11891, #11892, + #11894, #11895, #11897, #11900, #11903, #11904, #11906, #11907, 
#11909, + #11913, #11914, #11915, #11916, #11917, #11918, #11919) ++ [vimeo] Extract both mixed and separated DASH formats ++ [ruutu] Extract DASH formats +* [itv] Fix extraction for python 2.6 + + version 2017.01.29 Core From d7e215b42dcaf71298a7e1dc953cf93523b3da81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Jan 2017 22:24:45 +0700 Subject: [PATCH 130/195] release 2017.01.31 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 10c982fd0..180013f72 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.29*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.29** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.31** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.29 +[debug] youtube-dl version 2017.01.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e331acacc..d5ab0e0a7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.31 Core + [compat] Add compat_etree_register_namespace diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a37a65db9..fee0ac7c5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.29' +__version__ = '2017.01.31' From 8fd65faece98139def3a6538e98053bebd400263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Odd=20Str=C3=A5b=C3=B8?= <oddstr13@openshell.no> Date: Sat, 14 Jan 2017 02:36:04 +0100 Subject: [PATCH 131/195] [NRKTV] Added NRKTVSeriesIE [NRKTV] Added season and episode number to metadata. [NRKTV] Added category to metadata. [NRKTV] Added tests to NRKTVSeries. [NRKTV] Fixed whitespace issues (flake8). 
--- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nrk.py | 49 ++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2590b5e1b..06e6d4620 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -668,6 +668,7 @@ from .nrk import ( NRKTVIE, NRKTVDirekteIE, NRKTVEpisodesIE, + NRKTVSeriesIE, ) from .ntvde import NTVDeIE from .ntvru import NTVRuIE diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index ea7be005a..26604f84f 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -128,6 +128,18 @@ class NRKBaseIE(InfoExtractor): series = conviva.get('seriesName') or data.get('seriesTitle') episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') + season_number = None + episode_number = None + if data.get('mediaElementType') == 'Episode': + _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \ + data.get('relativeOriginUrl', '') + EPISODENUM_RE = [ + r'/s(?P<season>\d+)e(?P<episode>\d+)\.', + r'/sesong-(?P<season>\d+)/episode-(?P<episode>\d+)', + ] + season_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='season')) + episode_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='episode')) + thumbnails = None images = data.get('images') if images and isinstance(images, dict): @@ -140,11 +152,15 @@ class NRKBaseIE(InfoExtractor): } for image in web_images if image.get('imageUrl')] description = data.get('description') + category = data.get('mediaAnalytics', {}).get('category') common_info = { 'description': description, 'series': series, 'episode': episode, + 'season_number': season_number, + 'episode_number': episode_number, + 'categories': [category] if category else None, 'age_limit': parse_age_limit(data.get('legalAge')), 'thumbnails': 
thumbnails, } @@ -360,6 +376,39 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE): r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False) +class NRKTVSeriesIE(InfoExtractor): + _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+)/?' + _ITEM_RE = r'data-season=["\'](?P<id>\d+)["\']' + _TESTS = [{ + 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene', + 'playlist_count': 1, + }, { + 'url': 'https://tv.nrk.no/serie/saving-the-human-race', + 'playlist_count': 1, + }, { + 'url': 'https://tv.nrk.no/serie/postmann-pat', + 'playlist_count': 3, + }, { + 'url': 'https://tv.nrk.no/serie/groenn-glede', + 'playlist_count': 9, + }] + + def _real_extract(self, url): + series_id = self._match_id(url) + + webpage = self._download_webpage(url, series_id) + + entries = [ + self.url_result('https://tv.nrk.no/program/Episodes/{series}/{season}'.format( + series=series_id, + season=season_id + )) + for season_id in re.findall(self._ITEM_RE, webpage) + ] + + return self.playlist_result(entries) + + class NRKSkoleIE(InfoExtractor): IE_DESC = 'NRK Skole' _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)' From 7c5329e6f4152b48c5476b1b9b8ab931caa10331 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 00:29:29 +0700 Subject: [PATCH 132/195] [nrk] Improve extraction and update tests (closes #11571) --- youtube_dl/extractor/nrk.py | 145 +++++++++++++++++++++++++++--------- 1 file changed, 111 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 26604f84f..fc3c0cd3c 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -134,11 +134,15 @@ class NRKBaseIE(InfoExtractor): _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \ data.get('relativeOriginUrl', '') EPISODENUM_RE = [ - r'/s(?P<season>\d+)e(?P<episode>\d+)\.', - r'/sesong-(?P<season>\d+)/episode-(?P<episode>\d+)', + 
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.', + r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})', ] - season_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='season')) - episode_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='episode')) + season_number = int_or_none(self._search_regex( + EPISODENUM_RE, _season_episode, 'season number', + default=None, group='season')) + episode_number = int_or_none(self._search_regex( + EPISODENUM_RE, _season_episode, 'episode number', + default=None, group='episode')) thumbnails = None images = data.get('images') @@ -243,54 +247,102 @@ class NRKTVIE(NRKBaseIE): 'title': '20 spørsmål 23.05.2014', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 'duration': 1741, + 'series': '20 spørsmål - TV', + 'episode': '23.05.2014', }, }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', - 'md5': '43d0be26663d380603a9cf0c24366531', 'info_dict': { 'id': 'MDFP15000514CA', 'ext': 'mp4', 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db', 'duration': 4605, + 'series': 'Kunnskapskanalen', + 'episode': '24.05.2014', + }, + 'params': { + 'skip_download': True, }, }, { # single playlist video 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', - 'md5': 'adbd1dbd813edaf532b0a253780719c2', 'info_dict': { 'id': 'MSPO40010515-part2', 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', }, - 'skip': 'Only works from Norway', + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Video is geo restricted'], + 'skip': 'particular part is not supported currently', }, { 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 'playlist': [{ - 'md5': '9480285eff92d64f06e02a5367970a7a', 'info_dict': { - 'id': 
'MSPO40010515-part1', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'id': 'MSPO40010515AH', + 'ext': 'mp4', + 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)', + 'description': 'md5:c03aba1e917561eface5214020551b7a', + 'duration': 772, + 'series': 'Tour de Ski', + 'episode': '06.01.2015', + }, + 'params': { + 'skip_download': True, }, }, { - 'md5': 'adbd1dbd813edaf532b0a253780719c2', 'info_dict': { - 'id': 'MSPO40010515-part2', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'id': 'MSPO40010515BH', + 'ext': 'mp4', + 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)', + 'description': 'md5:c03aba1e917561eface5214020551b7a', + 'duration': 6175, + 'series': 'Tour de Ski', + 'episode': '06.01.2015', + }, + 'params': { + 'skip_download': True, }, }], 'info_dict': { 'id': 'MSPO40010515', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'duration': 6947.52, + 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', + 'description': 'md5:c03aba1e917561eface5214020551b7a', + }, + 'expected_warnings': ['Video is geo restricted'], + }, { + 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13', + 'info_dict': { + 'id': 'KMTE50001317AA', + 'ext': 'mp4', + 'title': 'Anno 13:30', + 'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa', + 'duration': 2340, + 'series': 'Anno', + 'episode': '13:30', + 'season_number': 3, + 'episode_number': 13, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017', + 'info_dict': { + 'id': 'MUHH46000317AA', + 'ext': 'mp4', + 'title': 'Nytt på Nytt 27.01.2017', + 'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b', + 'duration': 
1796, + 'series': 'Nytt på nytt', + 'episode': '27.01.2017', + }, + 'params': { + 'skip_download': True, }, - 'skip': 'Only works from Norway', }, { 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', 'only_matching': True, @@ -377,36 +429,61 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE): class NRKTVSeriesIE(InfoExtractor): - _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+)/?' - _ITEM_RE = r'data-season=["\'](?P<id>\d+)["\']' + _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' + _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)' _TESTS = [{ + 'url': 'https://tv.nrk.no/serie/groenn-glede', + 'info_dict': { + 'id': 'groenn-glede', + 'title': 'Grønn glede', + 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608', + }, + 'playlist_mincount': 9, + }, { + 'url': 'http://tv.nrksuper.no/serie/labyrint', + 'info_dict': { + 'id': 'labyrint', + 'title': 'Labyrint', + 'description': 'md5:58afd450974c89e27d5a19212eee7115', + }, + 'playlist_mincount': 3, + }, { 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene', - 'playlist_count': 1, + 'only_matching': True, }, { 'url': 'https://tv.nrk.no/serie/saving-the-human-race', - 'playlist_count': 1, + 'only_matching': True, }, { 'url': 'https://tv.nrk.no/serie/postmann-pat', - 'playlist_count': 3, - }, { - 'url': 'https://tv.nrk.no/serie/groenn-glede', - 'playlist_count': 9, + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if NRKTVIE.suitable(url) else super(NRKTVSeriesIE, cls).suitable(url) + def _real_extract(self, url): series_id = self._match_id(url) webpage = self._download_webpage(url, series_id) entries = [ - self.url_result('https://tv.nrk.no/program/Episodes/{series}/{season}'.format( - series=series_id, - season=season_id - )) + self.url_result( + 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format( + series=series_id, season=season_id)) for season_id in re.findall(self._ITEM_RE, webpage) ] - return 
self.playlist_result(entries) + title = self._html_search_meta( + 'seriestitle', webpage, + 'title', default=None) or self._og_search_title( + webpage, fatal=False) + + description = self._html_search_meta( + 'series_description', webpage, + 'description', default=None) or self._og_search_description(webpage) + + return self.playlist_result(entries, series_id, title, description) class NRKSkoleIE(InfoExtractor): From 363245ad94dfdf0c34b4c2c801e7cf6cea74f39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 00:30:19 +0700 Subject: [PATCH 133/195] Credit @oddstr13 for nrk:series (#11571) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3ef2800c9..f2875d504 100644 --- a/AUTHORS +++ b/AUTHORS @@ -200,3 +200,4 @@ Paul Hartmann Stephen Chen Fabian Stahl Bagira +Odd Stråbø From c38a67bcd5df639b9d7e7faa8685e76446803527 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 00:49:28 +0700 Subject: [PATCH 134/195] [vimeo] Extract license (closes #11880) --- youtube_dl/extractor/vimeo.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 8b6a5cc3c..32179e915 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -218,6 +218,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user7108434', 'uploader': 'Filippo Valsorda', 'duration': 10, + 'license': 'by-sa', }, }, { @@ -486,6 +487,8 @@ class VimeoIE(VimeoBaseInfoExtractor): '%s said: %s' % (self.IE_NAME, seed_status['title']), expected=True) + cc_license = None + # Extract the config JSON try: try: @@ -499,8 +502,9 @@ class VimeoIE(VimeoBaseInfoExtractor): vimeo_clip_page_config = self._search_regex( r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage, 'vimeo clip page config') - config_url = self._parse_json( - vimeo_clip_page_config, 
video_id)['player']['config_url'] + page_config = self._parse_json(vimeo_clip_page_config, video_id) + config_url = page_config['player']['config_url'] + cc_license = page_config.get('cc_license') config_json = self._download_webpage(config_url, video_id) config = json.loads(config_json) except RegexNotFoundError: @@ -609,6 +613,12 @@ class VimeoIE(VimeoBaseInfoExtractor): info_dict = self._parse_config(config, video_id) formats.extend(info_dict['formats']) self._vimeo_sort_formats(formats) + + if not cc_license: + cc_license = self._search_regex( + r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1', + webpage, 'license', default=None, group='license') + info_dict.update({ 'id': video_id, 'formats': formats, @@ -618,6 +628,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'view_count': view_count, 'like_count': like_count, 'comment_count': comment_count, + 'license': cc_license, }) return info_dict From c15cd296404e164b72fd7f2666d5875f35057d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 00:58:02 +0700 Subject: [PATCH 135/195] [vimeo] Extract upload timestamp --- youtube_dl/extractor/vimeo.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 32179e915..8ba222224 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -21,7 +21,9 @@ from ..utils import ( sanitized_Request, smuggle_url, std_headers, + try_get, unified_strdate, + unified_timestamp, unsmuggle_url, urlencode_postdata, unescapeHTML, @@ -213,6 +215,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 'description': 'md5:2d3305bad981a06ff79f027f19865021', + 'timestamp': 1355990239, 'upload_date': '20121220', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434', 'uploader_id': 'user7108434', @@ 
-259,6 +262,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'id': '68375962', 'ext': 'mp4', 'title': 'youtube-dl password protected test video', + 'timestamp': 1371200155, 'upload_date': '20130614', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', 'uploader_id': 'user18948128', @@ -281,7 +285,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', - 'upload_date': '20130927', + 'timestamp': 1380339469, + 'upload_date': '20130928', 'duration': 187, }, }, @@ -293,6 +298,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'The New Vimeo Player (You Know, For Videos)', 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', + 'timestamp': 1381846109, 'upload_date': '20131015', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff', 'uploader_id': 'staff', @@ -324,6 +330,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': 'The DMCI', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci', 'uploader_id': 'dmci', + 'timestamp': 1324343742, 'upload_date': '20111220', 'description': 'md5:ae23671e82d05415868f7ad1aec21147', }, @@ -339,6 +346,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': 'Casey Donahue', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', 'uploader_id': 'caseydonahue', + 'timestamp': 1250886430, 'upload_date': '20090821', 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', }, @@ -488,6 +496,7 @@ class VimeoIE(VimeoBaseInfoExtractor): expected=True) cc_license = None + timestamp = None # Extract the config JSON try: @@ -505,6 +514,9 @@ class VimeoIE(VimeoBaseInfoExtractor): page_config = self._parse_json(vimeo_clip_page_config, video_id) config_url = page_config['player']['config_url'] cc_license = page_config.get('cc_license') + timestamp = try_get( + page_config, lambda x: x['clip']['uploaded_on'], + compat_str) config_json = self._download_webpage(config_url, video_id) config = 
json.loads(config_json) except RegexNotFoundError: @@ -573,10 +585,10 @@ class VimeoIE(VimeoBaseInfoExtractor): self._downloader.report_warning('Cannot find video description') # Extract upload date - video_upload_date = None - mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage) - if mobj is not None: - video_upload_date = unified_strdate(mobj.group(1)) + if not timestamp: + timestamp = self._search_regex( + r'<time[^>]+datetime="([^"]+)"', webpage, + 'timestamp', default=None) try: view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count')) @@ -622,7 +634,7 @@ class VimeoIE(VimeoBaseInfoExtractor): info_dict.update({ 'id': video_id, 'formats': formats, - 'upload_date': video_upload_date, + 'timestamp': unified_timestamp(timestamp), 'description': video_description, 'webpage_url': url, 'view_count': view_count, From 26c0f09935d51cc8837230ad48db08acd3744dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 02:15:52 +0700 Subject: [PATCH 136/195] [vimeo] PEP 8 --- youtube_dl/extractor/vimeo.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 8ba222224..61cc469bf 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -22,13 +22,11 @@ from ..utils import ( smuggle_url, std_headers, try_get, - unified_strdate, unified_timestamp, unsmuggle_url, urlencode_postdata, unescapeHTML, parse_filesize, - try_get, ) From 2b2d5d319b563a12e26c55966a047fa5bb039cd0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Feb 2017 16:39:32 +0800 Subject: [PATCH 137/195] [crunchyroll] Remove ScaledBorderAndShadow settings See https://github.com/rg3/youtube-dl/pull/9028, especially @lachs0r's comments for the reason behind this change --- youtube_dl/extractor/crunchyroll.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py 
b/youtube_dl/extractor/crunchyroll.py index f811c7f33..109d1c5a8 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -255,8 +255,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ScaledBorderAndShadow: no - + output += """ [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding """ From 7882f1115e8eca2d2c958e2dbb6be45450e4027c Mon Sep 17 00:00:00 2001 From: Thomas Christlieb <thomaschristlieb@hotmail.com> Date: Wed, 1 Feb 2017 16:00:41 +0100 Subject: [PATCH 138/195] Added new Regex for prosiebensat1 Extractor Description. Fixes #11810 (#11929) * Added new Regex for prosiebensat1 Extractor Description. Fixes #11810 * Using _og_search_description() as a Fallback for Description-Regex * Using _og_search_description() as a Fallback for Description-Regex - Second try * Also added fallback regex * Using _og_search_description() as a Fallback for Description-Regex - Third try * removed fatal=False from search for description regex. default=None should be preferred only * Using fatal=false for _og_search_description * Revert "Using fatal=false for _og_search_description" This reverts commit 2b7e123f9d0f2bd6ada54fa8e4e6035fece5dbf4. 
* Deleted default=None Parameter for _og_search_property --- youtube_dl/extractor/prosiebensat1.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 03e1b1f7f..6856bacaf 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -375,7 +375,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') info = self._extract_video_info(url, clip_id) description = self._html_search_regex( - self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) + self._DESCRIPTION_REGEXES, webpage, 'description', default=None) + if description is None: + description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._html_search_regex( self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) From fe5aa197b58be1bbf88a152be0e84f24f1711bd7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Feb 2017 23:13:45 +0800 Subject: [PATCH 139/195] [prosiebensat1] PEP8 and update _TESTS --- youtube_dl/extractor/prosiebensat1.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 6856bacaf..5091d8456 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -147,16 +147,12 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', 'info_dict': { 'id': '2104602', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Episode 18 - Staffel 2', 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'upload_date': '20131231', 'duration': 5845.04, }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, { 'url': 
'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html', @@ -258,7 +254,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', 'info_dict': { 'id': '2572814', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Andreas Kümmert: Rocket Man', 'description': 'md5:6ddb02b0781c6adf778afea606652e38', 'upload_date': '20131017', @@ -272,7 +268,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', 'info_dict': { 'id': '2156342', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Kurztrips zum Valentinstag', 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.', 'duration': 307.24, @@ -289,12 +285,13 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'description': 'md5:63b8963e71f481782aeea877658dec84', }, 'playlist_count': 2, + 'skip': 'This video is unavailable', }, { 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge', 'info_dict': { 'id': '4187506', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Best of Circus HalliGalli', 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9', 'upload_date': '20151229', @@ -376,7 +373,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): info = self._extract_video_info(url, clip_id) description = self._html_search_regex( self._DESCRIPTION_REGEXES, webpage, 'description', default=None) - if description is None: + if description is None: description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._html_search_regex( From 000f207944e277e63dbec5a60007c30e3187d3fd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Feb 2017 23:16:35 +0800 Subject: [PATCH 140/195] [prosiebensat1] Update ChangeLog --- 
ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index d5ab0e0a7..da5b75b47 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version <unreleased> + +Extractors +* [prosiebensat1] Fix extraction of descriptions (#11810, #11929) + version 2017.01.31 Core From b83ef507b457e6ea8c52265ea42b6c5d2c500a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 23:15:38 +0700 Subject: [PATCH 141/195] [facebook] Fix extraction (closes #11926) --- youtube_dl/extractor/facebook.py | 36 ++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index c0a7fc7d8..47bcc0dbc 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -12,14 +12,16 @@ from ..compat import ( compat_urllib_parse_unquote_plus, ) from ..utils import ( + clean_html, error_to_compat_str, ExtractorError, + get_element_by_id, int_or_none, + js_to_json, limit_length, sanitized_Request, + try_get, urlencode_postdata, - get_element_by_id, - clean_html, ) @@ -243,14 +245,30 @@ class FacebookIE(InfoExtractor): video_data = None + def extract_video_data(instances): + for item in instances: + if item[1][0] == 'VideoConfig': + video_item = item[2][0] + if video_item.get('video_id') == video_id: + return video_item['videoData'] + server_js_data = self._parse_json(self._search_regex( - r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id) - for item in server_js_data.get('instances', []): - if item[1][0] == 'VideoConfig': - video_item = item[2][0] - if video_item.get('video_id') == video_id: - video_data = video_item['videoData'] - break + r'handleServerJS\(({.+})(?:\);|,")', webpage, + 'server js data', default='{}'), video_id, fatal=False) + + if server_js_data: + video_data = extract_video_data(server_js_data.get('instances', [])) + + if not video_data: + 
server_js_data = self._parse_json( + self._search_regex( + r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet', + webpage, 'js data', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + if server_js_data: + video_data = extract_video_data(try_get( + server_js_data, lambda x: x['jsmods']['instances'], + list) or []) if not video_data: if not fatal_if_no_video: From b996b8809285c2c8526dfe96f5ea9835ea799fe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 23:29:59 +0700 Subject: [PATCH 142/195] [ChangeLog] Actualize --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index da5b75b47..d24169af8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,13 @@ version <unreleased> Extractors ++ [facebook] Add another fallback extraction scenario (#11926) * [prosiebensat1] Fix extraction of descriptions (#11810, #11929) +- [crunchyroll] Remove ScaledBorderAndShadow settings (#9028) ++ [vimeo] Extract upload timestamp ++ [vimeo] Extract license (#8726, #11880) ++ [nrk:series] Add support for series (#11571, #11711) + version 2017.01.31 From 50695949937bf399b611ef7957f44aac9fbee9dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 03:20:09 +0700 Subject: [PATCH 143/195] release 2017.02.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 180013f72..8914569b6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.31*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.31** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.31 +[debug] youtube-dl version 2017.02.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d24169af8..c1e8f643a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.01 Extractors + [facebook] Add another fallback extraction scenario (#11926) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d4231577b..d900f5e12 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -528,6 +528,7 @@ - **NRKTV**: NRK TV and NRK Radio - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte - **NRKTVEpisodes** + - **NRKTVSeries** - **ntv.ru** - **Nuvid** - **NYTimes** diff --git 
a/youtube_dl/version.py b/youtube_dl/version.py index fee0ac7c5..0f9b6b703 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.31' +__version__ = '2017.02.01' From da162c1135febbb653a302b598dba2d24ac4e24e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 1 Feb 2017 20:15:25 +0100 Subject: [PATCH 144/195] [compat] add compat_etree_register_namespace to __all__ list --- youtube_dl/compat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 49e3c90e2..718902019 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2883,6 +2883,7 @@ __all__ = [ 'compat_cookiejar', 'compat_cookies', 'compat_etree_fromstring', + 'compat_etree_register_namespace', 'compat_expanduser', 'compat_get_terminal_size', 'compat_getenv', From 020c5df52d61af0630be8c982282e110a83fc8df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 1 Feb 2017 23:48:34 +0100 Subject: [PATCH 145/195] [elpais] Fix extraction for some URLs (closes #11765) --- ChangeLog | 1 + youtube_dl/extractor/elpais.py | 23 ++++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index c1e8f643a..8e3a04d7d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Extractors + [vimeo] Extract upload timestamp + [vimeo] Extract license (#8726, #11880) + [nrk:series] Add support for series (#11571, #11711) ++ [elpais] Fix extraction for some URLs (#11765) version 2017.01.31 diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py index 8c725a4e6..99e00cf3c 100644 --- a/youtube_dl/extractor/elpais.py +++ b/youtube_dl/extractor/elpais.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import strip_jsonp, 
unified_strdate class ElPaisIE(InfoExtractor): @@ -29,6 +29,16 @@ class ElPaisIE(InfoExtractor): 'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.', 'upload_date': '20160303', } + }, { + 'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html', + 'md5': '9c79923a118a067e1a45789e1e0b0f9c', + 'info_dict': { + 'id': '1485456786_417876', + 'ext': 'mp4', + 'title': 'Hallado un barco de la antigua Roma que naufragó en Baleares hace 1.800 años', + 'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas', + 'upload_date': '20170127', + }, }] def _real_extract(self, url): @@ -37,8 +47,15 @@ class ElPaisIE(InfoExtractor): prefix = self._html_search_regex( r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix') - video_suffix = self._search_regex( - r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL') + id_multimedia = self._search_regex( + r"id_multimedia\s*=\s*'([^']+)'", webpage, 'ID multimedia', default=None) + if id_multimedia: + url_info = self._download_json( + 'http://elpais.com/vdpep/1/?pepid=' + id_multimedia, video_id, transform_source=strip_jsonp) + video_suffix = url_info['mp4'] + else: + video_suffix = self._search_regex( + r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL') video_url = prefix + video_suffix thumbnail_suffix = self._search_regex( r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", From 8bdc149441a86e01c56946090087c005a525260e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 2 Feb 2017 08:05:16 +0100 Subject: [PATCH 146/195] [downloader/external:ffmpeg] minimize the use of aac_adtstoasc filter --- youtube_dl/downloader/external.py | 5 ++++- 1 file changed, 4 insertions(+), 1 
deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 5d3e5d8d3..138f353ef 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -17,6 +17,7 @@ from ..utils import ( encodeArgument, handle_youtubedl_headers, check_executable, + is_outdated_version, ) @@ -264,7 +265,9 @@ class FFmpegFD(ExternalFD): if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': args += ['-f', 'mpegts'] else: - args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] + args += ['-f', 'mp4'] + if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): + args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] else: From 81aeafeb44a16b341e47c3bb85d288252a095eda Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 2 Feb 2017 08:07:06 +0100 Subject: [PATCH 147/195] [cbc:watch] extract audio codec for audion only formats(fixes #11893) --- youtube_dl/extractor/cbc.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index a291685bf..cf678e7f8 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -296,6 +296,12 @@ class CBCWatchVideoIE(CBCWatchBaseIE): formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False) if len(formats) < 2: formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + for f in formats: + format_id = f.get('format_id') + if format_id.startswith('AAC'): + f['acodec'] = 'aac' + elif format_id.startswith('AC3'): + f['acodec'] = 'ac-3' self._sort_formats(formats) info = { From bd8f48c78b952ebe3bf335185c819e265f63cb50 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 2 Feb 2017 21:51:31 +0800 Subject: [PATCH 148/195] [bilibili] Support new 
Bangumi URLs (closes #11845) To reduce complexity, I don't support old Bangumi URLs directly via _VALID_URL. Instead, I choose to let it go to generic redirection. An example can be found in #10190: http://bangumi.bilibili.com/anime/v/40062 --- ChangeLog | 5 ++ youtube_dl/extractor/bilibili.py | 135 ++++++++++++++++++++++++++--- youtube_dl/extractor/extractors.py | 5 +- 3 files changed, 134 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8e3a04d7d..c27907f51 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version <unreleased> + +Extractors ++ [bilibili] Support new Bangumi URLs (#11845) + version 2017.02.01 Extractors diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 85ea5e6ee..80dd8382e 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -5,19 +5,27 @@ import hashlib import re from .common import InfoExtractor -from ..compat import compat_parse_qs +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) from ..utils import ( + ExtractorError, int_or_none, float_or_none, + parse_iso8601, + smuggle_url, + strip_jsonp, unified_timestamp, + unsmuggle_url, urlencode_postdata, ) class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { @@ -32,25 +40,61 @@ class BiliBiliIE(InfoExtractor): 'uploader': '菊子桑', 'uploader_id': '156160', }, - } + }, { + # Tested in BiliBiliBangumiIE + 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062', + 'only_matching': True, + }, { + 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643', + 'md5': '3f721ad1e75030cc06faf73587cfec57', + 'info_dict': { + 'id': '100643', + 'ext': 'mp4', + 
'title': 'CHAOS;CHILD', + 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', + }, + 'skip': 'Geo-restricted to China', + }] _APP_KEY = '84956560bc028eb7' _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' + def _report_error(self, result): + if 'message' in result: + raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True) + elif 'code' in result: + raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True) + else: + raise ExtractorError('Can\'t extract Bangumi episode ID') + def _real_extract(self, url): - video_id = self._match_id(url) + url, smuggled_data = unsmuggle_url(url, {}) + + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + anime_id = mobj.group('anime_id') webpage = self._download_webpage(url, video_id) - if 'anime/v' not in url: + if 'anime/' not in url: cid = compat_parse_qs(self._search_regex( [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], webpage, 'player parameters'))['cid'][0] else: + if 'no_bangumi_tip' not in smuggled_data: + self.to_screen('Downloading episode %s. 
To download all videos in anime %s, re-run youtube-dl with %s' % ( + video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id))) + headers = { + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + } + headers.update(self.geo_verification_headers()) + js = self._download_json( 'http://bangumi.bilibili.com/web_api/get_source', video_id, data=urlencode_postdata({'episode_id': video_id}), - headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}) + headers=headers) + if 'result' not in js: + self._report_error(js) cid = js['result']['cid'] payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) @@ -58,7 +102,11 @@ class BiliBiliIE(InfoExtractor): video_info = self._download_json( 'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign), - video_id, note='Downloading video info page') + video_id, note='Downloading video info page', + headers=self.geo_verification_headers()) + + if 'durl' not in video_info: + self._report_error(video_info) entries = [] @@ -85,7 +133,7 @@ class BiliBiliIE(InfoExtractor): title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title') description = self._html_search_meta('description', webpage) timestamp = unified_timestamp(self._html_search_regex( - r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)) + r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None)) thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) # TODO 'view_count' requires deobfuscating Javascript @@ -99,7 +147,7 @@ class BiliBiliIE(InfoExtractor): } uploader_mobj = re.search( - r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"', + r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"', webpage) if uploader_mobj: info.update({ @@ -123,3 +171,70 @@ class BiliBiliIE(InfoExtractor): 'description': description, 'entries': 
entries, } + + +class BiliBiliBangumiIE(InfoExtractor): + _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)' + + IE_NAME = 'bangumi.bilibili.com' + IE_DESC = 'BiliBili番剧' + + _TESTS = [{ + 'url': 'http://bangumi.bilibili.com/anime/1869', + 'info_dict': { + 'id': '1869', + 'title': '混沌武士', + 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', + }, + 'playlist_count': 26, + }, { + 'url': 'http://bangumi.bilibili.com/anime/1869', + 'info_dict': { + 'id': '1869', + 'title': '混沌武士', + 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', + }, + 'playlist': [{ + 'md5': '91da8621454dd58316851c27c68b0c13', + 'info_dict': { + 'id': '40062', + 'ext': 'mp4', + 'title': '混沌武士', + 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...', + 'timestamp': 1414538739, + 'upload_date': '20141028', + 'episode': '疾风怒涛 Tempestuous Temperaments', + 'episode_number': 1, + }, + }], + 'params': { + 'playlist_items': '1', + }, + }] + + @classmethod + def suitable(cls, url): + return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url) + + def _real_extract(self, url): + bangumi_id = self._match_id(url) + + # Sometimes this API returns a JSONP response + season_info = self._download_json( + 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id, + bangumi_id, transform_source=strip_jsonp)['result'] + + entries = [{ + '_type': 'url_transparent', + 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}), + 'ie_key': BiliBiliIE.ie_key(), + 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '), + 'episode': episode.get('index_title'), + 'episode_number': int_or_none(episode.get('index')), + } for episode in season_info['episodes']] + + entries = sorted(entries, key=lambda entry: entry.get('episode_number')) + + return self.playlist_result( + entries, bangumi_id, + season_info.get('bangumi_title'), season_info.get('evaluate')) diff --git 
a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 06e6d4620..1d1c05d42 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -103,7 +103,10 @@ from .beatport import BeatportIE from .bet import BetIE from .bigflix import BigflixIE from .bild import BildIE -from .bilibili import BiliBiliIE +from .bilibili import ( + BiliBiliIE, + BiliBiliBangumiIE, +) from .biobiochiletv import BioBioChileTVIE from .biqle import BIQLEIE from .bleacherreport import ( From a685751051f277b8ce99ee0949420bca4ea28c28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Feb 2017 22:01:11 +0700 Subject: [PATCH 149/195] [youtube:playlist] Recognize TL playlists (closes #11945) --- youtube_dl/extractor/youtube.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ea398bcc8..0e67fdd12 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1857,13 +1857,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist= ) ( - (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,} + (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,} # Top tracks, they can also include dots |(?:MC)[\w\.]* ) .* | - ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,}) + ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}) )""" _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?' 
@@ -1985,6 +1985,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): }, { 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21', 'only_matching': True, + }, { + 'url': 'TLGGrESM50VT6acwMjAyMjAxNw', + 'only_matching': True, }] def _real_initialize(self): From 5a116e13020813f9f1d952504455043986c28b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Feb 2017 22:45:18 +0700 Subject: [PATCH 150/195] [facebook] Fix title extraction (closes #11941) --- youtube_dl/extractor/facebook.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 47bcc0dbc..b325c8200 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -73,7 +73,7 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '274175099429670', 'ext': 'mp4', - 'title': 'Facebook video #274175099429670', + 'title': 'Asif Nawab Butt posted a video to his Timeline.', 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', 'timestamp': 1399398998, @@ -318,10 +318,16 @@ class FacebookIE(InfoExtractor): video_title = self._html_search_regex( r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>', webpage, 'alternative title', default=None) - video_title = limit_length(video_title, 80) if not video_title: + video_title = self._html_search_meta( + 'description', webpage, 'title') + if video_title: + video_title = limit_length(video_title, 80) + else: video_title = 'Facebook video #%s' % video_id - uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) + uploader = clean_html(get_element_by_id( + 'fbPhotoPageAuthorName', webpage)) or self._search_regex( + r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False) timestamp = int_or_none(self._search_regex( r'<abbr[^>]+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) From 
c54c01f82dba6d3e982c73c81ad71c49f31d8af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Feb 2017 23:03:38 +0700 Subject: [PATCH 151/195] [go] Relax video id regex (closes #11937) --- youtube_dl/extractor/go.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index c7776b186..a34779b16 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -43,7 +43,10 @@ class GoIE(InfoExtractor): sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() if not video_id: webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id') + video_id = self._search_regex( + # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" + # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood + r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id') brand = self._BRANDS[sub_domain] video_data = self._download_json( 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id), From a22b2fd19bd8c08d50f884d1903486d4f00f76ec Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 3 Feb 2017 01:28:24 +0800 Subject: [PATCH 152/195] [youtube] Fix ytsearch* when cookies are provided Closes #11924 The API with `page` is no longer used in browsers, and YouTube always returns {'reload': 'now'} when cookies are provided. See http://youtube.github.io/spfjs/documentation/start/ for how SPF works. Basically appending static link with a `spf` parameter yields the corresponding dynamic link. 
--- ChangeLog | 1 + youtube_dl/extractor/youtube.py | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index c27907f51..c80126cfb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [youtube] Fix ytsearch when cookies are provided (#11924) + [bilibili] Support new Bangumi URLs (#11845) version 2017.02.01 diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0e67fdd12..f2f751104 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2348,18 +2348,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): videos = [] limit = n + url_query = { + 'search_query': query.encode('utf-8'), + } + url_query.update(self._EXTRA_QUERY_ARGS) + result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) + for pagenum in itertools.count(1): - url_query = { - 'search_query': query.encode('utf-8'), - 'page': pagenum, - 'spf': 'navigate', - } - url_query.update(self._EXTRA_QUERY_ARGS) - result_url = 'https://www.youtube.com/results?' 
+ compat_urllib_parse_urlencode(url_query) data = self._download_json( result_url, video_id='query "%s"' % query, note='Downloading page %s' % pagenum, - errnote='Unable to download API page') + errnote='Unable to download API page', + query={'spf': 'navigate'}) html_content = data[1]['body']['content'] if 'class="search-message' in html_content: @@ -2371,6 +2371,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): videos += new_videos if not new_videos or len(videos) > limit: break + next_link = self._html_search_regex( + r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next', + html_content, 'next link', default=None) + if next_link is None: + break + result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link) if len(videos) > n: videos = videos[:n] From b3ee552e4b918fb720111b23147e24fa5475a74b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com> Date: Tue, 31 Jan 2017 07:54:53 +0100 Subject: [PATCH 153/195] [utils] Handle single-line comments in js_to_json --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index a74d59f34..954bb7d8b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -791,6 +791,9 @@ class TestUtil(unittest.TestCase): on = js_to_json('{ 0: /* " \n */ ",]" , }') self.assertEqual(json.loads(on), {'0': ',]'}) + on = js_to_json('{ 0: // comment\n1 }') + self.assertEqual(json.loads(on), {'0': 1}) + on = js_to_json(r'["<p>x<\/p>"]') self.assertEqual(json.loads(on), ['<p>x</p>']) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cf46711b9..6c462625b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2107,7 +2107,7 @@ def js_to_json(code): v = m.group(0) if v in ('true', 'false', 'null'): return v - elif v.startswith('/*') or v == ',': + elif v.startswith('/*') or v.startswith('//') or v == ',': 
return "" if v[0] in ("'", '"'): @@ -2134,7 +2134,7 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - /\*.*?\*/|,(?=\s*[\]}])| + /\*.*?\*/|//[^\n]*|,(?=\s*[\]}])| [a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) From 0bbcc8a10a4bd339540bf149dd263419fd8b6e66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com> Date: Tue, 31 Jan 2017 07:59:55 +0100 Subject: [PATCH 154/195] [iprima] Fix extraction (closes #11920, closes #11896) --- youtube_dl/extractor/iprima.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index da2cdc656..0fe576883 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -65,7 +65,7 @@ class IPrimaIE(InfoExtractor): options = self._parse_json( self._search_regex( - r'(?s)var\s+playerOptions\s*=\s*({.+?});', + r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]', playerpage, 'player options', default='{}'), video_id, transform_source=js_to_json, fatal=False) if options: From 4195096ea8da8237a63e1ba3876dc8856b8605c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Feb 2017 02:55:06 +0700 Subject: [PATCH 155/195] [utils] Improve comments processing in js_to_json (closes #11947) --- test/test_utils.py | 24 ++++++++++++++++++++++++ youtube_dl/utils.py | 20 +++++++++++--------- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 954bb7d8b..edc712f07 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -785,12 +785,24 @@ class TestUtil(unittest.TestCase): on = js_to_json('["abc", "def",]') self.assertEqual(json.loads(on), ['abc', 'def']) + on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]') + self.assertEqual(json.loads(on), ['abc', 
'def']) + + on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]') + self.assertEqual(json.loads(on), ['abc', 'def']) + on = js_to_json('{"abc": "def",}') self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{ 0: /* " \n */ ",]" , }') self.assertEqual(json.loads(on), {'0': ',]'}) + on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + on = js_to_json('{ 0: // comment\n1 }') self.assertEqual(json.loads(on), {'0': 1}) @@ -803,15 +815,27 @@ class TestUtil(unittest.TestCase): on = js_to_json("['a\\\nb']") self.assertEqual(json.loads(on), ['ab']) + on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/") + self.assertEqual(json.loads(on), ['ab']) + on = js_to_json('{0xff:0xff}') self.assertEqual(json.loads(on), {'255': 255}) + on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}') + self.assertEqual(json.loads(on), {'255': 255}) + on = js_to_json('{077:077}') self.assertEqual(json.loads(on), {'63': 63}) + on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}') + self.assertEqual(json.loads(on), {'63': 63}) + on = js_to_json('{42:42}') self.assertEqual(json.loads(on), {'42': 42}) + on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}') + self.assertEqual(json.loads(on), {'42': 42}) + def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6c462625b..67a847eba 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2103,6 +2103,13 @@ def strip_jsonp(code): def js_to_json(code): + COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*' + SKIP_RE = 
r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE) + INTEGER_TABLE = ( + (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16), + (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8), + ) + def fix_kv(m): v = m.group(0) if v in ('true', 'false', 'null'): @@ -2118,11 +2125,6 @@ def js_to_json(code): '\\x': '\\u00', }.get(m.group(0), m.group(0)), v[1:-1]) - INTEGER_TABLE = ( - (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16), - (r'^(0+[0-7]+)\s*:?$', 8), - ) - for regex, base in INTEGER_TABLE: im = re.match(regex, v) if im: @@ -2134,11 +2136,11 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - /\*.*?\*/|//[^\n]*|,(?=\s*[\]}])| + {comment}|,(?={skip}[\]}}])| [a-zA-Z_][.a-zA-Z_0-9]*| - \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| - [0-9]+(?=\s*:) - ''', fix_kv, code) + \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?| + [0-9]+(?={skip}:) + '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code) def qualities(quality_ids): From 33da98f4933ddc54c944bae985cfcc7b53563208 Mon Sep 17 00:00:00 2001 From: Justsoos <justso@gmail.com> Date: Wed, 1 Feb 2017 21:30:01 +0800 Subject: [PATCH 156/195] [douyutv] Improve room id regex http://www.douyu.com/t/lpl source get extra '\' with "room_id\" (from js coding) --- youtube_dl/extractor/douyutv.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 2f3c5113e..911594413 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -18,7 +18,7 @@ from ..utils import ( class DouyuTVIE(InfoExtractor): IE_DESC = '斗鱼' - _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { @@ -68,6 +68,10 @@ class DouyuTVIE(InfoExtractor): }, { 'url': 
'http://www.douyu.com/xiaocang', 'only_matching': True, + }, { + # \"room_id\" + 'url': 'http://www.douyu.com/t/lpl', + 'only_matching': True, }] # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf @@ -82,7 +86,7 @@ class DouyuTVIE(InfoExtractor): else: page = self._download_webpage(url, video_id) room_id = self._html_search_regex( - r'"room_id"\s*:\s*(\d+),', page, 'room id') + r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') room = self._download_json( 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, From 45024183aea169dc898902388f782485de02cbac Mon Sep 17 00:00:00 2001 From: Mattias Wadman <mattias.wadman@gmail.com> Date: Fri, 3 Feb 2017 05:10:13 +0100 Subject: [PATCH 157/195] [infoq] Add audio only format if available (#11565) * [infoq] Add audio only format if available Refactor cookie code into a function. Renamed formats to http_video, http_audio, rtmp_video Renamed extract functions to video instead of videos as they return one or no video. * [infoq] Rename to _extract_cookies as it more than one * [infoq] Remove redundant determine_ext * [infoq] Add comment about hardcoded URL * [infoq] Use _hidden_inputs instead of messy regex * [infoq] Probe if audio URL is valid Make it possible to pass headers to _is_valid_url * [infoq] Add audio only test --- youtube_dl/extractor/common.py | 4 +-- youtube_dl/extractor/infoq.py | 63 ++++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5a15a9536..2c8ec1417 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1025,13 +1025,13 @@ class InfoExtractor(object): unique_formats.append(f) formats[:] = unique_formats - def _is_valid_url(self, url, video_id, item='video'): + def _is_valid_url(self, url, video_id, item='video', headers={}): url = self._proto_relative_url(url, scheme='http:') # For now assume non HTTP(S) URLs always valid if not 
(url.startswith('http://') or url.startswith('https://')): return True try: - self._request_webpage(url, video_id, 'Checking %s URL' % item) + self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers) return True except ExtractorError as e: if isinstance(e.cause, compat_urllib_error.URLError): diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index cca0b8a93..9fb71e8ef 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import base64 -from ..compat import compat_urllib_parse_unquote +from ..compat import ( + compat_urllib_parse_unquote, + compat_urlparse, +) from ..utils import determine_ext from .bokecc import BokeCCBaseIE @@ -33,9 +36,21 @@ class InfoQIE(BokeCCBaseIE): 'ext': 'flv', 'description': 'md5:308d981fb28fa42f49f9568322c683ff', }, + }, { + 'url': 'https://www.infoq.com/presentations/Simple-Made-Easy', + 'md5': '0e34642d4d9ef44bf86f66f6399672db', + 'info_dict': { + 'id': 'Simple-Made-Easy', + 'title': 'Simple Made Easy', + 'ext': 'mp3', + 'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b', + }, + 'params': { + 'format': 'bestaudio', + }, }] - def _extract_rtmp_videos(self, webpage): + def _extract_rtmp_video(self, webpage): # The server URL is hardcoded video_url = 'rtmpe://video.infoq.com/cfx/st/' @@ -47,28 +62,53 @@ class InfoQIE(BokeCCBaseIE): playpath = 'mp4:' + real_id return [{ - 'format_id': 'rtmp', + 'format_id': 'rtmp_video', 'url': video_url, 'ext': determine_ext(playpath), 'play_path': playpath, }] - def _extract_http_videos(self, webpage): - http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL') - + def _extract_cookies(self, webpage): policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') key_pair_id = 
self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') + return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( + policy, signature, key_pair_id) + def _extract_http_video(self, webpage): + http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL') return [{ - 'format_id': 'http', + 'format_id': 'http_video', 'url': http_video_url, 'http_headers': { - 'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( - policy, signature, key_pair_id), + 'Cookie': self._extract_cookies(webpage) }, }] + def _extract_http_audio(self, webpage, video_id): + fields = self._hidden_inputs(webpage) + http_audio_url = fields['filename'] + if http_audio_url is None: + return [] + + cookies_header = {'Cookie': self._extract_cookies(webpage)} + + # base URL is found in the Location header in the response returned by + # GET https://www.infoq.com/mp3download.action?filename=... when logged in. 
+ http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url) + + # audio file seem to be missing some times even if there is a download link + # so probe URL to make sure + if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header): + return [] + + return [{ + 'format_id': 'http_audio', + 'url': http_audio_url, + 'vcodec': 'none', + 'http_headers': cookies_header, + }] + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -80,7 +120,10 @@ class InfoQIE(BokeCCBaseIE): # for China videos, HTTP video URL exists but always fails with 403 formats = self._extract_bokecc_formats(webpage, video_id) else: - formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage) + formats = ( + self._extract_rtmp_video(webpage) + + self._extract_http_video(webpage) + + self._extract_http_audio(webpage, video_id)) self._sort_formats(formats) From d7f9242e301fa7c08542932c9348140cf2e07172 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 3 Feb 2017 12:13:24 +0800 Subject: [PATCH 158/195] [ChangeLog] Update after #11565 --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index c80126cfb..487ed3f0f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [infoq] Add audio only formats (#11565) * [youtube] Fix ytsearch when cookies are provided (#11924) + [bilibili] Support new Bangumi URLs (#11845) From 4ce3407d089ae8c34341e6d68267910683d4b500 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 3 Feb 2017 10:15:03 +0100 Subject: [PATCH 159/195] [filmon] improve extraction --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/filmon.py | 236 +++++++++++++++++------------ 2 files changed, 139 insertions(+), 102 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 
c9b9ebd23..e4ee43ee3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,7 +287,10 @@ from .fc2 import ( FC2EmbedIE, ) from .fczenit import FczenitIE -from .filmon import FilmOnIE, FilmOnVODIE +from .filmon import ( + FilmOnIE, + FilmOnChannelIE, +) from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE diff --git a/youtube_dl/extractor/filmon.py b/youtube_dl/extractor/filmon.py index 987792fec..f775fe0ba 100644 --- a/youtube_dl/extractor/filmon.py +++ b/youtube_dl/extractor/filmon.py @@ -2,74 +2,21 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import qualities -from ..compat import compat_urllib_request - - -_QUALITY = qualities(('low', 'high')) +from ..compat import ( + compat_str, + compat_HTTPError, +) +from ..utils import ( + qualities, + strip_or_none, + int_or_none, + ExtractorError, +) class FilmOnIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)' - _TESTS = [{ - 'url': 'https://www.filmon.com/channel/filmon-sports', - 'only_matching': True, - }, { - 'url': 'https://www.filmon.com/tv/2894', - 'only_matching': True, - }] - - def _real_extract(self, url): - channel_id = self._match_id(url) - - request = compat_urllib_request.Request('https://www.filmon.com/channel/%s' % (channel_id)) - request.add_header('X-Requested-With', 'XMLHttpRequest') - channel_info = self._download_json(request, channel_id) - now_playing = channel_info['now_playing'] - - thumbnails = [] - for thumb in now_playing.get('images', ()): - if thumb['type'] != '2': - continue - thumbnails.append({ - 'url': thumb['url'], - 'width': int(thumb['width']), - 'height': int(thumb['height']), - }) - - formats = [] - - for stream in channel_info['streams']: - formats.append({ - 'format_id': str(stream['id']), - # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats - # because 0) it doesn't 
have bitrate variants anyway, and 1) the ids generated - # by that method are highly unstable (because the bitrate is variable) - 'url': stream['url'], - 'resolution': stream['name'], - 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), - 'ext': 'mp4', - 'quality': _QUALITY(stream['quality']), - 'preference': int(stream['watch-timeout']), - }) - self._sort_formats(formats) - - return { - 'id': str(channel_info['id']), - 'display_id': channel_info['alias'], - 'formats': formats, - # XXX: use the channel description (channel_info['description'])? - 'uploader_id': channel_info['alias'], - 'uploader': channel_info['title'], # XXX: kinda stretching it... - 'title': now_playing.get('programme_name') or channel_info['title'], - 'description': now_playing.get('programme_description'), - 'thumbnails': thumbnails, - 'is_live': True, - } - - -class FilmOnVODIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?filmon\.com/vod/view/(?P<id>\d+)' + IE_NAME = 'filmon' + _VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space', 'info_dict': { @@ -83,62 +30,149 @@ class FilmOnVODIE(InfoExtractor): 'info_dict': { 'id': '2825', 'title': 'Popeye Series 1', + 'description': 'The original series of Popeye.', }, - 'playlist_count': 8, + 'playlist_mincount': 8, }] def _real_extract(self, url): video_id = self._match_id(url) - result = self._download_json('https://www.filmon.com/api/vod/movie?id=%s' % (video_id), video_id) - if result['code'] != 200: - raise ExtractorError('FilmOn said: %s' % (result['reason']), expected=True) + try: + response = self._download_json( + 'https://www.filmon.com/api/vod/movie?id=%s' % video_id, + video_id)['response'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason'] + raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), 
expected=True) + raise - response = result['response'] + title = response['title'] + description = strip_or_none(response.get('description')) - if response.get('episodes'): - return { - '_type': 'playlist', - 'id': video_id, - 'title': response['title'], - 'entries': [{ - '_type': 'url', - 'url': 'https://www.filmon.com/vod/view/%s' % (ep), - } for ep in response['episodes']] - } + if response.get('type_id') == 1: + entries = [self.url_result('filmon:' + episode_id) for episode_id in response.get('episodes', [])] + return self.playlist_result(entries, video_id, title, description) + QUALITY = qualities(('low', 'high')) formats = [] - for (id, stream) in response['streams'].items(): + for format_id, stream in response.get('streams', {}).items(): + stream_url = stream.get('url') + if not stream_url: + continue formats.append({ - 'format_id': id, - 'url': stream['url'], - 'resolution': stream['name'], - 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), + 'format_id': format_id, + 'url': stream_url, 'ext': 'mp4', - 'quality': _QUALITY(stream['quality']), - 'preference': int(stream['watch-timeout']), + 'quality': QUALITY(stream.get('quality')), + 'protocol': 'm3u8_native', }) self._sort_formats(formats) - poster = response['poster'] - thumbnails = [{ - 'id': 'poster', - 'url': poster['url'], - 'width': poster['width'], - 'height': poster['height'], - }] - for (id, thumb) in poster['thumbs'].items(): + thumbnails = [] + poster = response.get('poster', {}) + thumbs = poster.get('thumbs', {}) + thumbs['poster'] = poster + for thumb_id, thumb in thumbs.items(): + thumb_url = thumb.get('url') + if not thumb_url: + continue thumbnails.append({ - 'id': id, - 'url': thumb['url'], - 'width': thumb['width'], - 'height': thumb['height'], + 'id': thumb_id, + 'url': thumb_url, + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), }) return { 'id': video_id, - 'title': response['title'], + 'title': title, 'formats': formats, 
- 'description': response['description'], + 'description': description, 'thumbnails': thumbnails, } + + +class FilmOnChannelIE(InfoExtractor): + IE_NAME = 'filmon:channel' + _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)' + _TESTS = [{ + # VOD + 'url': 'http://www.filmon.com/tv/sports-haters', + 'info_dict': { + 'id': '4190', + 'ext': 'mp4', + 'title': 'Sports Haters', + 'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d', + }, + }, { + # LIVE + 'url': 'https://www.filmon.com/channel/filmon-sports', + 'only_matching': True, + }, { + 'url': 'https://www.filmon.com/tv/2894', + 'only_matching': True, + }] + + _THUMBNAIL_RES = [ + ('logo', 56, 28), + ('big_logo', 106, 106), + ('extra_big_logo', 300, 300), + ] + + def _real_extract(self, url): + channel_id = self._match_id(url) + + try: + channel_data = self._download_json( + 'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message'] + raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) + raise + + channel_id = compat_str(channel_data['id']) + is_live = not channel_data.get('is_vod') and not channel_data.get('is_vox') + title = channel_data['title'] + + QUALITY = qualities(('low', 'high')) + formats = [] + for stream in channel_data.get('streams', []): + stream_url = stream.get('url') + if not stream_url: + continue + if not is_live: + formats.extend(self._extract_wowza_formats( + stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp'])) + continue + quality = stream.get('quality') + formats.append({ + 'format_id': quality, + # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats + # because it doesn't have bitrate variants anyway + 'url': stream_url, + 'ext': 'mp4', + 'quality': QUALITY(quality), + }) + self._sort_formats(formats) + + thumbnails = [] + for name, 
width, height in self._THUMBNAIL_RES: + thumbnails.append({ + 'id': name, + 'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name), + 'width': width, + 'height': height, + }) + + return { + 'id': channel_id, + 'display_id': channel_data.get('alias'), + 'title': self._live_title(title) if is_live else title, + 'description': channel_data.get('description'), + 'thumbnails': thumbnails, + 'formats': formats, + 'is_live': is_live, + } From daac118bf4e8bf3dc1ec202fe8b21b9319d15dbf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 3 Feb 2017 18:56:40 +0800 Subject: [PATCH 160/195] [ChangeLog] Update after #11901 --- ChangeLog | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog b/ChangeLog index 487ed3f0f..947590b94 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,14 @@ version <unreleased> +Core ++ Add --playlist-random to shuffle playlists (#11889, #11901) + Extractors + [infoq] Add audio only formats (#11565) * [youtube] Fix ytsearch when cookies are provided (#11924) + [bilibili] Support new Bangumi URLs (#11845) + version 2017.02.01 Extractors From f7a10d8cd6d1378d5f8e67b4b3572fa474b47cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Feb 2017 21:25:44 +0700 Subject: [PATCH 161/195] [sportbox] Remove extractor (closes #11954) Covered by generic extractor --- youtube_dl/extractor/sportbox.py | 54 -------------------------------- 1 file changed, 54 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index b512cd20f..05a0b5a80 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -11,60 +11,6 @@ from ..utils import ( ) -class SportBoxIE(InfoExtractor): - _VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)' - _TESTS = [{ - 'url': 
'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', - 'md5': 'ff56a598c2cf411a9a38a69709e97079', - 'info_dict': { - 'id': '80822', - 'ext': 'mp4', - 'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', - 'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20140928', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', - 'only_matching': True, - }, { - 'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - player = self._search_regex( - r'src="/?(vdl/player/[^"]+)"', webpage, 'player') - - title = self._html_search_regex( - [r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'], - webpage, 'title') - description = self._og_search_description(webpage) or self._html_search_meta( - 'description', webpage, 'description') - thumbnail = self._og_search_thumbnail(webpage) - upload_date = unified_strdate(self._html_search_meta( - 'dateCreated', webpage, 'upload date')) - - return { - '_type': 'url_transparent', - 'url': compat_urlparse.urljoin(url, '/%s' % player), - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - } - - class SportBoxEmbedIE(InfoExtractor): _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' _TESTS = [{ From b7cc5f078eca4d90b3e3d31d1247452953dba1fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Feb 
2017 21:56:10 +0700 Subject: [PATCH 162/195] [extractors] Remove remnants of sportbox extractor (#11954) --- youtube_dl/extractor/extractors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index aa235bec1..eaf3676df 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -888,10 +888,7 @@ from .spiegeltv import SpiegeltvIE from .spike import SpikeIE from .stitcher import StitcherIE from .sport5 import Sport5IE -from .sportbox import ( - SportBoxIE, - SportBoxEmbedIE, -) +from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE from .sportschau import SportschauIE from .srgssr import ( From f962790ee53c634758021d9fc752ae476c6a142b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Feb 2017 21:56:48 +0700 Subject: [PATCH 163/195] [vine] Fix extraction (closes #11955) --- youtube_dl/extractor/vine.py | 103 +++++++++++++++-------------------- 1 file changed, 44 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 0183f052a..4957a07f7 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -6,8 +6,9 @@ import itertools from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, - unified_strdate, + unified_timestamp, ) @@ -20,50 +21,16 @@ class VineIE(InfoExtractor): 'id': 'b9KOOWX7HUx', 'ext': 'mp4', 'title': 'Chicken.', - 'alt_title': 'Vine by Jack Dorsey', + 'alt_title': 'Vine by Jack', + 'timestamp': 1368997951, 'upload_date': '20130519', - 'uploader': 'Jack Dorsey', + 'uploader': 'Jack', 'uploader_id': '76', 'view_count': int, 'like_count': int, 'comment_count': int, 'repost_count': int, }, - }, { - 'url': 'https://vine.co/v/MYxVapFvz2z', - 'md5': '7b9a7cbc76734424ff942eb52c8f1065', - 'info_dict': { - 'id': 'MYxVapFvz2z', - 'ext': 'mp4', - 'title': 'Fuck Da 
Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14', - 'alt_title': 'Vine by Mars Ruiz', - 'upload_date': '20140815', - 'uploader': 'Mars Ruiz', - 'uploader_id': '1102363502380728320', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, { - 'url': 'https://vine.co/v/bxVjBbZlPUH', - 'md5': 'ea27decea3fa670625aac92771a96b73', - 'info_dict': { - 'id': 'bxVjBbZlPUH', - 'ext': 'mp4', - 'title': '#mw3 #ac130 #killcam #angelofdeath', - 'alt_title': 'Vine by Z3k3', - 'upload_date': '20130430', - 'uploader': 'Z3k3', - 'uploader_id': '936470460173008896', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, { - 'url': 'https://vine.co/oembed/MYxVapFvz2z.json', - 'only_matching': True, }, { 'url': 'https://vine.co/v/e192BnZnZ9V', 'info_dict': { @@ -71,6 +38,7 @@ class VineIE(InfoExtractor): 'ext': 'mp4', 'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries #lovesickseason2', 'alt_title': 'Vine by Pimry_zaa', + 'timestamp': 1436057405, 'upload_date': '20150705', 'uploader': 'Pimry_zaa', 'uploader_id': '1135760698325307392', @@ -82,43 +50,60 @@ class VineIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://vine.co/v/MYxVapFvz2z', + 'only_matching': True, + }, { + 'url': 'https://vine.co/v/bxVjBbZlPUH', + 'only_matching': True, + }, { + 'url': 'https://vine.co/oembed/MYxVapFvz2z.json', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id) - data = self._parse_json( - self._search_regex( - r'window\.POST_DATA\s*=\s*({.+?});\s*</script>', - webpage, 'vine data'), - video_id) + data = self._download_json( + 'https://archive.vine.co/posts/%s.json' % video_id, video_id) - data = data[list(data.keys())[0]] - - formats = [{ - 'format_id': '%(format)s-%(rate)s' % f, - 'vcodec': f.get('format'), - 
'quality': f.get('rate'), - 'url': f['videoUrl'], - } for f in data['videoUrls'] if f.get('videoUrl')] + def video_url(kind): + for url_suffix in ('Url', 'URL'): + format_url = data.get('video%s%s' % (kind, url_suffix)) + if format_url: + return format_url + formats = [] + for quality, format_id in enumerate(('low', '', 'dash')): + format_url = video_url(format_id.capitalize()) + if not format_url: + continue + # DASH link returns plain mp4 + if format_id == 'dash' and determine_ext(format_url) == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id or 'standard', + 'quality': quality, + }) self._sort_formats(formats) username = data.get('username') return { 'id': video_id, - 'title': data.get('description') or self._og_search_title(webpage), - 'alt_title': 'Vine by %s' % username if username else self._og_search_description(webpage, default=None), + 'title': data.get('description'), + 'alt_title': 'Vine by %s' % username if username else None, 'thumbnail': data.get('thumbnailUrl'), - 'upload_date': unified_strdate(data.get('created')), + 'timestamp': unified_timestamp(data.get('created')), 'uploader': username, 'uploader_id': data.get('userIdStr'), - 'view_count': int_or_none(data.get('loops', {}).get('count')), - 'like_count': int_or_none(data.get('likes', {}).get('count')), - 'comment_count': int_or_none(data.get('comments', {}).get('count')), - 'repost_count': int_or_none(data.get('reposts', {}).get('count')), + 'view_count': int_or_none(data.get('loops')), + 'like_count': int_or_none(data.get('likes')), + 'comment_count': int_or_none(data.get('comments')), + 'repost_count': int_or_none(data.get('reposts')), 'formats': formats, } From 605fd6392fedd2599115e1f1e12df2a6212df1ae Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 3 Feb 2017 17:59:48 +0100 Subject: [PATCH 164/195] [youtube] add format info for itag 325 
and 328 --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f2f751104..76710931a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -329,6 +329,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, + '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'}, + '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'}, # Dash webm '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, From f65dba7cdb98bb5444ad5656c9626a15d210f6f6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 3 Feb 2017 22:25:19 +0100 Subject: [PATCH 165/195] [myspace] fix extraction and extract hls and http formats --- youtube_dl/extractor/myspace.py | 108 +++++++++++++++++--------------- 1 file changed, 58 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index ab32e632e..f281238c9 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -17,9 +17,10 @@ class MySpaceIE(InfoExtractor): _TESTS = [ { 'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919', + 'md5': '9c1483c106f4a695c47d2911feed50a7', 'info_dict': { 'id': '109594919', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Little Big Town', 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific 
Amphitheatre in Orange County, California.', 'uploader': 'Five Minutes to the Stage', @@ -27,37 +28,30 @@ class MySpaceIE(InfoExtractor): 'timestamp': 1414108751, 'upload_date': '20141023', }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, # songs { 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681', + 'md5': '1d7ee4604a3da226dd69a123f748b262', 'info_dict': { 'id': '93388656', - 'ext': 'flv', + 'ext': 'm4a', 'title': 'Of weakened soul...', 'uploader': 'Killsorrow', 'uploader_id': 'killsorrow', }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, { - 'add_ie': ['Vevo'], + 'add_ie': ['Youtube'], 'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041', 'info_dict': { - 'id': 'USZM20600099', - 'ext': 'mp4', - 'title': 'Animal I Have Become', - 'uploader': 'Three Days Grace', - 'timestamp': int, - 'upload_date': '20060502', + 'id': 'xqds0B_meys', + 'ext': 'webm', + 'title': 'Three Days Grace - Animal I Have Become', + 'description': 'md5:8bd86b3693e72a077cf863a8530c54bb', + 'uploader': 'ThreeDaysGraceVEVO', + 'uploader_id': 'ThreeDaysGraceVEVO', + 'upload_date': '20091002', }, - 'skip': 'VEVO is only available in some countries', }, { 'add_ie': ['Youtube'], 'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426', @@ -76,24 +70,46 @@ class MySpaceIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + is_song = mobj.group('mediatype').startswith('music/song') webpage = self._download_webpage(url, video_id) player_url = self._search_regex( - r'playerSwf":"([^"?]*)', webpage, 'player URL') + r'videoSwf":"([^"?]*)', webpage, 'player URL', fatal=False) - def rtmp_format_from_stream_url(stream_url, width=None, height=None): - rtmp_url, play_path = stream_url.split(';', 1) - return { - 'format_id': 'rtmp', - 'url': rtmp_url, - 'play_path': play_path, - 'player_url': player_url, - 
'protocol': 'rtmp', - 'ext': 'flv', - 'width': width, - 'height': height, - } + def formats_from_stream_urls(stream_url, hls_stream_url, http_stream_url, width=None, height=None): + formats = [] + vcodec = 'none' if is_song else None + if hls_stream_url: + formats.append({ + 'format_id': 'hls', + 'url': hls_stream_url, + 'protocol': 'm3u8_native', + 'ext': 'm4a' if is_song else 'mp4', + 'vcodec': vcodec, + }) + if stream_url and player_url: + rtmp_url, play_path = stream_url.split(';', 1) + formats.append({ + 'format_id': 'rtmp', + 'url': rtmp_url, + 'play_path': play_path, + 'player_url': player_url, + 'protocol': 'rtmp', + 'ext': 'flv', + 'width': width, + 'height': height, + 'vcodec': vcodec, + }) + if http_stream_url: + formats.append({ + 'format_id': 'http', + 'url': http_stream_url, + 'width': width, + 'height': height, + 'vcodec': vcodec, + }) + return formats - if mobj.group('mediatype').startswith('music/song'): + if is_song: # songs don't store any useful info in the 'context' variable song_data = self._search_regex( r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id, @@ -108,8 +124,10 @@ class MySpaceIE(InfoExtractor): return self._search_regex( r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, song_data, name, default='', group='data') - stream_url = search_data('stream-url') - if not stream_url: + formats = formats_from_stream_urls( + search_data('stream-url'), search_data('hls-stream-url'), + search_data('http-stream-url')) + if not formats: vevo_id = search_data('vevo-id') youtube_id = search_data('youtube-id') if vevo_id: @@ -121,6 +139,7 @@ class MySpaceIE(InfoExtractor): else: raise ExtractorError( 'Found song but don\'t know how to download it') + self._sort_formats(formats) return { 'id': video_id, 'title': self._og_search_title(webpage), @@ -128,27 +147,16 @@ class MySpaceIE(InfoExtractor): 'uploader_id': search_data('artist-username'), 'thumbnail': self._og_search_thumbnail(webpage), 'duration': int_or_none(search_data('duration')), - 
'formats': [rtmp_format_from_stream_url(stream_url)] + 'formats': formats, } else: video = self._parse_json(self._search_regex( r'context = ({.*?});', webpage, 'context'), video_id)['video'] - formats = [] - hls_stream_url = video.get('hlsStreamUrl') - if hls_stream_url: - formats.append({ - 'format_id': 'hls', - 'url': hls_stream_url, - 'protocol': 'm3u8_native', - 'ext': 'mp4', - }) - stream_url = video.get('streamUrl') - if stream_url: - formats.append(rtmp_format_from_stream_url( - stream_url, - int_or_none(video.get('width')), - int_or_none(video.get('height')))) + formats = formats_from_stream_urls( + video.get('streamUrl'), video.get('hlsStreamUrl'), + video.get('mp4StreamUrl'), int_or_none(video.get('width')), + int_or_none(video.get('height'))) self._sort_formats(formats) return { 'id': video_id, From 2c15db829c1bd8311ed82e2884661271f0cf73ed Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 4 Feb 2017 08:38:28 +0100 Subject: [PATCH 166/195] [drtv] add support for live and radio sections(closes #1827)(closes #3427) --- youtube_dl/extractor/drtv.py | 74 +++++++++++++++++++++++++++--- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 72 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 88d096b30..e966d7483 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -9,12 +9,13 @@ from ..utils import ( mimetype2ext, parse_iso8601, remove_end, + update_url_query, ) class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' - + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' + IE_NAME = 'drtv' _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', 'md5': '25e659cccc9a2ed956110a299fdf5983', @@ -79,9 +80,10 @@ class DRTVIE(InfoExtractor): subtitles = {} for asset in 
data['Assets']: - if asset.get('Kind') == 'Image': + kind = asset.get('Kind') + if kind == 'Image': thumbnail = asset.get('Uri') - elif asset.get('Kind') == 'VideoResource': + elif kind in ('VideoResource', 'AudioResource'): duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) restricted_to_denmark = asset.get('RestrictedToDenmark') spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' @@ -96,9 +98,13 @@ class DRTVIE(InfoExtractor): preference = -1 format_id += '-spoken-subtitles' if target == 'HDS': - formats.extend(self._extract_f4m_formats( + f4m_formats = self._extract_f4m_formats( uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', - video_id, preference, f4m_id=format_id)) + video_id, preference, f4m_id=format_id) + if kind == 'AudioResource': + for f in f4m_formats: + f['vcodec'] = 'none' + formats.extend(f4m_formats) elif target == 'HLS': formats.extend(self._extract_m3u8_formats( uri, video_id, 'mp4', entry_protocol='m3u8_native', @@ -112,6 +118,7 @@ class DRTVIE(InfoExtractor): 'format_id': format_id, 'tbr': int_or_none(bitrate), 'ext': link.get('FileFormat'), + 'vcodec': 'none' if kind == 'AudioResource' else None, }) subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): @@ -144,3 +151,58 @@ class DRTVIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class DRTVLiveIE(InfoExtractor): + IE_NAME = 'drtv:live' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)' + _TEST = { + 'url': 'https://www.dr.dk/tv/live/dr1', + 'info_dict': { + 'id': 'dr1', + 'ext': 'mp4', + 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + channel_data = self._download_json( + 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id, + channel_id) + title = self._live_title(channel_data['Title']) + + formats = [] + for streaming_server in 
channel_data.get('StreamingServers', []): + server = streaming_server.get('Server') + if not server: + continue + link_type = streaming_server.get('LinkType') + for quality in streaming_server.get('Qualities', []): + for stream in quality.get('Streams', []): + stream_path = stream.get('Stream') + if not stream_path: + continue + stream_url = update_url_query( + '%s/%s' % (server, stream_path), {'b': ''}) + if link_type == 'HLS': + formats.extend(self._extract_m3u8_formats( + stream_url, channel_id, 'mp4', + m3u8_id=link_type, fatal=False, live=True)) + elif link_type == 'HDS': + formats.extend(self._extract_f4m_formats(update_url_query( + '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}), + channel_id, f4m_id=link_type, fatal=False)) + self._sort_formats(formats) + + return { + 'id': channel_id, + 'title': title, + 'thumbnail': channel_data.get('PrimaryImageUri'), + 'formats': formats, + 'is_live': True, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index eaf3676df..32420937c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -248,7 +248,10 @@ from .dramafever import ( from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE from .drtuber import DrTuberIE -from .drtv import DRTVIE +from .drtv import ( + DRTVIE, + DRTVLiveIE, +) from .dvtv import DVTVIE from .dumpert import DumpertIE from .defense import DefenseGouvFrIE From 36fce54816eb1f1d792ac7ed4d07e292d44d62f5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 4 Feb 2017 15:23:46 +0100 Subject: [PATCH 167/195] [turner] fix downloading of secure hls formats using ffmpeg(closes #11358)(closes #11373)(closes #11800) --- youtube_dl/downloader/external.py | 9 +++++++++ youtube_dl/extractor/turner.py | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 138f353ef..41e37261d 100644 --- 
a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -199,6 +199,15 @@ class FFmpegFD(ExternalFD): args = [ffpp.executable, '-y'] + seekable = info_dict.get('_seekable') + if seekable is not None: + # setting -seekable prevents ffmpeg from guessing if the server + # supports seeking(by adding the header `Range: bytes=0-`), which + # can cause problems in some cases + # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127 + # http://trac.ffmpeg.org/ticket/6125#comment:10 + args += ['-seekable', '1' if seekable else '0'] + args += self._configuration_args() # start_time = info_dict.get('start_time') or 0 diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 57ffedb87..1c0be9fc6 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -100,9 +100,13 @@ class TurnerBaseIE(AdobePassIE): formats.extend(self._extract_smil_formats( video_url, video_id, fatal=False)) elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( video_url, video_id, 'mp4', - m3u8_id=format_id or 'hls', fatal=False)) + m3u8_id=format_id or 'hls', fatal=False) + if '/secure/' in video_url and '?hdnea=' in video_url: + for f in m3u8_formats: + f['_seekable'] = False + formats.extend(m3u8_formats) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( update_url_query(video_url, {'hdcore': '3.7.0'}), From 643dc0fcfed5e5eb152000190d0c7ba9dd577ef8 Mon Sep 17 00:00:00 2001 From: A Connecticut Princess <bugchecker@dibaby.org> Date: Sat, 4 Feb 2017 13:23:35 +0500 Subject: [PATCH 168/195] [vk] Catch author blocked error message Example link (video in blocked group): https://vk.com/search?c%5Bq%5D=%D0%9F%D1%80%D1%8B%D0%B6%D0%BE%D0%BA%20c%20%D0%BA%D1%80%D0%B0%D0%BD%D0%B0%20%D0%B2%20%D1%81%D1%82%D0%B8%D0%BB%D0%B5%20%D0%A7%D0%B5%D0%BB%D0%BE%D0%B2%D0%B5%D0%BA%D0%B0-%D0%BF%D0%B0%D1%83%D0%BA%D0%B0&c%5Bsection%5D=video&c%5Bsort%5D=2&z=video-10639516_456240611 
--- youtube_dl/extractor/vk.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 6e6c3a0e1..7c42a4f54 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -281,6 +281,11 @@ class VKIE(VKBaseIE): { 'url': 'http://new.vk.com/video205387401_165548505', 'only_matching': True, + }, + { + # This video is no longer available, because its author has been blocked. + 'url': 'https://vk.com/video-10639516_456240611', + 'only_matching': True, } ] @@ -328,6 +333,12 @@ class VKIE(VKBaseIE): r'<!>Access denied': 'Access denied to video %s.', + + r'<!>Видеозапись недоступна, так как её автор был заблокирован.': + 'Video %s is no longer available, because its author has been blocked.', + + r'<!>This video is no longer available, because its author has been blocked.': + 'Video %s is no longer available, because its author has been blocked.', } for error_re, error_msg in ERRORS.items(): From c2521c1ac6bbd24cd5d01ba764f2d084b16c506f Mon Sep 17 00:00:00 2001 From: John Hawkinson <jhawk@mit.edu> Date: Sat, 4 Feb 2017 10:23:14 -0500 Subject: [PATCH 169/195] [Piksel] Add another app token regex --- youtube_dl/extractor/piksel.py | 43 ++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index d44edcdfb..c0c276a50 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -16,18 +16,33 @@ from ..utils import ( class PikselIE(InfoExtractor): _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)' - _TEST = { - 'url': 'http://player.piksel.com/v/nv60p12f', - 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235', - 'info_dict': { - 'id': 'nv60p12f', - 'ext': 'mp4', - 'title': 'فن الحياة - الحلقة 1', - 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور', - 'timestamp': 1465231790, - 'upload_date': '20160606', + _TESTS = [ + { + 'url': 
'http://player.piksel.com/v/nv60p12f', + 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235', + 'info_dict': { + 'id': 'nv60p12f', + 'ext': 'mp4', + 'title': 'فن الحياة - الحلقة 1', + 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور', + 'timestamp': 1465231790, + 'upload_date': '20160606', + } + }, + { + # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al + 'url': 'https://player.piksel.com/v/v80kqp41', + 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', + 'info_dict': { + 'id': 'v80kqp41', + 'ext': 'mp4', + 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', + 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.', + 'timestamp': 1486171129, + 'upload_date': '20170204', + } } - } + ] @staticmethod def _extract_url(webpage): @@ -40,8 +55,10 @@ class PikselIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - app_token = self._search_regex( - r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token') + app_token = self._search_regex([ + r'clientAPI\s*:\s*"([^"]+)"', + r'data-de-api-key\s*=\s*"([^"]+)"' + ], webpage, 'app token') response = self._download_json( 'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token, video_id, query={ From 31487eb9746123b7c4e28be7e48908773beab40c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 22:57:48 +0700 Subject: [PATCH 170/195] release 2017.02.04 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 1 + docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8914569b6..11fd56038 100644 --- 
a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.01 +[debug] youtube-dl version 2017.02.04 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 947590b94..5323769d8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.04 Core + Add --playlist-random to shuffle playlists (#11889, #11901) diff --git a/README.md b/README.md index 2ee00f515..89876bd7a 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,7 @@ Alternatively, refer to the [developer 
instructions](#developer-instructions) fo automatically resized from an initial value of SIZE. --playlist-reverse Download playlist videos in reverse order + --playlist-random Download playlist videos in random order --xattr-set-filesize Set file xattribute ytdl.filesize with expected file size (experimental) --hls-prefer-native Use the native HLS downloader instead of diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d900f5e12..50a339bc4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -84,6 +84,7 @@ - **bambuser:channel** - **Bandcamp** - **Bandcamp:album** + - **bangumi.bilibili.com**: BiliBili番剧 - **bbc**: BBC - **bbc.co.uk**: BBC iPlayer - **bbc.co.uk:article**: BBC articles @@ -211,7 +212,8 @@ - **DRBonanza** - **Dropbox** - **DrTuber** - - **DRTV** + - **drtv** + - **drtv:live** - **Dumpert** - **dvtv**: http://video.aktualne.cz/ - **dw** @@ -247,6 +249,8 @@ - **fc2:embed** - **Fczenit** - **fernsehkritik.tv** + - **filmon** + - **filmon:channel** - **Firstpost** - **FiveTV** - **Flickr** @@ -703,7 +707,6 @@ - **Spiegeltv** - **Spike** - **Sport5** - - **SportBox** - **SportBoxEmbed** - **SportDeutschland** - **Sportschau** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0f9b6b703..376b31397 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.01' +__version__ = '2017.02.04' From 8e4041cf3f8e769ee2188f3db4747b7133ab5c2d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 4 Feb 2017 17:02:12 +0100 Subject: [PATCH 171/195] [radiocanada] fix extraction for toutv rtmp formats --- youtube_dl/extractor/radiocanada.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 321917ad0..3b40002a8 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -54,9 
+54,8 @@ class RadioCanadaIE(InfoExtractor): raise ExtractorError('This video is DRM protected.', expected=True) device_types = ['ipad'] - if app_code != 'toutv': - device_types.append('flash') if not smuggled_data: + device_types.append('flash') device_types.append('android') formats = [] @@ -103,7 +102,7 @@ class RadioCanadaIE(InfoExtractor): continue f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url) protocol = determine_protocol({'url': f_url}) - formats.append({ + f = { 'format_id': '%s-%d' % (protocol, tbr), 'url': f_url, 'ext': 'flv' if protocol == 'rtmp' else ext, @@ -111,7 +110,14 @@ class RadioCanadaIE(InfoExtractor): 'width': int_or_none(url_e.get('width')), 'height': int_or_none(url_e.get('height')), 'tbr': tbr, - }) + } + mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url) + if mobj: + f.update({ + 'url': mobj.group('url') + mobj.group('auth'), + 'play_path': mobj.group('playpath'), + }) + formats.append(f) if protocol == 'rtsp': base_url = self._search_regex( r'rtsp://([^?]+)', f_url, 'base url', default=None) From 9db8f6c54021a9c809c8ae65a37544ad566ed159 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 23:21:07 +0700 Subject: [PATCH 172/195] [twitch:stream] Improve _VALID_URL (closes #11971) --- youtube_dl/extractor/twitch.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 1ca159a4d..bbba394b0 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -447,7 +447,14 @@ class TwitchHighlightsIE(TwitchVideosBaseIE): class TwitchStreamIE(TwitchBaseIE): IE_NAME = 'twitch:stream' - _VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?twitch\.tv/| + player\.twitch\.tv/\?.*?\bchannel= + ) + (?P<id>[^/#?]+) + ''' _TESTS = [{ 'url': 
'http://www.twitch.tv/shroomztv', @@ -471,8 +478,25 @@ class TwitchStreamIE(TwitchBaseIE): }, { 'url': 'http://www.twitch.tv/miracle_doto#profile-0', 'only_matching': True, + }, { + 'url': 'https://player.twitch.tv/?channel=lotsofs', + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return (False + if any(ie.suitable(url) for ie in ( + TwitchVideoIE, + TwitchChapterIE, + TwitchVodIE, + TwitchProfileIE, + TwitchAllVideosIE, + TwitchUploadsIE, + TwitchPastBroadcastsIE, + TwitchHighlightsIE)) + else super(TwitchStreamIE, cls).suitable(url)) + def _real_extract(self, url): channel_id = self._match_id(url) From 3144eccf551afc4c5e66e06de541c033e6f90681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 23:22:28 +0700 Subject: [PATCH 173/195] [ChangeLog] Actualize --- ChangeLog | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 5323769d8..fe9cd3440 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,38 @@ +version <unreleased> + +Extractors ++ [twitch:stream] Add support for player.twitch.tv (#11971) + + version 2017.02.04 Core + Add --playlist-random to shuffle playlists (#11889, #11901) +* [utils] Improve comments processing in js_to_json (#11947) +* [utils] Handle single-line comments in js_to_json +* [downloader/external:ffmpeg] Minimize the use of aac_adtstoasc filter Extractors ++ [piksel] Add another app token pattern (#11969) ++ [vk] Capture and output author blocked error message (#11965) ++ [turner] Fix secure HLS formats downloading with ffmpeg (#11358, #11373, + #11800) ++ [drtv] Add support for live and radio sections (#1827, #3427) +* [myspace] Fix extraction and extract HLS and HTTP formats ++ [youtube] Add format info for itag 325 and 328 +* [vine] Fix extraction (#11955) +- [sportbox] Remove extractor (#11954) ++ [filmon] Add support for filmon.com (#11187) + [infoq] Add audio only formats (#11565) +* [douyutv] 
Improve room id regular expression (#11931) +* [iprima] Fix extraction (#11920, #11896) * [youtube] Fix ytsearch when cookies are provided (#11924) +* [go] Relax video id regular expression (#11937) +* [facebook] Fix title extraction (#11941) ++ [youtube:playlist] Recognize TL playlists (#11945) + [bilibili] Support new Bangumi URLs (#11845) ++ [cbc:watch] Extract audio codec for audio only formats (#11893) ++ [elpais] Fix extraction for some URLs (#11765) version 2017.02.01 @@ -18,7 +44,6 @@ Extractors + [vimeo] Extract upload timestamp + [vimeo] Extract license (#8726, #11880) + [nrk:series] Add support for series (#11571, #11711) -+ [elpais] Fix extraction for some URLs (#11765) version 2017.01.31 From 7bccd5fc8ac35b1a3952522c0aa176c982f20206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 23:23:38 +0700 Subject: [PATCH 174/195] [ChangeLog] Actualize --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index fe9cd3440..76be8dbd9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,7 @@ version <unreleased> Extractors + [twitch:stream] Add support for player.twitch.tv (#11971) +* [radiocanada] Fix extraction for toutv rtmp formats version 2017.02.04 From a713a86755ba864a7b765fd2ce9a5ac8a8f4cc63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 23:26:39 +0700 Subject: [PATCH 175/195] release 2017.02.04.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 11fd56038..15e7d4944 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.04 +[debug] youtube-dl version 2017.02.04.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 76be8dbd9..23a729559 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.04.1 Extractors + [twitch:stream] Add support for player.twitch.tv (#11971) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 376b31397..5dde47a26 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.04' +__version__ = '2017.02.04.1' From 3d2c2752c5cd70fc7f9cebe8c4683a1de626017d Mon Sep 17 00:00:00 2001 From: Remita Amine 
<remitamine@gmail.com> Date: Sat, 4 Feb 2017 18:18:03 +0100 Subject: [PATCH 176/195] [afreecatv] extract rtmp formats --- youtube_dl/extractor/afreecatv.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 4f6cdb8a2..e0a0f7c57 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -221,10 +221,23 @@ class AfreecaTVGlobalIE(AfreecaTVIE): s_url = s.get('purl') if not s_url: continue - # TODO: extract rtmp formats - if s.get('stype') == 'HLS': + stype = s.get('stype') + if stype == 'HLS': formats.extend(self._extract_m3u8_formats( - s_url, channel_id, 'mp4', fatal=False)) + s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False)) + elif stype == 'RTMP': + format_id = [stype] + label = s.get('label') + if label: + format_id.append(label) + formats.append({ + 'format_id': '-'.join(format_id), + 'url': s_url, + 'tbr': int_or_none(s.get('bps')), + 'height': int_or_none(s.get('brt')), + 'ext': 'flv', + 'rtmp_live': True, + }) self._sort_formats(formats) info.update({ From 49bd8d5e2e5c4de8c1c409adffc557cb198f7eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Feb 2017 02:41:22 +0700 Subject: [PATCH 177/195] [travis] Add python 3.6 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index c74c9cc12..4833c76e9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ python: - "3.3" - "3.4" - "3.5" + - "3.6" sudo: false script: nosetests test --verbose notifications: From 6fd138bed892ac8ae1714d64f4a53d8ea7a1d5bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Feb 2017 13:36:52 +0700 Subject: [PATCH 178/195] [sportbox] PEP 8 --- youtube_dl/extractor/sportbox.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py 
index 05a0b5a80..e7bd5bf91 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -4,11 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - js_to_json, - unified_strdate, -) +from ..utils import js_to_json class SportBoxEmbedIE(InfoExtractor): From 6ef3e65a7b244d5e432e764772177c7d48cab237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Feb 2017 13:37:27 +0700 Subject: [PATCH 179/195] [videopress] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 22 +++++++ youtube_dl/extractor/videopress.py | 99 ++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+) create mode 100644 youtube_dl/extractor/videopress.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 32420937c..cf608faee 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1095,6 +1095,7 @@ from .videomore import ( VideomoreSeasonIE, ) from .videopremium import VideoPremiumIE +from .videopress import VideoPressIE from .vidio import VidioIE from .vidme import ( VidmeIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a23486620..4156cf27d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -81,6 +81,7 @@ from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE from .openload import OpenloadIE +from .videopress import VideoPressIE class GenericIE(InfoExtractor): @@ -1473,6 +1474,21 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TwentyMinutenIE.ie_key()], + }, + { + # VideoPress embed + 'url': 'https://en.support.wordpress.com/videopress/', + 'info_dict': { + 'id': 'OcobLTqC', + 'ext': 'm4v', + 'title': 'IMG_5786', + 'timestamp': 1435711927, + 'upload_date': '20150701', + 
}, + 'params': { + 'skip_download': True, + }, + 'add_ie': [VideoPressIE.ie_key()], } # { # # TODO: find another test @@ -2438,6 +2454,12 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( openload_urls, ie=OpenloadIE.ie_key()) + # Look for VideoPress embeds + videopress_urls = VideoPressIE._extract_urls(webpage) + if videopress_urls: + return _playlist_from_matches( + videopress_urls, ie=VideoPressIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/videopress.py b/youtube_dl/extractor/videopress.py new file mode 100644 index 000000000..049db25a5 --- /dev/null +++ b/youtube_dl/extractor/videopress.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + float_or_none, + parse_age_limit, + qualities, + try_get, + unified_timestamp, + urljoin, +) + + +class VideoPressIE(InfoExtractor): + _VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)' + _TESTS = [{ + 'url': 'https://videopress.com/embed/kUJmAcSf', + 'md5': '706956a6c875873d51010921310e4bc6', + 'info_dict': { + 'id': 'kUJmAcSf', + 'ext': 'mp4', + 'title': 'VideoPress Demo', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 634.6, + 'timestamp': 1434983935, + 'upload_date': '20150622', + 'age_limit': 0, + }, + }, { + # 17+, requires birth_* params + 'url': 'https://videopress.com/embed/iH3gstfZ', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)', + webpage) + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id, + video_id, query={ + 'birth_month': 
random.randint(1, 12), + 'birth_day': random.randint(1, 31), + 'birth_year': random.randint(1950, 1995), + }) + + title = video['title'] + + def base_url(scheme): + return try_get( + video, lambda x: x['file_url_base'][scheme], compat_str) + + base_url = base_url('https') or base_url('http') + + QUALITIES = ('std', 'dvd', 'hd') + quality = qualities(QUALITIES) + + formats = [] + for format_id, f in video['files'].items(): + if not isinstance(f, dict): + continue + for ext, path in f.items(): + if ext in ('mp4', 'ogg'): + formats.append({ + 'url': urljoin(base_url, path), + 'format_id': '%s-%s' % (format_id, ext), + 'ext': determine_ext(path, ext), + 'quality': quality(format_id), + }) + original_url = try_get(video, lambda x: x['original'], compat_str) + if original_url: + formats.append({ + 'url': original_url, + 'format_id': 'original', + 'quality': len(QUALITIES), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video.get('description'), + 'thumbnail': video.get('poster'), + 'duration': float_or_none(video.get('duration'), 1000), + 'timestamp': unified_timestamp(video.get('upload_date')), + 'age_limit': parse_age_limit(video.get('rating')), + 'formats': formats, + } From e4e50f60b1040a4b6aa8ecb9139f7d5de195f407 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 5 Feb 2017 21:41:08 +0800 Subject: [PATCH 180/195] [googledrive] Fix extraction on Python 3.6 Since Python 3.6, invalid escape sequences are deprecated. It's likely that there are invalid escape sequences somewhere on the webpage, so instead of unescaping the whole webpage, just unescape the URL. See https://bugs.python.org/issue27364. That change was designed for string literals, while it affects the 'unicode_escape' encoding as well. 
The code path is: str.decode('unicode_escape') codecs.unicode_escape_decode() PyUnicode_DecodeUnicodeEscape() --- ChangeLog | 6 ++++++ youtube_dl/extractor/googledrive.py | 9 +++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 23a729559..a0025ab91 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [googledrive] Fix extraction on Python 3.6 + + version 2017.02.04.1 Extractors diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 766fc26d0..fec36cbbb 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + lowercase_escape, ) @@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})' _TESTS = [{ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', - 'md5': '881f7700aec4f538571fa1e0eed4a7b6', + 'md5': 'd109872761f7e7ecf353fa108c0dbe1e', 'info_dict': { 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ', 'ext': 'mp4', 'title': 'Big Buck Bunny.mp4', - 'duration': 46, + 'duration': 45, } }, { # video id is longer than 28 characters @@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape') + 'http://docs.google.com/file/d/%s' % video_id, video_id) reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) if reason: @@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor): resolution = fmt.split('/')[1] width, height = resolution.split('x') formats.append({ - 'url': fmt_url, + 'url': lowercase_escape(fmt_url), 'format_id': fmt_id, 'resolution': 
resolution, 'width': int_or_none(width), From caf0f5f8b7d0854caaf6778fe3a646ee0d7668fe Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 5 Feb 2017 21:48:13 +0800 Subject: [PATCH 181/195] [iwara] Fix extraction (closes #11781) --- ChangeLog | 1 + youtube_dl/extractor/iwara.py | 41 +++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index a0025ab91..77286dbef 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [iwara] Fix extraction (#11781) * [googledrive] Fix extraction on Python 3.6 diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py index 8d7e7f472..011274b02 100644 --- a/youtube_dl/extractor/iwara.py +++ b/youtube_dl/extractor/iwara.py @@ -3,14 +3,18 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse -from ..utils import remove_end +from ..utils import ( + int_or_none, + mimetype2ext, + remove_end, +) class IwaraIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD', - 'md5': '1d53866b2c514b23ed69e4352fdc9839', + # md5 is unstable 'info_dict': { 'id': 'amVwUl1EHpAD9RD', 'ext': 'mp4', @@ -23,17 +27,17 @@ class IwaraIE(InfoExtractor): 'info_dict': { 'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc', 'ext': 'mp4', - 'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4', + 'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4', 'age_limit': 18, }, 'add_ie': ['GoogleDrive'], }, { 'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq', - 'md5': '1d85f1e5217d2791626cff5ec83bb189', + # md5 is unstable 'info_dict': { 'id': '6liAP9s2Ojc', 'ext': 'mp4', - 'age_limit': 0, + 'age_limit': 18, 'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)', 'description': 'md5:590c12c0df1443d833fbebe05da8c47a', 
'upload_date': '20160910', @@ -52,9 +56,9 @@ class IwaraIE(InfoExtractor): # ecchi is 'sexy' in Japanese age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0 - entries = self._parse_html5_media_entries(url, webpage, video_id) + video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id) - if not entries: + if not video_data: iframe_url = self._html_search_regex( r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage, 'iframe URL', group='url') @@ -67,11 +71,24 @@ class IwaraIE(InfoExtractor): title = remove_end(self._html_search_regex( r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara') - info_dict = entries[0] - info_dict.update({ + formats = [] + for a_format in video_data: + format_id = a_format.get('resolution') + height = int_or_none(self._search_regex( + r'(\d+)p', format_id, 'height', default=None)) + formats.append({ + 'url': a_format['uri'], + 'format_id': format_id, + 'ext': mimetype2ext(a_format.get('mime')) or 'mp4', + 'height': height, + 'quality': 1 if format_id == 'Source' else 0, + }) + + self._sort_formats(formats) + + return { 'id': video_id, 'title': title, 'age_limit': age_limit, - }) - - return info_dict + 'formats': formats, + } From 2ab2c0d1f53f66614eda4fefb042e851e78097f0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 5 Feb 2017 22:30:13 +0800 Subject: [PATCH 182/195] [iwara] Add width (closes #11724) The heuristic is from #11724 --- youtube_dl/extractor/iwara.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py index 011274b02..a7514fc80 100644 --- a/youtube_dl/extractor/iwara.py +++ b/youtube_dl/extractor/iwara.py @@ -81,6 +81,7 @@ class IwaraIE(InfoExtractor): 'format_id': format_id, 'ext': mimetype2ext(a_format.get('mime')) or 'mp4', 'height': height, + 'width': int_or_none(height / 9.0 * 16.0 if height else None), 'quality': 1 if format_id == 'Source' else 0, }) From 019f4c03717bfd2b887309e5a4c96ea82cbedf34 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 5 Feb 2017 22:47:04 +0800 Subject: [PATCH 183/195] [bandcamp] Fix extraction for incomplete albums Closes #11727 --- ChangeLog | 1 + youtube_dl/extractor/bandcamp.py | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 77286dbef..984191925 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [bandcamp] Fix extraction for incomplete albums (#11727) * [iwara] Fix extraction (#11781) * [googledrive] Fix extraction on Python 3.6 diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 88c590e98..056e06376 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -209,6 +209,15 @@ class BandcampAlbumIE(InfoExtractor): 'id': 'entropy-ep', }, 'playlist_mincount': 3, + }, { + # not all tracks have songs + 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague', + 'info_dict': { + 'id': 'we-are-the-plague', + 'title': 'WE ARE THE PLAGUE', + 'uploader_id': 'insulters', + }, + 'playlist_count': 2, }] def _real_extract(self, url): @@ -217,12 +226,16 @@ class BandcampAlbumIE(InfoExtractor): album_id = mobj.group('album_id') playlist_id = album_id or uploader_id webpage = self._download_webpage(url, playlist_id) - tracks_paths = re.findall(r'