From 06e9363b7a21acf6a592780a706b0fdd6b5a2d4e Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Sun, 8 Jan 2017 22:27:28 +0530 Subject: [PATCH 01/93] [openload] Fix extraction (closes #10408) Just a minor fix for openload --- youtube_dl/extractor/openload.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 2ce9f3826..3d4ad7dca 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -64,16 +64,17 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) ol_id = self._search_regex( - ']+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)', + ']+id="[^"]+"[^>]*>([0-9]+)', webpage, 'openload ID') - first_two_chars = int(float(ol_id[0:][:2])) + first_three_chars = int(float(ol_id[0:][:3])) + fifth_char = int(float(ol_id[3:5])) urlcode = '' - num = 2 + num = 5 while num < len(ol_id): - urlcode += compat_chr(int(float(ol_id[num:][:3])) - - first_two_chars * int(float(ol_id[num + 3:][:2]))) + urlcode += compat_chr(int(float(ol_id[num:][:3])) + + first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2]))) num += 5 video_url = 'https://openload.co/stream/' + urlcode From fb6a59205e3dc5bb1d37d50ac1161314c0d66cf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Jan 2017 23:55:55 +0700 Subject: [PATCH 02/93] [mixcloud] Fix extraction (closes #11674) --- youtube_dl/extractor/mixcloud.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 4ba2310fd..a24b3165a 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -16,7 +16,6 @@ from ..utils import ( clean_html, ExtractorError, OnDemandPagedList, - parse_count, str_to_int, ) @@ -36,7 +35,6 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'dholbach', 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, - 
'like_count': int, }, }, { 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', @@ -49,7 +47,6 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'gillespeterson', 'thumbnail': 're:https?://.*', 'view_count': int, - 'like_count': int, }, }, { 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', @@ -89,26 +86,18 @@ class MixcloudIE(InfoExtractor): song_url = play_info['stream_url'] - PREFIX = ( - r'm-play-on-spacebar[^>]+' - r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') - title = self._html_search_regex( - PREFIX + r'm-title="([^"]+)"', webpage, 'title') + title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') thumbnail = self._proto_relative_url(self._html_search_regex( - PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', - fatal=False)) + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) uploader = self._html_search_regex( - PREFIX + r'm-owner-name="([^"]+)"', - webpage, 'uploader', fatal=False) + r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) uploader_id = self._search_regex( r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) - like_count = parse_count(self._search_regex( - r'\bbutton-favorite[^>]+>.*?]+class=["\']toggle-number[^>]+>\s*([^<]+)', - webpage, 'like count', default=None)) view_count = str_to_int(self._search_regex( [r'([0-9,.]+)'], + r'/listeners/?">([0-9,.]+)', + r'm-tooltip=["\']([\d,.]+) plays'], webpage, 'play count', default=None)) return { @@ -120,7 +109,6 @@ class MixcloudIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader_id, 'view_count': view_count, - 'like_count': like_count, } From 9837cb7507e0635755082a7fd2e748c4106fefc4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 23:02:50 +0100 Subject: [PATCH 03/93] [ooyala] add support for videos with embedToken(#11684) --- youtube_dl/extractor/generic.py | 9 ++++++++- 
youtube_dl/extractor/ooyala.py | 14 +++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 86dc79307..ac29ec600 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1939,7 +1939,14 @@ class GenericIE(InfoExtractor): re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P.{32})[\'"]', webpage)) if mobj is not None: - return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url})) + embed_token = self._search_regex( + r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)', + webpage, 'ooyala embed token', default=None) + return OoyalaIE._build_url_result(smuggle_url( + mobj.group('ec'), { + 'domain': url, + 'embed_token': embed_token, + })) # Look for multiple Ooyala embeds on SBN network websites mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index c2807d0f6..f00cf745b 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor): _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' 
- def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None): + def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] @@ -29,7 +29,8 @@ class OoyalaBaseIE(InfoExtractor): self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + compat_urllib_parse_urlencode({ 'domain': domain, - 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds', + 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth', + 'embedToken': embed_token, }), video_id) cur_auth_data = auth_data['authorization_data'][embed_code] @@ -52,6 +53,12 @@ class OoyalaBaseIE(InfoExtractor): elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif delivery_type == 'hds' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + s_url, embed_code, mpd_id='dash', fatal=False)) + elif delivery_type == 'smooth': + self._extract_ism_formats( + s_url, embed_code, ism_id='mss', fatal=False) elif ext == 'smil': formats.extend(self._extract_smil_formats( s_url, embed_code, fatal=False)) @@ -146,8 +153,9 @@ class OoyalaIE(OoyalaBaseIE): embed_code = self._match_id(url) domain = smuggled_data.get('domain') supportedformats = smuggled_data.get('supportedformats') + embed_token = smuggled_data.get('embed_token') content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) - return self._extract(content_tree_url, embed_code, domain, supportedformats) + return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token) class OoyalaExternalIE(OoyalaBaseIE): From 5e8eebb6009ac3e9f7dfc803d8561174d207c1a2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 23:06:07 +0100 Subject: [PATCH 
04/93] [mitele] extract dash formats --- youtube_dl/extractor/mitele.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 8984d3b8d..79e0b8ada 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -190,7 +190,7 @@ class MiTeleIE(InfoExtractor): return { '_type': 'url_transparent', # for some reason only HLS is supported - 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}), + 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}), 'id': video_id, 'title': title, 'description': description, From adf063dad1792f0c9c680d13ccd984b4ad60ac29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 06:17:03 +0700 Subject: [PATCH 05/93] [mtv,cc,cmt,spike] Improve and refactor - Eliminate _transform_rtmp_url * Generalize triforce mgid extraction + [cmt] Add support for full-episodes (closes #11623) --- youtube_dl/extractor/cmt.py | 25 ++++++------ youtube_dl/extractor/comedycentral.py | 17 +------- youtube_dl/extractor/mtv.py | 58 ++++++++++++++++++--------- youtube_dl/extractor/spike.py | 2 +- 4 files changed, 54 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index 7d3e9b0c9..6302b8d9c 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -1,13 +1,11 @@ from __future__ import unicode_literals from .mtv import MTVIE -from ..utils import ExtractorError class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' - _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', @@ -35,15 +33,16 @@ class CMTIE(MTVIE): 'only_matching': True, }] - 
@classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - if 'error_not_available.swf' in rtmp_video_url: - raise ExtractorError( - '%s said: video is not available' % cls.IE_NAME, expected=True) - - return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url) - def _extract_mgid(self, webpage): - return self._search_regex( + mgid = self._search_regex( r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P.+?)\1', - webpage, 'mgid', group='mgid') + webpage, 'mgid', group='mgid', default=None) + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 816e0bfb6..4cac29415 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -48,17 +48,8 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - - feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed') - feed = self._parse_json(feed_json, playlist_id) - zones = feed['manifest']['zones'] - - video_zone = zones['t2_lc_promo1'] - feed = self._download_json(video_zone['feed'], playlist_id) - mgid = feed['result']['data']['id'] - + mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1') videos_info = self._get_videos_info(mgid) - return videos_info @@ -94,12 +85,6 @@ class ToshIE(MTVServicesInfoExtractor): 'only_matching': True, }] - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url) - new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm') - return new_urls - class 
ComedyCentralTVIE(MTVServicesInfoExtractor): _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P[^/?#&]+)' diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5250db212..00a980c7d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -13,11 +13,11 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, - NO_DEFAULT, RegexNotFoundError, sanitized_Request, strip_or_none, timeconvert, + try_get, unescapeHTML, update_url_query, url_basename, @@ -42,15 +42,6 @@ class MTVServicesInfoExtractor(InfoExtractor): # Remove the templates, like &device={device} return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) - # This was originally implemented for ComedyCentral, but it also works here - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - m = re.match(r'^rtmpe?://.*?/(?Pgsp\..+?/.*)$', rtmp_video_url) - if not m: - return {'rtmp': rtmp_video_url} - base = 'http://viacommtvstrmfs.fplive.net/' - return {'http': base + m.group('finalid')} - def _get_feed_url(self, uri): return self._FEED_URL @@ -91,22 +82,28 @@ class MTVServicesInfoExtractor(InfoExtractor): if rendition.get('method') == 'hls': hls_url = rendition.find('./src').text formats.extend(self._extract_m3u8_formats( - hls_url, video_id, ext='mp4', entry_protocol='m3u8_native')) + hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls')) else: # fms try: _, _, ext = rendition.attrib['type'].partition('/') rtmp_video_url = rendition.find('./src').text + if 'error_not_available.swf' in rtmp_video_url: + raise ExtractorError( + '%s said: video is not available' % self.IE_NAME, + expected=True) if rtmp_video_url.endswith('siteunavail.png'): continue - new_urls = self._transform_rtmp_url(rtmp_video_url) formats.extend([{ - 'ext': 'flv' if new_url.startswith('rtmp') else ext, - 'url': new_url, - 'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])), + 'ext': 'flv' if 
rtmp_video_url.startswith('rtmp') else ext, + 'url': rtmp_video_url, + 'format_id': '-'.join(filter(None, [ + 'rtmp' if rtmp_video_url.startswith('rtmp') else None, + rendition.get('bitrate')])), 'width': int(rendition.get('width')), 'height': int(rendition.get('height')), - } for kind, new_url in new_urls.items()]) + }]) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') self._sort_formats(formats) @@ -212,7 +209,28 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage, default=NO_DEFAULT): + def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): + triforce_feed = self._parse_json(self._search_regex( + r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, + 'triforce feed', default='{}'), video_id, fatal=False) + + data_zone = self._search_regex( + r'data-zone=(["\'])(?P.+?_lc_promo.*?)\1', webpage, + 'data zone', default=data_zone, group='zone') + + feed_url = try_get( + triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'], + compat_str) + if not feed_url: + return + + feed = self._download_json(feed_url, video_id, fatal=False) + if not feed: + return + + return try_get(feed, lambda x: x['result']['data']['id'], compat_str) + + def _extract_mgid(self, webpage): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -232,7 +250,11 @@ class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default) + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None) + + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/spike.py 
b/youtube_dl/extractor/spike.py index abfee3ece..c59896a17 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -46,7 +46,7 @@ class SpikeIE(MTVServicesInfoExtractor): _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') def _extract_mgid(self, webpage): - mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) + mgid = super(SpikeIE, self)._extract_mgid(webpage) if mgid is None: url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') video_type, episode_id = url_parts.split('/', 1) From e54fc0524ebf7e3ec02fbd22f00fce466c952791 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 06:23:24 +0700 Subject: [PATCH 06/93] [cmt] Add support for video-clips --- youtube_dl/extractor/cmt.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index 6302b8d9c..f6b794fb3 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -5,7 +5,7 @@ from .mtv import MTVIE class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', @@ -31,6 +31,12 @@ class CMTIE(MTVIE): }, { 'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172', 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501', + 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes', + 'only_matching': True, }] def _extract_mgid(self, webpage): From 4f66c16f337f3b2250d369b56bc31cfd7de06f89 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Sat, 14 Jan 
2017 00:26:11 +0100 Subject: [PATCH 07/93] [brightcove:legacy] Fix misplaced backslash in a regexp --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index aa2923ccf..2e56d1df9 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -204,7 +204,7 @@ class BrightcoveLegacyIE(InfoExtractor): # // build Brightcove XML # } m = re.search( - r'''(?x)customBC.\createVideo\( + r'''(?x)customBC\.createVideo\( .*? # skipping width and height ["\'](?P\d+)["\']\s*,\s* # playerID ["\'](?PAQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters From 0b94510cd00d50ddda74ba0079f856650f24680e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 07:27:20 +0700 Subject: [PATCH 08/93] [ChangeLog] Actualize --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index f1e234507..0106a7ae8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version + +Core ++ [common] Add ability to customize akamai manifest host ++ [utils] Add more date formats + +Extractors +- [mtv] Eliminate _transform_rtmp_url +* [mtv] Generalize triforce mgid extraction ++ [cmt] Add support for full episodes and video clips (#11623) ++ [mitele] Extract DASH formats ++ [ooyala] Add support for videos with embedToken (#11684) +* [mixcloud] Fix extraction (#11674) +* [openload] Fix extraction (#10408) +* [tv4] Improve extraction (#11698) +* [freesound] Fix and improve extraction (#11602) ++ [nick] Add support for beta.nick.com (#11655) +* [mtv,cc] Use HLS by default with native HLS downloader (#11641) +* [mtv] Fix non-HLS extraction + + version 2017.01.10 Extractors From 5d4c7daa49b8ff83aa6fb13b183f47d4427c6513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 07:31:07 +0700 Subject: [PATCH 09/93] release 
2017.01.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6a4c25680..a7bf2b90c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.10 +[debug] youtube-dl version 2017.01.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0106a7ae8..dba18d39b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.14 Core + [common] Add ability to customize akamai manifest host diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 214124722..17c6f9eb2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.10' +__version__ = '2017.01.14' From abe8cb763fd43ee2db09c73965f38db7db02559e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 08:30:00 +0700 Subject: [PATCH 10/93] [cbc] Improve playlist support (closes #11704) --- youtube_dl/extractor/cbc.py | 55 +++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 7c76ceac8..a291685bf 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -90,36 +90,49 @@ class CBCIE(InfoExtractor): }, }], 'skip': 'Geo-restricted to Canada', + }, { + # multiple CBC.APP.Caffeine.initInstance(...) 
+ 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', + 'info_dict': { + 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', + 'id': 'dog-indoor-exercise-winter-1.3928238', + }, + 'playlist_mincount': 6, }] @classmethod def suitable(cls, url): return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) + def _extract_player_init(self, player_init, display_id): + player_info = self._parse_json(player_init, display_id, js_to_json) + media_id = player_info.get('mediaId') + if not media_id: + clip_id = player_info['clipId'] + feed = self._download_json( + 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, + clip_id, fatal=False) + if feed: + media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) + if not media_id: + media_id = self._download_json( + 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, + clip_id)['entries'][0]['id'].split('/')[-1] + return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - player_init = self._search_regex( - r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init', - default=None) - if player_init: - player_info = self._parse_json(player_init, display_id, js_to_json) - media_id = player_info.get('mediaId') - if not media_id: - clip_id = player_info['clipId'] - feed = self._download_json( - 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, - clip_id, fatal=False) - if feed: - media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) - if not media_id: - media_id = self._download_json( - 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, - 
clip_id)['entries'][0]['id'].split('/')[-1] - return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - else: - entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)] - return self.playlist_result(entries) + entries = [ + self._extract_player_init(player_init, display_id) + for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] + entries.extend([ + self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)]) + return self.playlist_result( + entries, display_id, + self._og_search_title(webpage, fatal=False), + self._og_search_description(webpage)) class CBCPlayerIE(InfoExtractor): From 8854f3fe782e48f4b145eacf58cca533a9f9b199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 08:30:00 +0700 Subject: [PATCH 11/93] [README.md] Clarify newline format in cookies section (closes #11709) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 905c1b73f..a606346b2 100644 --- a/README.md +++ b/README.md @@ -841,7 +841,7 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox). -Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. 
Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. +Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). 
From 99d537a5e08499e20c3507c3f84048feacf77522 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 14 Jan 2017 07:12:31 +0100 Subject: [PATCH 12/93] [ooyala] fix typo --- youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index f00cf745b..84be2b1e3 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -53,7 +53,7 @@ class OoyalaBaseIE(InfoExtractor): elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif delivery_type == 'hds' or ext == 'mpd': + elif delivery_type == 'dash' or ext == 'mpd': formats.extend(self._extract_mpd_formats( s_url, embed_code, mpd_id='dash', fatal=False)) elif delivery_type == 'smooth': From b80e2ebc8daa1ec30396cfa69836f1d96d23028f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 18:27:22 +0700 Subject: [PATCH 13/93] [dramafever] Add support for URLs with language code (#11714) --- youtube_dl/extractor/dramafever.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 1edd8e7bd..bcd9fe2a0 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -66,7 +66,7 @@ class DramaFeverBaseIE(AMPIE): class DramaFeverIE(DramaFeverBaseIE): IE_NAME = 'dramafever' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+/[0-9]+)(?:/|$)' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P[0-9]+/[0-9]+)(?:/|$)' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { @@ -103,6 +103,9 @@ class DramaFeverIE(DramaFeverBaseIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/', + 'only_matching': True, }] def 
_real_extract(self, url): @@ -148,7 +151,7 @@ class DramaFeverIE(DramaFeverBaseIE): class DramaFeverSeriesIE(DramaFeverBaseIE): IE_NAME = 'dramafever:series' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/', 'info_dict': { From 621a2800ca259399c0c010a1cbc2c56aee90228c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 04:42:05 +0700 Subject: [PATCH 14/93] [vevo] Improve geo restriction detection --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index d82261e5e..f0a8075fb 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -206,7 +206,7 @@ class VevoIE(VevoBaseIE): note='Retrieving oauth token', errnote='Unable to retrieve oauth token') - if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: + if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage): self.raise_geo_restricted( '%s said: This page is currently unavailable in your region' % self.IE_NAME) From cd55c6ccd7b9cd0c48d475330c40f382eb0bc625 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Wed, 4 Jan 2017 01:51:08 +0300 Subject: [PATCH 15/93] [beam:live] Add extractor --- youtube_dl/extractor/beampro.py | 82 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 83 insertions(+) create mode 100644 youtube_dl/extractor/beampro.py diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py new file mode 100644 index 000000000..dc0a2b4af --- /dev/null +++ b/youtube_dl/extractor/beampro.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + 
ExtractorError, + clean_html, + compat_str, + int_or_none, + parse_iso8601, + try_get, +) + + +class BeamProLiveIE(InfoExtractor): + IE_NAME = 'Beam:live' + _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P[^?]+)$' + _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}' + _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.m3u8' + _RATINGS = {'family': 0, 'teen': 13, '18+': 18} + + _TEST = { + 'url': 'http://www.beam.pro/niterhayven', + 'info_dict': { + 'id': '261562', + 'ext': 'mp4', + 'uploader': 'niterhayven', + 'timestamp': 1483477281, + 'age_limit': 18, + 'title': 'Introducing The Witcher 3 // The Grind Starts Now!', + 'thumbnail': r're:https://.*\.jpg$', + 'upload_date': '20170103', + 'uploader_id': 373396, + 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', + 'is_live': True, + }, + 'skip': 'niterhayven is offline', + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + chan_data = self._download_json(self._API_CHANNEL.format(channel_id), channel_id) + + if not chan_data.get('online'): + raise ExtractorError('{0} is offline'.format(channel_id), expected=True) + + formats = self._extract_m3u8_formats( + self._API_MANIFEST.format(chan_data.get('id')), channel_id, ext='mp4') + + self._sort_formats(formats) + info = {} + info['formats'] = formats + if chan_data: + info.update(self._extract_info(chan_data)) + if not info.get('title'): + info['title'] = self._live_title(channel_id) + if not info.get('id'): # barely possible but just in case + info['id'] = compat_str(abs(hash(channel_id)) % (10 ** 8)) + + return info + + def _extract_info(self, info): + thumbnail = try_get(info, lambda x: x['thumbnail']['url'], compat_str) + username = try_get(info, lambda x: x['user']['url'], compat_str) + video_id = compat_str(info['id']) if info.get('id') else None + rating = info.get('audience') + + return { + 'id': video_id, + 'title': info.get('name'), + 'description': 
clean_html(info.get('description')), + 'age_limit': self._RATINGS[rating] if rating in self._RATINGS else None, + 'is_live': True if info.get('online') else False, + 'timestamp': parse_iso8601(info.get('updatedAt')), + 'uploader': info.get('token') or username, + 'uploader_id': int_or_none(info.get('userId')), + 'view_count': int_or_none(info.get('viewersTotal')), + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5ba8efb0e..9d0610d21 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -88,6 +88,7 @@ from .bbc import ( BBCCoUkPlaylistIE, BBCIE, ) +from .beampro import BeamProLiveIE from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE From af62de104f33ebf8b473b3f7935451077fa56ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 06:07:35 +0700 Subject: [PATCH 16/93] [beam:live] Improve and simplify (#10702, closes #11596) --- youtube_dl/extractor/beampro.py | 73 +++++++++++++++------------------ 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index dc0a2b4af..f3a9e3278 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -14,25 +14,23 @@ from ..utils import ( class BeamProLiveIE(InfoExtractor): IE_NAME = 'Beam:live' - _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P[^?]+)$' - _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}' - _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.m3u8' + _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P[^/?#&]+)' _RATINGS = {'family': 0, 'teen': 13, '18+': 18} - _TEST = { 'url': 'http://www.beam.pro/niterhayven', 'info_dict': { 'id': '261562', 'ext': 'mp4', - 'uploader': 'niterhayven', - 'timestamp': 1483477281, - 'age_limit': 18, 'title': 'Introducing The Witcher 3 // The Grind Starts Now!', - 'thumbnail': 
r're:https://.*\.jpg$', - 'upload_date': '20170103', - 'uploader_id': 373396, 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', + 'thumbnail': r're:https://.*\.jpg$', + 'timestamp': 1483477281, + 'upload_date': '20170103', + 'uploader': 'niterhayven', + 'uploader_id': '373396', + 'age_limit': 18, 'is_live': True, + 'view_count': int, }, 'skip': 'niterhayven is offline', 'params': { @@ -41,42 +39,35 @@ class BeamProLiveIE(InfoExtractor): } def _real_extract(self, url): - channel_id = self._match_id(url) - chan_data = self._download_json(self._API_CHANNEL.format(channel_id), channel_id) + channel_name = self._match_id(url) - if not chan_data.get('online'): - raise ExtractorError('{0} is offline'.format(channel_id), expected=True) + chan = self._download_json( + 'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name) + + if chan.get('online') is False: + raise ExtractorError( + '{0} is offline'.format(channel_name), expected=True) + + channel_id = chan['id'] formats = self._extract_m3u8_formats( - self._API_MANIFEST.format(chan_data.get('id')), channel_id, ext='mp4') - + 'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id, + channel_name, ext='mp4', m3u8_id='hls', fatal=False) self._sort_formats(formats) - info = {} - info['formats'] = formats - if chan_data: - info.update(self._extract_info(chan_data)) - if not info.get('title'): - info['title'] = self._live_title(channel_id) - if not info.get('id'): # barely possible but just in case - info['id'] = compat_str(abs(hash(channel_id)) % (10 ** 8)) - return info - - def _extract_info(self, info): - thumbnail = try_get(info, lambda x: x['thumbnail']['url'], compat_str) - username = try_get(info, lambda x: x['user']['url'], compat_str) - video_id = compat_str(info['id']) if info.get('id') else None - rating = info.get('audience') + user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id']) return { - 'id': video_id, - 'title': info.get('name'), - 'description': 
clean_html(info.get('description')), - 'age_limit': self._RATINGS[rating] if rating in self._RATINGS else None, - 'is_live': True if info.get('online') else False, - 'timestamp': parse_iso8601(info.get('updatedAt')), - 'uploader': info.get('token') or username, - 'uploader_id': int_or_none(info.get('userId')), - 'view_count': int_or_none(info.get('viewersTotal')), - 'thumbnail': thumbnail, + 'id': compat_str(chan.get('id') or channel_name), + 'title': self._live_title(chan.get('name') or channel_name), + 'description': clean_html(chan.get('description')), + 'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str), + 'timestamp': parse_iso8601(chan.get('updatedAt')), + 'uploader': chan.get('token') or try_get( + chan, lambda x: x['user']['username'], compat_str), + 'uploader_id': compat_str(user_id) if user_id else None, + 'age_limit': self._RATINGS.get(chan.get('audience')), + 'is_live': True, + 'view_count': int_or_none(chan.get('viewersTotal')), + 'formats': formats, } From 6f0be937473c5d5f60cd8e712287fcee844093d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 06:09:32 +0700 Subject: [PATCH 17/93] [YoutubeDL] Improve protocol auto determining (closes #11720) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5d654f55f..41d9a63ee 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1363,7 +1363,7 @@ class YoutubeDL(object): format['ext'] = determine_ext(format['url']).lower() # Automatically determine protocol if missing (useful for format # selection purposes) - if 'protocol' not in format: + if format.get('protocol') is None: format['protocol'] = determine_protocol(format) # Add HTTP headers, so that external programs can use them from the # json output From a7acf868a55b3d734bef564e3392020f18c20422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 10:34:39 
+0700 Subject: [PATCH 18/93] [yourupload] Fix extraction (closes #11601) --- youtube_dl/extractor/yourupload.py | 49 +++++++++++++----------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py index 4ce327845..9fa772838 100644 --- a/youtube_dl/extractor/yourupload.py +++ b/youtube_dl/extractor/yourupload.py @@ -2,44 +2,37 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import urljoin class YourUploadIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)? - (?:yourupload\.com/watch| - embed\.yourupload\.com| - embed\.yucache\.net - )/(?P[A-Za-z0-9]+) - ''' - _TESTS = [ - { - 'url': 'http://yourupload.com/watch/14i14h', - 'md5': '5e2c63385454c557f97c4c4131a393cd', - 'info_dict': { - 'id': '14i14h', - 'ext': 'mp4', - 'title': 'BigBuckBunny_320x180.mp4', - 'thumbnail': r're:^https?://.*\.jpe?g', - } - }, - { - 'url': 'http://embed.yourupload.com/14i14h', - 'only_matching': True, - }, - { - 'url': 'http://embed.yucache.net/14i14h?client_file_id=803349', - 'only_matching': True, - }, - ] + _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P[A-Za-z0-9]+)' + _TESTS = [{ + 'url': 'http://yourupload.com/watch/14i14h', + 'md5': '5e2c63385454c557f97c4c4131a393cd', + 'info_dict': { + 'id': '14i14h', + 'ext': 'mp4', + 'title': 'BigBuckBunny_320x180.mp4', + 'thumbnail': r're:^https?://.*\.jpe?g', + } + }, { + 'url': 'http://www.yourupload.com/embed/14i14h', + 'only_matching': True, + }, { + 'url': 'http://embed.yourupload.com/14i14h', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - embed_url = 'http://embed.yucache.net/{0:}'.format(video_id) + embed_url = 'http://www.yourupload.com/embed/%s' % video_id + webpage = self._download_webpage(embed_url, video_id) title = self._og_search_title(webpage) - video_url = self._og_search_video_url(webpage) + video_url 
= urljoin(embed_url, self._og_search_video_url(webpage)) thumbnail = self._og_search_thumbnail(webpage, default=None) return { From 8e4988f1a21184839dcd23d7133c250a43c5ea58 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Jan 2017 22:10:57 +0800 Subject: [PATCH 19/93] [niconico] Remove codes for downloading anonymously Apparently Niconico now blocks playing without an account Closes #11170 --- youtube_dl/extractor/niconico.py | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index a104e33f8..7e6c594c8 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -7,7 +7,6 @@ import datetime from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -40,6 +39,7 @@ class NiconicoIE(InfoExtractor): 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, }, + 'skip': 'Requires an account', }, { # File downloaded with and without credentials are different, so omit # the md5 field @@ -55,6 +55,7 @@ class NiconicoIE(InfoExtractor): 'timestamp': 1304065916, 'duration': 209, }, + 'skip': 'Requires an account', }, { # 'video exists but is marked as "deleted" # md5 is unstable @@ -65,9 +66,10 @@ class NiconicoIE(InfoExtractor): 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', 'upload_date': '20071224', - 'timestamp': 1198527840, # timestamp field has different value if logged in + 'timestamp': int, # timestamp field has different value if logged in 'duration': 304, }, + 'skip': 'Requires an account', }, { 'url': 'http://www.nicovideo.jp/watch/so22543406', 'info_dict': { @@ -79,7 +81,8 @@ class NiconicoIE(InfoExtractor): 'upload_date': '20140104', 'uploader': 'アニメロチャンネル', 'uploader_id': '312', - } + }, + 'skip': 'The viewing period of the video you were searching for has expired.', }] _VALID_URL = 
r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' @@ -134,23 +137,7 @@ class NiconicoIE(InfoExtractor): 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', video_id, 'Downloading flv info') else: - # Get external player info - ext_player_info = self._download_webpage( - 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) - thumb_play_key = self._search_regex( - r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') - - # Get flv info - flv_info_data = compat_urllib_parse_urlencode({ - 'k': thumb_play_key, - 'v': video_id - }) - flv_info_request = sanitized_Request( - 'http://ext.nicovideo.jp/thumb_watch', flv_info_data, - {'Content-Type': 'application/x-www-form-urlencoded'}) - flv_info_webpage = self._download_webpage( - flv_info_request, video_id, - note='Downloading flv info', errnote='Unable to download flv info') + raise ExtractorError('Niconico videos now require logging in', expected=True) flv_info = compat_urlparse.parse_qs(flv_info_webpage) if 'url' not in flv_info: From dcae7b3fdc6e6812e78c8dba96d671ccf0ab068e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Jan 2017 22:51:54 +0800 Subject: [PATCH 20/93] [niconico] Allow login via cookies Some codes are borrowed from #7968, which is by @jlhg Closes #7968 --- ChangeLog | 5 +++++ youtube_dl/extractor/niconico.py | 18 +++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index dba18d39b..029d13426 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version + +Extractors ++ [niconico] Support login via cookies (#7968) + version 2017.01.14 Core diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 7e6c594c8..8baac23e4 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -87,8 +87,6 @@ class NiconicoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 
'niconico' - # Determine whether the downloader used authentication to download video - _AUTHENTICATED = False def _real_initialize(self): self._login() @@ -112,8 +110,6 @@ class NiconicoIE(InfoExtractor): if re.search(r'(?i)

Log in error

', login_results) is not None: self._downloader.report_warning('unable to log in: bad username or password') return False - # Successful login - self._AUTHENTICATED = True return True def _real_extract(self, url): @@ -131,19 +127,19 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') - if self._AUTHENTICATED: - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', - video_id, 'Downloading flv info') - else: - raise ExtractorError('Niconico videos now require logging in', expected=True) + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', + video_id, 'Downloading flv info') flv_info = compat_urlparse.parse_qs(flv_info_webpage) if 'url' not in flv_info: if 'deleted' in flv_info: raise ExtractorError('The video has been deleted.', expected=True) + elif 'closed' in flv_info: + raise ExtractorError('Niconico videos now require logging in', + expected=True) else: raise ExtractorError('Unable to find video URL') From 16e2c8f7710bffb462921dbc93adfa6274bd9334 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 16 Jan 2017 00:06:52 +0800 Subject: [PATCH 21/93] [brightcove] Recognize another player ID Closes #11688 --- ChangeLog | 1 + youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/generic.py | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 029d13426..2e0ddd4f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) version 2017.01.14 diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 2e56d1df9..5c6e99da1 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -179,7 
+179,7 @@ class BrightcoveLegacyIE(InfoExtractor): params = {} - playerID = find_param('playerID') + playerID = find_param('playerID') or find_param('playerId') if playerID is None: raise ExtractorError('Cannot find player ID') params['playerID'] = playerID diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ac29ec600..a3ac7d26b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -422,6 +422,26 @@ class GenericIE(InfoExtractor): 'skip_download': True, # m3u8 download }, }, + { + # Brightcove with alternative playerID key + 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html', + 'info_dict': { + 'id': 'nmeth.2062_SV1', + 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research', + }, + 'playlist': [{ + 'info_dict': { + 'id': '2228375078001', + 'ext': 'mp4', + 'title': 'nmeth.2062-sv1', + 'description': 'nmeth.2062-sv1', + 'timestamp': 1363357591, + 'upload_date': '20130315', + 'uploader': 'Nature Publishing Group', + 'uploader_id': '1964492299001', + }, + }], + }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', From 906420cae37ee3c2f48d23c3a4fa0543a66947d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 21:54:47 +0700 Subject: [PATCH 22/93] [limelight] Improve and make more robust (closes #11737) + Add support for direct http for videos hosted on video.llnw.net * Check handmade http URLs --- youtube_dl/extractor/limelight.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 905a0e85f..e635f3c4d 100644 --- a/youtube_dl/extractor/limelight.py +++ 
b/youtube_dl/extractor/limelight.py @@ -59,14 +59,26 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:] - urls.append(http_url) - http_fmt = fmt.copy() - http_fmt.update({ - 'url': http_url, - 'format_id': format_id.replace('rtmp', 'http'), - }) - formats.append(http_fmt) + http_format_id = format_id.replace('rtmp', 'http') + + CDN_HOSTS = ( + ('delvenetworks.com', 'cpl.delvenetworks.com'), + ('video.llnw.net', 's2.content.video.llnw.net'), + ) + for cdn_host, http_host in CDN_HOSTS: + if cdn_host not in rtmp.group('host').lower(): + continue + http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:]) + urls.append(http_url) + if self._is_valid_url(http_url, video_id, http_format_id): + http_fmt = fmt.copy() + http_fmt.update({ + 'url': http_url, + 'format_id': http_format_id, + }) + formats.append(http_fmt) + break + fmt.update({ 'url': rtmp.group('url'), 'play_path': rtmp.group('playpath'), From 0ce8c66fb05fefbe51ac1eca8d3ddbd561b38a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 22:07:12 +0700 Subject: [PATCH 23/93] [options] Include custom conf in final argv (closes #11741) --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0eb4924b6..0b8c1671d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -867,7 +867,7 @@ def parseOpts(overrideArguments=None): if '--ignore-config' not in system_conf: user_conf = _readUserConf() - argv = system_conf + user_conf + command_line_conf + argv = system_conf + user_conf + custom_conf + command_line_conf opts, args = parser.parse_args(argv) if opts.verbose: for conf_label, conf in ( From 79fc8496c6ab423d591f9ed1a41358d038242bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 
2017 23:31:50 +0700 Subject: [PATCH 24/93] [xiami] Improve extraction (closes #11699) * Relax _VALID_URLs * Improve track metadata extraction --- youtube_dl/extractor/xiami.py | 53 +++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index 86abef257..d017e03de 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -16,7 +16,9 @@ class XiamiBaseIE(InfoExtractor): return webpage def _extract_track(self, track, track_id=None): - title = track['title'] + track_name = track.get('songName') or track.get('name') or track['subName'] + artist = track.get('artist') or track.get('artist_name') or track.get('singers') + title = '%s - %s' % (artist, track_name) if artist else track_name track_url = self._decrypt(track['location']) subtitles = {} @@ -31,9 +33,10 @@ class XiamiBaseIE(InfoExtractor): 'thumbnail': track.get('pic') or track.get('album_pic'), 'duration': int_or_none(track.get('length')), 'creator': track.get('artist', '').split(';')[0], - 'track': title, - 'album': track.get('album_name'), - 'artist': track.get('artist'), + 'track': track_name, + 'track_number': int_or_none(track.get('track')), + 'album': track.get('album_name') or track.get('title'), + 'artist': artist, 'subtitles': subtitles, } @@ -68,14 +71,14 @@ class XiamiBaseIE(InfoExtractor): class XiamiSongIE(XiamiBaseIE): IE_NAME = 'xiami:song' IE_DESC = '虾米音乐' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.xiami.com/song/1775610518', 'md5': '521dd6bea40fd5c9c69f913c232cb57e', 'info_dict': { 'id': '1775610518', 'ext': 'mp3', - 'title': 'Woman', + 'title': 'HONNE - Woman', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 265, 'creator': 'HONNE', @@ -95,7 +98,7 @@ class XiamiSongIE(XiamiBaseIE): 'info_dict': { 'id': '1775256504', 'ext': 
'mp3', - 'title': '悟空', + 'title': '戴荃 - 悟空', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 200, 'creator': '戴荃', @@ -109,6 +112,26 @@ class XiamiSongIE(XiamiBaseIE): }, }, 'skip': 'Georestricted', + }, { + 'url': 'http://www.xiami.com/song/1775953850', + 'info_dict': { + 'id': '1775953850', + 'ext': 'mp3', + 'title': 'До Скону - Чума Пожирает Землю', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 683, + 'creator': 'До Скону', + 'track': 'Чума Пожирает Землю', + 'track_number': 7, + 'album': 'Ад', + 'artist': 'До Скону', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.xiami.com/song/xLHGwgd07a1', + 'only_matching': True, }] def _real_extract(self, url): @@ -124,7 +147,7 @@ class XiamiPlaylistBaseIE(XiamiBaseIE): class XiamiAlbumIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:album' IE_DESC = '虾米音乐 - 专辑' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[^/?#&]+)' _TYPE = '1' _TESTS = [{ 'url': 'http://www.xiami.com/album/2100300444', @@ -136,28 +159,34 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE): }, { 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', 'only_matching': True, + }, { + 'url': 'http://www.xiami.com/album/URVDji2a506', + 'only_matching': True, }] class XiamiArtistIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:artist' IE_DESC = '虾米音乐 - 歌手' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[^/?#&]+)' _TYPE = '2' - _TEST = { + _TESTS = [{ 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', 'info_dict': { 'id': '2132', }, 'playlist_count': 20, 'skip': 'Georestricted', - } + }, { + 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99', + 'only_matching': True, + }] class XiamiCollectionIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:collection' IE_DESC = '虾米音乐 - 精选集' - _VALID_URL = 
r'https?://(?:www\.)?xiami\.com/collect/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[^/?#&]+)' _TYPE = '3' _TEST = { 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', From ddd53c392e0b3d3d2c62ba28117a9b07702c5bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:42:04 +0700 Subject: [PATCH 25/93] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2e0ddd4f6..ee59e120c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,22 @@ version +Core +* [options] Apply custom config to final composite configuration (#11741) +* [YoutubeDL] Improve protocol auto determining (#11720) + Extractors +* [xiami] Relax URL regular expressions +* [xiami] Improve track metadata extraction (#11699) ++ [limelight] Check hand-make direct HTTP links ++ [limelight] Add support for direct HTTP links at video.llnw.net (#11737) + [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) +* [yourupload] Fix extraction (#11601) ++ [beam:live] Add support for beam.pro live streams (#10702, #11596) +* [vevo] Improve geo restriction detection ++ [dramafever] Add support for URLs with language code (#11714) +* [cbc] Improve playlist support (#11704) + version 2017.01.14 From c1c2fe2045911c310fd5d2eda7bbb53ad581d250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:44:04 +0700 Subject: [PATCH 26/93] release 2017.01.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a7bf2b90c..c04f6246a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and 
ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.14 +[debug] youtube-dl version 2017.01.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ee59e120c..f6d73f982 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.16 Core * [options] Apply custom config to final composite configuration (#11741) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0f6c4ec0c..a3c76d5db 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -86,6 +86,7 @@ - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:iplayer:playlist** - **bbc.co.uk:playlist** + - **Beam:live** - **Beatport** - **Beeg** - **BehindKink** diff --git 
a/youtube_dl/version.py b/youtube_dl/version.py index 17c6f9eb2..c20718dd6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.14' +__version__ = '2017.01.16' From c0bd51c090d617811f5e405294dce06f5871d717 Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Mon, 16 Jan 2017 22:19:52 +0300 Subject: [PATCH 27/93] [naver] Support tv.naver.com links --- youtube_dl/extractor/naver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 055070ff5..aba0a9a70 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -12,7 +12,7 @@ from ..utils import ( class NaverIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' + _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ 'url': 'http://tvcast.naver.com/v/81652', From 8a5f0a6357746d293f7330e40a3cf5823b1b626d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 21:19:57 +0700 Subject: [PATCH 28/93] [naver] Update tests for #11743 --- youtube_dl/extractor/naver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index aba0a9a70..e8131333f 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -15,7 +15,7 @@ class NaverIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ - 'url': 'http://tvcast.naver.com/v/81652', + 'url': 'http://tv.naver.com/v/81652', 'info_dict': { 'id': '81652', 'ext': 'mp4', @@ -24,7 +24,7 @@ class NaverIE(InfoExtractor): 'upload_date': '20130903', }, }, { - 'url': 'http://tvcast.naver.com/v/395837', + 'url': 'http://tv.naver.com/v/395837', 'md5': '638ed4c12012c458fefcddfd01f173cd', 'info_dict': { 'id': '395837', @@ -34,6 +34,9 @@ class NaverIE(InfoExtractor): 
'upload_date': '20150519', }, 'skip': 'Georestricted', + }, { + 'url': 'http://tvcast.naver.com/v/81652', + 'only_matching': True, }] def _real_extract(self, url): From 136078966b2047b21e9784060cebdc893c643ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 23:14:07 +0700 Subject: [PATCH 29/93] [imdb] Extend _VALID_URL (closes #11744) --- youtube_dl/extractor/imdb.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index f0fc8d49a..f95c00c73 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -13,7 +13,7 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P\d+)' _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', @@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor): }, { 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897', 'only_matching': True, + }, { + 'url': 'http://www.imdb.com/videoplayer/vi1562949145', + 'only_matching': True, }] def _real_extract(self, url): From 4e44598547b02d42aa628506245c40c3d633814e Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Mon, 9 Jan 2017 21:19:55 +0100 Subject: [PATCH 30/93] [20min] Fix extraction --- youtube_dl/extractor/twentymin.py | 37 ++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index b721ecb0a..68d5a0cb5 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -13,10 +13,10 @@ class TwentyMinutenIE(InfoExtractor): _TESTS = [{ # regular video 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', - 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', + 'md5': 
'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', - 'ext': 'flv', + 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' @@ -34,17 +34,29 @@ class TwentyMinutenIE(InfoExtractor): 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' }, 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', + }, { + # news article with video + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'md5': '372917ba85ed969e176d287ae54b2f94', + 'info_dict': { + 'id': '523629', + 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', + 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', + } }, { # YouTube embed 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f', + 'md5': 'e7e237fd98da2a3cc1422ce683df234d', 'info_dict': { 'id': 'ivM7A7SpDOs', 'ext': 'mp4', 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', 'upload_date': '20160424', - 'uploader': 'RTVCM Castilla-La Mancha', + 'uploader': 'CMM Castilla-La Mancha Media', 'uploader_id': 'RTVCM', }, 'add_ie': ['Youtube'], @@ -77,18 +89,31 @@ class TwentyMinutenIE(InfoExtractor): r'^20 [Mm]inuten.*? 
-', '', self._og_search_title(webpage)), ' - News') if not video_id: + params = self._html_search_regex( + r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', + webpage, '20min embed URL') video_id = self._search_regex( - r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id') + r'.*videoId@(\d+)', + params, 'Video Id') description = self._html_search_meta( 'description', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) + formats = [] + format_preferences = [('sd', ''), ('hd', 'h')] + for format_id, url_extension in format_preferences: + format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) + formats.append({ + 'format_id': format_id, + 'url': format_url, + }) + return { 'id': video_id, 'display_id': display_id, - 'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'formats': formats, } From 538b17a09c6546d58babc5eb4a3abc08dcff2d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:05:11 +0700 Subject: [PATCH 31/93] [20min] Improve --- youtube_dl/extractor/twentymin.py | 122 ++++++++++++------------------ 1 file changed, 47 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index 68d5a0cb5..4fd1aa4bf 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -4,116 +4,88 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( + int_or_none, + try_get, +) class TwentyMinutenIE(InfoExtractor): IE_NAME = '20min' - _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P\d+)|(?:[^/]+/)*(?P[^/#?]+))' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?20min\.ch/ + (?: + videotv/*\?.*?\bvid=| + videoplayer/videoplayer\.html\?.*?\bvideoId@ + ) + (?P\d+) + ''' _TESTS = [{ - # regular video 
'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', 'md5': 'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', - 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', - 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' - } - }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'md5': 'cd4cbb99b94130cff423e967cd275e5e', - 'info_dict': { - 'id': '469408', - 'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'ext': 'flv', - 'title': '«Wir müssen mutig nach vorne schauen»', - 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', - 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' + 'thumbnail': r're:https?://.*\.jpg$', }, - 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', - 'md5': '372917ba85ed969e176d287ae54b2f94', + 'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629', 'info_dict': { 'id': '523629', - 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', 'ext': 'mp4', 'title': 'So kommen Sie bei Eis und Schnee sicher an', - 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. 
Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', - 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', - } - }, { - # YouTube embed - 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'e7e237fd98da2a3cc1422ce683df234d', - 'info_dict': { - 'id': 'ivM7A7SpDOs', - 'ext': 'mp4', - 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', - 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', - 'upload_date': '20160424', - 'uploader': 'CMM Castilla-La Mancha Media', - 'uploader_id': 'RTVCM', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + 'thumbnail': r're:https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, }, - 'add_ie': ['Youtube'], }, { 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', 'only_matching': True, - }, { - 'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411', - 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r']+src=(["\'])(?P(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', + webpage)] + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + video_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video = self._download_json( + 'http://api.20min.ch/video/%s/show' % video_id, + video_id)['content'] - youtube_url = self._html_search_regex( - r']+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', - webpage, 'YouTube embed URL', default=None) - if youtube_url is not None: - return self.url_result(youtube_url, 'Youtube') + title = video['title'] - title = self._html_search_regex( - r'

.*?(.+?)

', - webpage, 'title', default=None) - if not title: - title = remove_end(re.sub( - r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') + formats = [{ + 'format_id': format_id, + 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p), + 'quality': quality, + } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])] + self._sort_formats(formats) - if not video_id: - params = self._html_search_regex( - r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', - webpage, '20min embed URL') - video_id = self._search_regex( - r'.*videoId@(\d+)', - params, 'Video Id') + description = video.get('lead') + thumbnail = video.get('thumbnail') - description = self._html_search_meta( - 'description', webpage, 'description') - thumbnail = self._og_search_thumbnail(webpage) + def extract_count(kind): + return try_get( + video, + lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind])) - formats = [] - format_preferences = [('sd', ''), ('hd', 'h')] - for format_id, url_extension in format_preferences: - format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) - formats.append({ - 'format_id': format_id, - 'url': format_url, - }) + like_count = extract_count('up') + dislike_count = extract_count('down') return { 'id': video_id, - 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'like_count': like_count, + 'dislike_count': dislike_count, 'formats': formats, } From b687c85eab942553e925256ad10de693227ba553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:08:31 +0700 Subject: [PATCH 32/93] [extractor/generic] Add support for 20 minuten embeds (closes #11683, closes #11751) --- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a3ac7d26b..154545df7 
100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -78,6 +78,7 @@ from .vbox7 import Vbox7IE from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE +from .twentymin import TwentyMinutenIE class GenericIE(InfoExtractor): @@ -1468,6 +1469,20 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 2, }, + { + # 20 minuten embed + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'info_dict': { + 'id': '523629', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TwentyMinutenIE.ie_key()], + } # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2421,6 +2436,12 @@ class GenericIE(InfoExtractor): if videa_urls: return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + # Look for 20 minuten embeds + twentymin_urls = TwentyMinutenIE._extract_urls(webpage) + if twentymin_urls: + return _playlist_from_matches( + twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') From aaf2b7c57a3d2dc9ba12f1aa401cba088e114916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:20:11 +0700 Subject: [PATCH 33/93] [canalplus] Add fallback for video id (closes #11764) --- youtube_dl/extractor/canalplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 10cf165bc..b3f76a7b1 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -107,7 +107,7 @@ class CanalplusIE(InfoExtractor): [r']+?videoId=(["\'])(?P\d+)', r'id=["\']canal_video_player(?P\d+)', r'data-video=["\'](?P\d+)'], - webpage, 'video id', group='id') + webpage, 'video 
id', default=mobj.group('vid'), group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) video_data = self._download_json(info_url, video_id, 'Downloading video JSON') From baa3e1845b26d9756642325bbb0d58e22025b2ec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 18 Jan 2017 17:00:15 +0100 Subject: [PATCH 34/93] [bilibili] fix extraction(closes #11077) --- youtube_dl/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 5051934ef..85ea5e6ee 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -34,8 +34,8 @@ class BiliBiliIE(InfoExtractor): }, } - _APP_KEY = '6f90a59ac58a4123' - _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' + _APP_KEY = '84956560bc028eb7' + _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' def _real_extract(self, url): video_id = self._match_id(url) From 460f61fac42592eb273b7d58efc314cc83687b8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:06:46 +0700 Subject: [PATCH 35/93] [ChangeLog] Actualize --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index f6d73f982..994895edc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +version + +Extractors +* [bilibili] Fix extraction (#11077) ++ [canalplus] Add fallback for video id (#11764) +* [20min] Fix extraction (#11683, #11751) +* [imdb] Extend URL regular expression (#11744) ++ [naver] Add support for tv.naver.com links (#11743) + + version 2017.01.16 Core From 1560baacc677c43c1007acfc89b8190f81a59684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:10:00 +0700 Subject: [PATCH 36/93] release 2017.01.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md 
index c04f6246a..38cb13a33 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.16 +[debug] youtube-dl version 2017.01.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 994895edc..5aa4e3c6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.18 Extractors * [bilibili] Fix extraction (#11077) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c20718dd6..669f60f65 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import 
unicode_literals -__version__ = '2017.01.16' +__version__ = '2017.01.18' From f1e70fc2ff6f1536873ed73ffc9bff63653fd5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:34:11 +0700 Subject: [PATCH 37/93] [mtv] Relax triforce feed regex (closes #11766) --- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 00a980c7d..e48ea2481 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -211,7 +211,7 @@ class MTVServicesInfoExtractor(InfoExtractor): def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): triforce_feed = self._parse_json(self._search_regex( - r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, + r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage, 'triforce feed', default='{}'), video_id, fatal=False) data_zone = self._search_regex( From eb3f008c9e686f38c50511004d5c9a51b2e8cdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 19 Jan 2017 04:49:31 +0700 Subject: [PATCH 38/93] [uol] Fix extraction (closes #11770) --- youtube_dl/extractor/uol.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py index c27c64387..e67083004 100644 --- a/youtube_dl/extractor/uol.py +++ b/youtube_dl/extractor/uol.py @@ -84,12 +84,27 @@ class UOLIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - if not video_id.isdigit(): - embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id) - video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id') + media_id = None + + if video_id.isdigit(): + media_id = video_id + + if not media_id: + embed_page = self._download_webpage( + 'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, + video_id, 'Downloading embed 
page', fatal=False) + if embed_page: + media_id = self._search_regex( + (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'), + embed_page, 'media id', default=None) + + if not media_id: + webpage = self._download_webpage(url, video_id) + media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id') + video_data = self._download_json( - 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id, - video_id)['item'] + 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id, + media_id)['item'] title = video_data['title'] query = { @@ -118,7 +133,7 @@ class UOLIE(InfoExtractor): tags.append(tag_description) return { - 'id': video_id, + 'id': media_id, 'title': title, 'description': clean_html(video_data.get('desMedia')), 'thumbnail': video_data.get('thumbnail'), From cccd70a2752ad079ed560e42ff085adcabebaac2 Mon Sep 17 00:00:00 2001 From: james mike dupont Date: Thu, 19 Jan 2017 04:18:13 -0500 Subject: [PATCH 39/93] untie --- youtube_dl/extractor/flipagram.py | 2 +- youtube_dl/extractor/vimeo.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py index 1902a2393..b7be40f1b 100644 --- a/youtube_dl/extractor/flipagram.py +++ b/youtube_dl/extractor/flipagram.py @@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor): 'filesize': int_or_none(cover.get('size')), } for cover in flipagram.get('covers', []) if cover.get('url')] - # Note that this only retrieves comments that are initally loaded. + # Note that this only retrieves comments that are initially loaded. # For videos with large amounts of comments, most won't be retrieved. 
comments = [] for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []): diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 2e98b0e6f..add753635 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -338,7 +338,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'expected_warnings': ['Unable to download JSON metadata'], }, { - # redirects to ondemand extractor and should be passed throught it + # redirects to ondemand extractor and should be passed through it # for successful extraction 'url': 'https://vimeo.com/73445910', 'info_dict': { From 1fe84be0f3b36822af804db6cf7c06a1ac5ac688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Jan 2017 00:47:04 +0700 Subject: [PATCH 40/93] [1tv] Add support for hls (closes #11786) --- youtube_dl/extractor/firsttv.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index c6fb67057..081c71842 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -86,18 +86,43 @@ class FirstTVIE(InfoExtractor): title = item['title'] quality = qualities(QUALITIES) formats = [] + path = None for f in item.get('mbr', []): src = f.get('src') if not src or not isinstance(src, compat_str): continue tbr = int_or_none(self._search_regex( r'_(\d{3,})\.mp4', src, 'tbr', default=None)) + if not path: + path = self._search_regex( + r'//[^/]+/(.+?)_\d+\.mp4', src, + 'm3u8 path', default=None) formats.append({ 'url': src, 'format_id': f.get('name'), 'tbr': tbr, - 'quality': quality(f.get('name')), + 'source_preference': quality(f.get('name')), }) + # m3u8 URL format is reverse engineered from [1] (search for + # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru) + # is taken from [2]. + # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted + # 2. 
http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834 + if not path and len(formats) == 1: + path = self._search_regex( + r'//[^/]+/(.+?$)', formats[0]['url'], + 'm3u8 path', default=None) + if path: + if len(formats) == 1: + m3u8_path = ',' + else: + tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)] + m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4') + formats.extend(self._extract_m3u8_formats( + 'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8' + % (path, m3u8_path), + display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) From d77ac737900eede5e1508b9822e71c8595fe0879 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 21:59:24 +0800 Subject: [PATCH 41/93] [ustream] Add UstreamIE._extract_url() Ref: #11547 --- youtube_dl/extractor/generic.py | 8 ++++---- youtube_dl/extractor/ustream.py | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 154545df7..a7c104845 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -79,6 +79,7 @@ from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE +from .ustream import UstreamIE class GenericIE(InfoExtractor): @@ -2112,10 +2113,9 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'TED') # Look for embedded Ustream videos - mobj = re.search( - r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'Ustream') + ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) # Look for embedded arte.tv player mobj = re.search( diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 
0c06bf36b..5737d4d16 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -69,6 +69,13 @@ class UstreamIE(InfoExtractor): }, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) + if mobj is not None: + return mobj.group('url') + def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): def num_to_hex(n): return hex(n)[2:] From 4447fb23320b9214ab3188717794d00b18887617 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:11:43 +0800 Subject: [PATCH 42/93] [cspan] Support Ustream embedded videos Closes #11547 --- ChangeLog | 6 ++++++ youtube_dl/extractor/cspan.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5aa4e3c6b..217971ec6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [cspan] Support Ustream embedded videos (#11547) + + version 2017.01.18 Extractors diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7e5d4f227..92a827a4b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, ) from .senateisvp import SenateISVPIE +from .ustream import UstreamIE class CSpanIE(InfoExtractor): @@ -57,12 +58,30 @@ class CSpanIE(InfoExtractor): 'params': { 'skip_download': True, # m3u8 downloads } + }, { + # Ustream embedded video + 'url': 'https://www.c-span.org/video/?114917-1/armed-services', + 'info_dict': { + 'id': '58428542', + 'ext': 'flv', + 'title': 'USHR07 Armed Services Committee', + 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee', + 'timestamp': 1423060374, + 'upload_date': '20150204', + 'uploader': 'HouseCommittee', + 'uploader_id': '12987475', + }, }] def _real_extract(self, url): video_id = self._match_id(url) video_type = None webpage = self._download_webpage(url, video_id) + + ustream_url 
= UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) + # We first look for clipid, because clipprog always appears before patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) From 972efe60c3fdaff83f9b8e7a637ee81f4c27bb64 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:13:54 +0800 Subject: [PATCH 43/93] [generic] Remove a dead test The web page does not contain a video anymore Ref: #2694, #2696 --- youtube_dl/extractor/generic.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a7c104845..40201f311 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -589,17 +589,6 @@ class GenericIE(InfoExtractor): 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', } }, - # Embedded Ustream video - { - 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', - 'md5': '27b99cdb639c9b12a79bca876a073417', - 'info_dict': { - 'id': '45734260', - 'ext': 'flv', - 'uploader': 'AU SPA: The NSA and Privacy', - 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman' - } - }, # nowvideo embed hidden behind percent encoding { 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', From f3c21cb7a7e2d8685f466368e3142739077498cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:25:20 +0800 Subject: [PATCH 44/93] [cspan] Fix _TESTS --- youtube_dl/extractor/cspan.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 92a827a4b..d4576160b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -23,14 +23,13 @@ class CSpanIE(InfoExtractor): 'md5': '94b29a4f131ff03d23471dd6f60b6a1d', 'info_dict': { 'id': 
'315139', - 'ext': 'mp4', 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', - 'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.', }, + 'playlist_mincount': 2, 'skip': 'Regularly fails on travis, for unknown reasons', }, { 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', - 'md5': '8e5fbfabe6ad0f89f3012a7943c1287b', + # md5 is unstable 'info_dict': { 'id': 'c4486943', 'ext': 'mp4', @@ -39,14 +38,11 @@ class CSpanIE(InfoExtractor): } }, { 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', - 'md5': '2ae5051559169baadba13fc35345ae74', 'info_dict': { 'id': '342759', - 'ext': 'mp4', 'title': 'General Motors Ignition Switch Recall', - 'duration': 14848, - 'description': 'md5:118081aedd24bf1d3b68b3803344e7f3' }, + 'playlist_mincount': 6, }, { # Video from senate.gov 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', From f4ec8dce481564589419e4dffc45437211daa13f Mon Sep 17 00:00:00 2001 From: Iulian Onofrei Date: Fri, 20 Jan 2017 18:25:04 +0200 Subject: [PATCH 45/93] Update README.md (#11787) Add audio format argument dependency warning --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0b8c1671d..0d2ce8d15 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -751,7 +751,7 @@ def parseOpts(overrideArguments=None): help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') + help='Specify audio format: "best", 
"aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x') postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', From 12afdc2ad617dedfd7d60654b8c57b99604332ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Jan 2017 18:10:32 +0700 Subject: [PATCH 46/93] [youtube] Extract episode metadata (closes #9695, closes #11774) --- youtube_dl/extractor/youtube.py | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e6b840735..63597dd16 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -864,6 +864,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + # YouTube Red video with episode data + 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4', + 'info_dict': { + 'id': 'iqKdEhx-dD4', + 'ext': 'mp4', + 'title': 'Isolation - Mind Field (Ep 1)', + 'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822', + 'upload_date': '20170118', + 'uploader': 'Vsauce', + 'uploader_id': 'Vsauce', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', + 'license': 'Standard YouTube License', + 'series': 'Mind Field', + 'season_number': 1, + 'episode_number': 1, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': [ + 'Skipping DASH manifest', + ], + }, { # itag 212 'url': '1t24XAntNCY', @@ -1454,6 +1478,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: video_alt_title = video_creator = None + m_episode = re.search( + r']+id="watch7-headline"[^>]*>\s*]*>.*?>(?P[^<]+)\s*S(?P\d+)\s*•\s*E(?P\d+)', + video_webpage) + if m_episode: + series = m_episode.group('series') + season_number = int(m_episode.group('season')) + episode_number = int(m_episode.group('episode')) + else: + series = season_number = episode_number = None + m_cat_container = self._search_regex( r'(?s)]*>\s*Category\s*\s*]*>(.*?)', 
video_webpage, 'categories', default=None) @@ -1743,6 +1777,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'is_live': is_live, 'start_time': start_time, 'end_time': end_time, + 'series': series, + 'season_number': season_number, + 'episode_number': episode_number, } From 04a3d4d23472ffa4a482d8ebf2d8fdbb3e974327 Mon Sep 17 00:00:00 2001 From: ha shao Date: Sat, 21 Jan 2017 15:47:39 +0800 Subject: [PATCH 47/93] [vimeo:channel] Extract videos' titles for playlist entries --- youtube_dl/extractor/vimeo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index add753635..a6bbd4c05 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -730,12 +730,12 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): # Try extracting href first since not all videos are available via # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) clips = re.findall( - r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)', webpage) + r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage) if clips: - for video_id, video_url in clips: + for video_id, video_url, video_title in clips: yield self.url_result( compat_urlparse.urljoin(base_url, video_url), - VimeoIE.ie_key(), video_id=video_id) + VimeoIE.ie_key(), video_id=video_id, video_title=video_title) # More relaxed fallback else: for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): From 7c20b7484cc91a4818a98ca8d5b7ef94d5c38fb8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 22 Jan 2017 02:06:34 +0800 Subject: [PATCH 48/93] [nextmedia] Support redirected URLs --- ChangeLog | 1 + youtube_dl/extractor/nextmedia.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 217971ec6..00c8a063f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [nextmedia] Support redirected URLs + [cspan] 
Support Ustream embedded videos (#11547) diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index c900f232a..626ed8b49 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import parse_iso8601 @@ -30,6 +31,12 @@ class NextMediaIE(InfoExtractor): return self._extract_from_nextmedia_page(news_id, url, page) def _extract_from_nextmedia_page(self, news_id, url, page): + redirection_url = self._search_regex( + r'window\.location\.href\s*=\s*([\'"])(?P(?!\1).+)\1', + page, 'redirection URL', default=None, group='url') + if redirection_url: + return self.url_result(compat_urlparse.urljoin(url, redirection_url)) + title = self._fetch_title(page) video_url = self._search_regex(self._URL_PATTERN, page, 'video url') @@ -93,7 +100,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' 
_TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -157,6 +164,10 @@ class AppleDailyIE(NextMediaIE): }, { 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/', 'only_matching': True, + }, { + # Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694 + 'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694', + 'only_matching': True, }] _URL_PATTERN = r'\{url: \'(.+)\'\}' From e84495cd8d7bdb89bbfe233263bd8ad0b448f8cc Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Sat, 21 Jan 2017 15:23:26 +0100 Subject: [PATCH 49/93] [azmedien] Add extractor (closes #11785) --- youtube_dl/extractor/azmedientv.py | 87 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 ++ 2 files changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/azmedientv.py diff --git a/youtube_dl/extractor/azmedientv.py b/youtube_dl/extractor/azmedientv.py new file mode 100644 index 000000000..51d46fb94 --- /dev/null +++ b/youtube_dl/extractor/azmedientv.py @@ -0,0 +1,87 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import get_element_by_class + + +class AZMedienTVIE(InfoExtractor): + IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch videos' + _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+(?:/[0-9]+-segment-(?:[^/#]+#)?|#)|#)(?P[^#]+)' + + _TESTS = [{ + # URL with 'segment' + 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'md5': 'fda85ada1299cee517a622bfbc5f6b66', + 'info_dict': { + 'id': '1_2444peh4', + 'ext': 'mov', + 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', + 'description': 
'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', + 'uploader_id': 'TeleZ?ri', + 'upload_date': '20161218', + 'timestamp': 1482084490, + } + }, { + # URL with 'segment' and fragment: + 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', + 'only_matching': True + }, { + # URL with 'episode' and fragment: + 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', + 'only_matching': True + }, { + # URL with 'show' and fragment: + 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + kaltura_partner_id = self._html_search_regex( + r'<script[^>]+src=["\']https?://www\.kaltura\.com/.*/partner_id/([0-9]+)', + webpage, 'Kaltura partner ID') + kaltura_entry_id = self._html_search_regex( + r'<a[^>]+data-id=["\'](.*?)["\'][^>]+data-slug=["\']%s' % video_id, + webpage, 'Kaltura entry ID') + + return self.url_result( + 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id), + ie=KalturaIE.ie_key()) + + +class AZMedienTVShowIE(InfoExtractor): + IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch shows' + _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + + _TESTS = [{ + # URL with 'episode': + 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'info_dict': { + 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'title': 'News', + }, + 'playlist_count': 9, + }, { + # URL with 'show' only: + 'url': 'http://www.telezueri.ch/86-show-talktaeglich', + 'only_matching': True + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url,
show_id) + + title = get_element_by_class('title-block-cell', webpage) + if title: + title = title.strip() + + entries = [self.url_result(m.group('url'), ie=AZMedienTVIE.ie_key()) for m in re.finditer( + r'<a href=["\']#["\'][^>]+data-real=["\'](?P<url>.+?)["\']', webpage)] + + return self.playlist_result( + entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9d0610d21..4cfb3c70f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -77,6 +77,10 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) +from .azmedientv import ( + AZMedienTVIE, + AZMedienTVShowIE, +) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE From 94629e537f2f6ed80b19e3863456f9ba8073af36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 02:15:20 +0700 Subject: [PATCH 50/93] [azmedien] Improve (closes #11784) --- youtube_dl/extractor/azmedien.py | 132 +++++++++++++++++++++++++++++ youtube_dl/extractor/azmedientv.py | 87 ------------------- youtube_dl/extractor/extractors.py | 6 +- 3 files changed, 135 insertions(+), 90 deletions(-) create mode 100644 youtube_dl/extractor/azmedien.py delete mode 100644 youtube_dl/extractor/azmedientv.py diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py new file mode 100644 index 000000000..059dc6e4b --- /dev/null +++ b/youtube_dl/extractor/azmedien.py @@ -0,0 +1,132 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import ( + get_element_by_class, + strip_or_none, +) + + +class AZMedienBaseIE(InfoExtractor): + def _kaltura_video(self, partner_id, entry_id): + return self.url_result( + 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), + video_id=entry_id) + + +class AZMedienIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien videos' + _VALID_URL = r'''(?x) +
https?:// + (?:www\.)? + (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + [0-9]+-show-[^/\#]+ + (?: + /[0-9]+-episode-[^/\#]+ + (?: + /[0-9]+-segment-(?:[^/\#]+\#)?| + \# + )| + \# + ) + (?P<id>[^\#]+) + ''' + + _TESTS = [{ + # URL with 'segment' + 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'info_dict': { + 'id': '1_2444peh4', + 'ext': 'mov', + 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', + 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', + 'uploader_id': 'TeleZ?ri', + 'upload_date': '20161218', + 'timestamp': 1482084490, + }, + 'params': { + 'skip_download': True, + }, + }, { + # URL with 'segment' and fragment: + 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', + 'only_matching': True + }, { + # URL with 'episode' and fragment: + 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', + 'only_matching': True + }, { + # URL with 'show' and fragment: + 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + partner_id = self._search_regex( + r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)', + webpage, 'kaltura partner id') + entry_id = self._html_search_regex( + r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s' + % re.escape(video_id), webpage, 'kaltura entry id', group='id') + + return self._kaltura_video(partner_id, entry_id) + + +class AZMedienShowIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien shows' + _VALID_URL =
r'https?://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + + _TESTS = [{ + # URL with 'episode' + 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'info_dict': { + 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'title': 'News - Donnerstag, 15. Dezember 2016', + }, + 'playlist_count': 9, + }, { + # URL with 'show' only + 'url': 'http://www.telezueri.ch/86-show-talktaeglich', + 'only_matching': True + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + + entries = [] + + partner_id = self._search_regex( + r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', + webpage, 'kaltura partner id', default=None) + + if partner_id: + entries = [ + self._kaltura_video(partner_id, m.group('id')) + for m in re.finditer( + r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)] + + if not entries: + entries = [ + self.url_result(m.group('url'), ie=AZMedienIE.ie_key()) + for m in re.finditer( + r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)] + + title = self._search_regex( + r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + webpage, 'title', + default=strip_or_none(get_element_by_class( + 'title-block-cell', webpage)), group='title') + + return self.playlist_result(entries, show_id, title) diff --git a/youtube_dl/extractor/azmedientv.py b/youtube_dl/extractor/azmedientv.py deleted file mode 100644 index 51d46fb94..000000000 --- a/youtube_dl/extractor/azmedientv.py +++ /dev/null @@ -0,0 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .kaltura import KalturaIE -from ..utils import get_element_by_class - - -class AZMedienTVIE(InfoExtractor): - IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch videos' - _VALID_URL =
r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+(?:/[0-9]+-segment-(?:[^/#]+#)?|#)|#)(?P<id>[^#]+)' - - _TESTS = [{ - # URL with 'segment' - 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', - 'md5': 'fda85ada1299cee517a622bfbc5f6b66', - 'info_dict': { - 'id': '1_2444peh4', - 'ext': 'mov', - 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', - 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', - 'uploader_id': 'TeleZ?ri', - 'upload_date': '20161218', - 'timestamp': 1482084490, - } - }, { - # URL with 'segment' and fragment: - 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', - 'only_matching': True - }, { - # URL with 'episode' and fragment: - 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', - 'only_matching': True - }, { - # URL with 'show' and fragment: - 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', - 'only_matching': True - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - kaltura_partner_id = self._html_search_regex( - r'<script[^>]+src=["\']https?://www\.kaltura\.com/.*/partner_id/([0-9]+)', - webpage, 'Kaltura partner ID') - kaltura_entry_id = self._html_search_regex( - r'<a[^>]+data-id=["\'](.*?)["\'][^>]+data-slug=["\']%s' % video_id, - webpage, 'Kaltura entry ID') - - return self.url_result( - 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id), - ie=KalturaIE.ie_key()) - - -class AZMedienTVShowIE(InfoExtractor): - IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch shows' - _VALID_URL = 
r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' - - _TESTS = [{ - # URL with 'episode': - 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'info_dict': { - 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'title': 'News', - }, - 'playlist_count': 9, - }, { - # URL with 'show' only: - 'url': 'http://www.telezueri.ch/86-show-talktaeglich', - 'only_matching': True - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id) - - title = get_element_by_class('title-block-cell', webpage) - if title: - title = title.strip() - - entries = [self.url_result(m.group('url'), ie=AZMedienTVIE.ie_key()) for m in re.finditer( - r'<a href=["\']#["\'][^>]+data-real=["\'](?P<url>.+?)["\']', webpage)] - - return self.playlist_result( - entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4cfb3c70f..de5f94738 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -77,9 +77,9 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) -from .azmedientv import ( - AZMedienTVIE, - AZMedienTVShowIE, +from .azmedien import ( + AZMedienIE, + AZMedienShowIE, ) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE From 42697bab3c4d65a232054d5d5482cc177da12c72 Mon Sep 17 00:00:00 2001 From: einstein95 <einstein95@users.noreply.github.com> Date: Sun, 22 Jan 2017 02:00:38 +1300 Subject: [PATCH 51/93] [chaturbate] Fix extraction --- youtube_dl/extractor/chaturbate.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 29a8820d5..1c2f065df 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re 
+ from .common import InfoExtractor from ..utils import ExtractorError @@ -31,30 +33,32 @@ class ChaturbateIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_url = self._search_regex( - r'src=(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage, - 'playlist', default=None, group='url') + m3u8_urls = re.findall( + r'var hlsSource.+? = (["\'])(?P<url>http.+?\.m3u8)', webpage) - if not m3u8_url: + if not m3u8_urls: error = self._search_regex( [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>', r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'], webpage, 'error', group='error', default=None) if not error: - if any(p not in webpage for p in ( + if any(p in webpage for p in ( self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): error = self._ROOM_OFFLINE if error: raise ExtractorError(error, expected=True) raise ExtractorError('Unable to find stream URL') - formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + formats = [] + for m3u8_url in m3u8_urls: + formats.append(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')[0]) + self._sort_formats(formats) return { 'id': video_id, 'title': self._live_title(video_id), - 'thumbnail': 'https://cdn-s.highwebmedia.com/uHK3McUtGCG3SMFcd4ZJsRv8/roomimage/%s.jpg' % video_id, + 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, 'age_limit': self._rta_search(webpage), 'is_live': True, 'formats': formats, From a243abb80d5fdaacc502bc5a2b5cb20d0766e93a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 03:00:10 +0700 Subject: [PATCH 52/93] [chaturbate] Improve (closes #11797) --- youtube_dl/extractor/chaturbate.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 1c2f065df..8fbc91c1f 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ 
-33,10 +33,10 @@ class ChaturbateIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_urls = re.findall( - r'var hlsSource.+? = (["\'])(?P<url>http.+?\.m3u8)', webpage) + m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer( + r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)] - if not m3u8_urls: + if not m3u8_formats: error = self._search_regex( [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>', r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'], @@ -50,9 +50,12 @@ class ChaturbateIE(InfoExtractor): raise ExtractorError('Unable to find stream URL') formats = [] - for m3u8_url in m3u8_urls: - formats.append(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')[0]) - + for m3u8_id, m3u8_url in m3u8_formats: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', + # ffmpeg skips segments for fast m3u8 + preference=-10 if m3u8_id == 'fast' else None, + m3u8_id=m3u8_id, fatal=False, live=True)) self._sort_formats(formats) return { From 8d1fbe0cb20fdfab8487bb478c2a002f12c1a5d9 Mon Sep 17 00:00:00 2001 From: einstein95 <einstein95@users.noreply.github.com> Date: Sat, 21 Jan 2017 20:02:55 +1300 Subject: [PATCH 53/93] [pornflip] Add extractor (closes #11556) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pornflip.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/pornflip.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de5f94738..cfddf5b92 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -724,6 +724,7 @@ from .polskieradio import ( ) from .porn91 import Porn91IE from .porncom import PornComIE +from .pornflip import PornFlipIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py new 
file mode 100644 index 000000000..b6077f7cb --- /dev/null +++ b/youtube_dl/extractor/pornflip.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, +) +from ..utils import ( + int_or_none, + try_get, + RegexNotFoundError, +) + + +class PornFlipIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P<id>[0-9A-Za-z]{11})' + _TEST = { + 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', + 'md5': '98c46639849145ae1fd77af532a9278c', + 'info_dict': { + 'id': 'wz7DfNhMmep', + 'ext': 'mp4', + 'title': '2 Amateurs swallow make his dream cumshots true', + 'uploader': 'figifoto', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + uploader = self._html_search_regex( + r'<span class="name">\s+<a class="ajax" href=".+>\s+<strong>([^<]+)<', webpage, 'uploader', fatal=False) + flashvars = compat_parse_qs(self._html_search_regex( + r'<embed.+?flashvars="([^"]+)"', + webpage, 'flashvars')) + title = flashvars['video_vars[title]'][0] + thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0]) + formats = [] + for k, v in flashvars.items(): + height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None) + if height: + url = v[0] + formats.append({ + 'height': int_or_none(height), + 'url': url + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'uploader': uploader, + 'thumbnail': thumbnail, + 'age_limit': 18, + } From 271808b6b2bd75ec9bdf943a55dbc4737bfa6f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 03:43:27 +0700 Subject: [PATCH 54/93] [pornflip] Improve and extract dash formats (closes #11795) --- youtube_dl/extractor/pornflip.py | 79 
++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py index b6077f7cb..a4a5d390e 100644 --- a/youtube_dl/extractor/pornflip.py +++ b/youtube_dl/extractor/pornflip.py @@ -4,56 +4,89 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_str, ) from ..utils import ( int_or_none, try_get, - RegexNotFoundError, + unified_timestamp, ) class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P<id>[0-9A-Za-z]{11})' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})' + _TESTS = [{ 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', 'md5': '98c46639849145ae1fd77af532a9278c', 'info_dict': { 'id': 'wz7DfNhMmep', 'ext': 'mp4', 'title': '2 Amateurs swallow make his dream cumshots true', - 'uploader': 'figifoto', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 112, + 'timestamp': 1481655502, + 'upload_date': '20161213', + 'uploader_id': '106786', + 'uploader': 'figifoto', + 'view_count': int, 'age_limit': 18, } - } + }, { + 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - uploader = self._html_search_regex( - r'<span class="name">\s+<a class="ajax" href=".+>\s+<strong>([^<]+)<', webpage, 'uploader', fatal=False) - flashvars = compat_parse_qs(self._html_search_regex( - r'<embed.+?flashvars="([^"]+)"', - webpage, 'flashvars')) - title = flashvars['video_vars[title]'][0] - thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0]) - formats = [] - for k, v in flashvars.items(): - height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None) - if height: - url = v[0] - formats.append({ - 'height': int_or_none(height), - 'url': url - }) + 
webpage = self._download_webpage( + 'https://www.pornflip.com/v/%s' % video_id, video_id) + + flashvars = compat_parse_qs(self._search_regex( + r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1', + webpage, 'flashvars', group='flashvars')) + + title = flashvars['video_vars[title]'][0] + + def flashvar(kind): + return try_get( + flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str) + + formats = [] + for key, value in flashvars.items(): + if not (value and isinstance(value, list)): + continue + format_url = value[0] + if key == 'video_vars[hds_manifest]': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + continue + height = self._search_regex( + r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None) + if not height: + continue + formats.append({ + 'url': format_url, + 'format_id': 'http-%s' % height, + 'height': int_or_none(height), + }) self._sort_formats(formats) + uploader = self._html_search_regex( + (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)', + r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'), + webpage, 'uploader', fatal=False, group='uploader') + return { 'id': video_id, 'formats': formats, 'title': title, + 'thumbnail': flashvar('big_thumb'), + 'duration': int_or_none(flashvar('duration')), + 'timestamp': unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp')), + 'uploader_id': flashvar('author_id'), 'uploader': uploader, - 'thumbnail': thumbnail, + 'view_count': int_or_none(flashvar('views')), 'age_limit': 18, } From 6c031a35f31717cc1a535d5d808b94967b841a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 18:57:15 +0700 Subject: [PATCH 55/93] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 00c8a063f..a814b934c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,16 @@ version 
<unreleased> Extractors ++ [pornflip] Add support for pornflip.com (#11556, #11795) +* [chaturbate] Fix extraction (#11797, #11802) ++ [azmedien] Add support for AZ Medien sites (#11784, #11785) + [nextmedia] Support redirected URLs ++ [vimeo:channel] Extract videos' titles for playlist entries (#11796) ++ [youtube] Extract episode metadata (#9695, #11774) + [cspan] Support Ustream embedded videos (#11547) ++ [1tv] Add support for HLS videos (#11786) +* [uol] Fix extraction (#11770) +* [mtv] Relax triforce feed regular expression (#11766) version 2017.01.18 From 9d5b29c881f679b1d4270326af4ba6f657807011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 18:59:04 +0700 Subject: [PATCH 56/93] release 2017.01.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 38cb13a33..30cc27c7b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.18 +[debug] youtube-dl version 2017.01.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a814b934c..beea17e54 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.22 Extractors + [pornflip] Add support for pornflip.com (#11556, #11795) diff --git a/README.md b/README.md index a606346b2..4f677d0cc 100644 --- a/README.md +++ b/README.md @@ -374,7 +374,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo avprobe) --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; - "best" by default + "best" by default; No effect without -x --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a3c76d5db..b906d443a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -74,6 +74,8 @@ - **awaan:live** - **awaan:season** - **awaan:video** + - **AZMedien**: AZ Medien videos + - **AZMedienShow**: AZ Medien shows - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -572,6 +574,7 @@ - **PolskieRadio** - 
**PolskieRadioCategory** - **PornCom** + - **PornFlip** - **PornHd** - **PornHub**: PornHub and Thumbzilla - **PornHubPlaylist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 669f60f65..9466c9637 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.18' +__version__ = '2017.01.22' From 30dda24de304dd53fc63dfb5bf4672c2ec747014 Mon Sep 17 00:00:00 2001 From: Gaetan Gilbert <gaetan.gilbert@ens-lyon.fr> Date: Sun, 22 Jan 2017 20:27:38 +0100 Subject: [PATCH 57/93] [chirbit] Extract uploader --- youtube_dl/extractor/chirbit.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index f35df143a..4815b34be 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -19,6 +19,7 @@ class ChirbitIE(InfoExtractor): 'title': 'md5:f542ea253f5255240be4da375c6a5d7e', 'description': 'md5:f24a4e22a71763e32da5fed59e47c770', 'duration': 306, + 'uploader': 'Gerryaudio', }, 'params': { 'skip_download': True, @@ -54,6 +55,9 @@ class ChirbitIE(InfoExtractor): duration = parse_duration(self._search_regex( r'class=["\']c-length["\'][^>]*>([^<]+)', webpage, 'duration', fatal=False)) + uploader = self._search_regex( + r'id=["\']chirbit-username["\'][^>]*>([^<]+)', + webpage, 'uploader', fatal=False) return { 'id': audio_id, @@ -61,6 +65,7 @@ class ChirbitIE(InfoExtractor): 'title': title, 'description': description, 'duration': duration, + 'uploader': uploader, } From a089545e036619a798aa19f33085f2b0b87a1b0a Mon Sep 17 00:00:00 2001 From: Alex Seiler <seileralex@gmail.com> Date: Sun, 22 Jan 2017 20:30:29 +0100 Subject: [PATCH 58/93] [azmedien:show] Improve _VALID_URL --- youtube_dl/extractor/azmedien.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 059dc6e4b..a89f71c20 100644 --- 
a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -85,7 +85,20 @@ class AZMedienIE(AZMedienBaseIE): class AZMedienShowIE(AZMedienBaseIE): IE_DESC = 'AZ Medien shows' - _VALID_URL = r'https?://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + (?P<id>[0-9]+-show-[^/\#]+ + (?: + /[0-9]+-episode-[^/\#]+ + )? + )$ + ''' _TESTS = [{ # URL with 'episode' From 8bc0800d7cf24b17204f0fb3c6e76327ed8d527f Mon Sep 17 00:00:00 2001 From: Grzegorz P <Grzechooo@users.noreply.github.com> Date: Sun, 22 Jan 2017 20:35:38 +0100 Subject: [PATCH 59/93] [youtube:playlist] Fix nonexistent/private playlist detection (closes #11604) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 63597dd16..644653357 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1998,7 +1998,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) - for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page): + # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604) + for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match): From 4201ba13e674788c36ae69fbfbffc4b246717d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Jan 2017 02:49:56 +0700 Subject: [PATCH 60/93] [youtube:playlist] Fix nonexistent/private playlist detection and skip private tests --- youtube_dl/extractor/youtube.py | 15 
++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 644653357..5202beb3e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1856,6 +1856,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'title': 'YDL_Empty_List', }, 'playlist_count': 0, + 'skip': 'This playlist is private', }, { 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', @@ -1887,6 +1888,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', }, 'playlist_count': 2, + 'skip': 'This playlist is private', }, { 'note': 'embedded', 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', @@ -2002,11 +2004,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private - if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match): - raise ExtractorError( - 'The playlist doesn\'t exist or is private, use --username or ' - '--netrc to access it.', - expected=True) + mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match) + if mobj: + reason = mobj.group('reason') + message = 'This playlist %s' % reason + if 'private' in reason: + message += ', use --username or --netrc to access it' + message += '.' + raise ExtractorError(message, expected=True) elif re.match(r'[^<]*Invalid parameters[^<]*', match): raise ExtractorError( 'Invalid parameters. 
Maybe URL is incorrect.', From 6d119c2a6bdd2a987ef2e7553b357bd4a3f18690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Jan 2017 03:50:39 +0700 Subject: [PATCH 61/93] [24video] Fix extraction (closes #11811) --- youtube_dl/extractor/twentyfourvideo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 1093a3829..a983ebf05 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -12,7 +12,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -43,7 +43,7 @@ class TwentyFourVideoIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.24video.net/video/view/%s' % video_id, video_id) + 'http://www.24video.sex/video/view/%s' % video_id, video_id) title = self._og_search_title(webpage) description = self._html_search_regex( @@ -69,11 +69,11 @@ class TwentyFourVideoIE(InfoExtractor): # Sets some cookies self._download_xml( - r'http://www.24video.net/video/xml/%s?mode=init' % video_id, + r'http://www.24video.sex/video/xml/%s?mode=init' % video_id, video_id, 'Downloading init XML') video_xml = self._download_xml( - 'http://www.24video.net/video/xml/%s?mode=play' % video_id, + 'http://www.24video.sex/video/xml/%s?mode=play' % video_id, video_id, 'Downloading video XML') video = xpath_element(video_xml, './/video', 'video', fatal=True) From 0c1c6f4b9f97375ffc68cbc9c7276838f7bf8514 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 Jan 2017 23:31:43 +0800 
Subject: [PATCH 62/93] [utils] Add another date format seen in NextTV --- youtube_dl/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 12863e74a..98acc2b45 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -143,6 +143,7 @@ DATE_FORMATS = ( '%Y/%m/%d', '%Y/%m/%d %H:%M', '%Y/%m/%d %H:%M:%S', + '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f', '%d.%m.%Y %H:%M', From bc35ed3fb6fcae88d59fd440b505b9e1a7cf112e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 Jan 2017 23:33:30 +0800 Subject: [PATCH 63/93] =?UTF-8?q?[nextmedia]=20Add=20support=20for=20NextT?= =?UTF-8?q?V=20(=E5=A3=B9=E9=9B=BB=E8=A6=96)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog | 6 ++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nextmedia.py | 54 +++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index beea17e54..ba2f5cffc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [nextmedia] Add support for NextTV (壹電視) + + version 2017.01.22 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cfddf5b92..e23b5d0f6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -598,6 +598,7 @@ from .nextmedia import ( NextMediaIE, NextMediaActionNewsIE, AppleDailyIE, + NextTVIE, ) from .nfb import NFBIE from .nfl import NFLIE diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index 626ed8b49..680f03aad 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -3,7 +3,14 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urlparse -from ..utils import parse_iso8601 +from ..utils import ( + clean_html, + get_element_by_class, + 
int_or_none, + parse_iso8601, + remove_start, + unified_timestamp, +) class NextMediaIE(InfoExtractor): @@ -184,3 +191,48 @@ class AppleDailyIE(NextMediaIE): def _fetch_description(self, page): return self._html_search_meta('description', page, 'news description') + + +class NextTVIE(InfoExtractor): + IE_DESC = '壹電視' + _VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671', + 'info_dict': { + 'id': '11779671', + 'ext': 'mp4', + 'title': '「超收稅」近4千億! 藍議員籲發消費券', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1484825400, + 'upload_date': '20170119', + 'view_count': int, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'<h1[^>]*>([^<]+)</h1>', webpage, 'title') + + data = self._hidden_inputs(webpage) + + video_url = data['ntt-vod-src-detailview'] + + date_str = get_element_by_class('date', webpage) + timestamp = unified_timestamp(date_str + '+0800') if date_str else None + + view_count = int_or_none(remove_start( + clean_html(get_element_by_class('click', webpage)), '點閱:')) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'thumbnail': data.get('ntt-vod-img-src'), + 'timestamp': timestamp, + 'view_count': view_count, + } From b494d6856c55bd351107fd7266f8ac2eeaee341f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 02:50:49 +0700 Subject: [PATCH 64/93] [pluralsight] Fix extraction (closes #11820) --- youtube_dl/extractor/pluralsight.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 0ffd41ecd..5c798e874 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -157,13 +157,10 @@ class PluralsightIE(PluralsightBaseIE): display_id = 
'%s-%s' % (name, clip_id) - parsed_url = compat_urlparse.urlparse(url) - - payload_url = compat_urlparse.urlunparse(parsed_url._replace( - netloc='app.pluralsight.com', path='player/api/v1/payload')) - course = self._download_json( - payload_url, display_id, headers={'Referer': url})['payload']['course'] + 'https://app.pluralsight.com/player/user/api/v1/player/payload', + display_id, data=urlencode_postdata({'courseId': course_name}), + headers={'Referer': url}) collection = course['modules'] From ee4c091ce5bb3732c3016410230f45f2283e5055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 02:56:19 +0700 Subject: [PATCH 65/93] [ChangeLog] Actualize --- ChangeLog | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog b/ChangeLog index ba2f5cffc..406301549 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,11 @@ version <unreleased> Extractors +* [pluralsight] Fix extraction (#11820) + [nextmedia] Add support for NextTV (壹電視) +* [24video] Fix extraction (#11811) +* [youtube:playlist] Fix nonexistent and private playlist detection (#11604) ++ [chirbit] Extract uploader (#11809) version 2017.01.22 From c3a65c3de0667b8de4af8fdc8c1eb04a1498e104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 02:58:37 +0700 Subject: [PATCH 66/93] release 2017.01.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 30cc27c7b..f771d72c0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.22 +[debug] youtube-dl version 2017.01.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 406301549..4bc30cff7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.24 Extractors * [pluralsight] Fix extraction (#11820) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b906d443a..2d28b3f72 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -485,6 +485,7 @@ - **Newstube** - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 + - **NextTV**: 壹電視 - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9466c9637..8a66c2fb9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import 
unicode_literals -__version__ = '2017.01.22' +__version__ = '2017.01.24' From d61aa5eb37244a04caa09f1f238a4f81366c109b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 22:46:40 +0700 Subject: [PATCH 67/93] [vimeo:review] Fix config URL extraction (closes #11821) --- youtube_dl/extractor/vimeo.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a6bbd4c05..c12eeadd4 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -884,10 +884,14 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _get_config_url(self, webpage_url, video_id, video_password_verified=False): webpage = self._download_webpage(webpage_url, video_id) - data = self._parse_json(self._search_regex( - r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', - default=NO_DEFAULT if video_password_verified else '{}'), video_id) - config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') + config_url = self._html_search_regex( + r'data-config-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'config URL', default=None, group='url') + if not config_url: + data = self._parse_json(self._search_regex( + r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', + default=NO_DEFAULT if video_password_verified else '{}'), video_id) + config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( From 74af9c700d308e3638db0ff2e4510770f9daf31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 22:55:49 +0700 Subject: [PATCH 68/93] [konserthusetplay] Add support for hls formats (closes #11823) --- youtube_dl/extractor/konserthusetplay.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index 55291c66f..7e6ea9696 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + determine_ext, float_or_none, int_or_none, ) @@ -42,12 +43,18 @@ class KonserthusetPlayIE(InfoExtractor): player_config = media['playerconfig'] playlist = player_config['playlist'] - source = next(f for f in playlist if f.get('bitrates')) + source = next(f for f in playlist if f.get('bitrates') or f.get('provider')) FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4' formats = [] + m3u8_url = source.get('url') + if m3u8_url and determine_ext(m3u8_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + fallback_url = source.get('fallbackUrl') fallback_format_id = None if fallback_url: From 23b35a634e06d9b92c9650b0d66a3d5d7eb03a54 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 24 Jan 2017 16:55:07 +0100 Subject: [PATCH 69/93] [crackle] improve extraction - extract vtt subtitles - extract multiple resolutions for thumbnails - pass geo verification proxy headers - add support for mobile urls --- youtube_dl/extractor/crackle.py | 53 ++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 25c5e7d04..377fb45e9 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -6,7 +6,7 @@ from ..utils import int_or_none class CrackleIE(InfoExtractor): - _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' + _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' _TEST = { 'url': 
'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', 'info_dict': { @@ -31,8 +31,32 @@ class CrackleIE(InfoExtractor): } } + _THUMBNAIL_RES = [ + (120, 90), + (208, 156), + (220, 124), + (220, 220), + (240, 180), + (250, 141), + (315, 236), + (320, 180), + (360, 203), + (400, 300), + (421, 316), + (460, 330), + (460, 460), + (462, 260), + (480, 270), + (587, 330), + (640, 480), + (700, 330), + (700, 394), + (854, 480), + (1024, 1024), + (1920, 1080), + ] + # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx - _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' _MEDIA_FILE_SLOTS = { 'c544.flv': { 'width': 544, @@ -61,17 +85,25 @@ class CrackleIE(InfoExtractor): item = self._download_xml( 'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, - video_id).find('i') + video_id, headers=self.geo_verification_headers()).find('i') title = item.attrib['t'] subtitles = {} formats = self._extract_m3u8_formats( 'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), video_id, 'mp4', m3u8_id='hls', fatal=None) - thumbnail = None + thumbnails = [] path = item.attrib.get('p') if path: - thumbnail = self._THUMBNAIL_TEMPLATE % path + for width, height in self._THUMBNAIL_RES: + res = '%dx%d' % (width, height) + thumbnails.append({ + 'id': res, + 'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res), + 'width': width, + 'height': height, + 'resolution': res, + }) http_base_url = 'http://ahttp.crackle.com/' + path for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): formats.append({ @@ -86,10 +118,11 @@ class CrackleIE(InfoExtractor): if locale and v: if locale not in subtitles: subtitles[locale] = [] - subtitles[locale] = [{ - 'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v), - 'ext': 'ttml', - }] + for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')): + 
subtitles.setdefault(locale, []).append({ + 'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext), + 'ext': ext, + }) self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) return { @@ -100,7 +133,7 @@ class CrackleIE(InfoExtractor): 'series': item.attrib.get('sn'), 'season_number': int_or_none(item.attrib.get('se')), 'episode_number': int_or_none(item.attrib.get('ep')), - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'subtitles': subtitles, 'formats': formats, } From af59bddc4e4a6c260e7966fe75d9d687c3b13b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 23:02:20 +0700 Subject: [PATCH 70/93] [konserthusetplay] Extract subtitles (#11823) --- youtube_dl/extractor/konserthusetplay.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index 7e6ea9696..3ae2aa317 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( determine_ext, float_or_none, @@ -104,6 +105,13 @@ class KonserthusetPlayIE(InfoExtractor): thumbnail = media.get('image') duration = float_or_none(media.get('duration'), 1000) + subtitles = {} + captions = source.get('captionsAvailableLanguages') + if isinstance(captions, dict): + for lang, subtitle_url in captions.items(): + if lang != 'none' and isinstance(subtitle_url, compat_str): + subtitles.setdefault(lang, []).append({'url': subtitle_url}) + return { 'id': video_id, 'title': title, @@ -111,4 +119,5 @@ class KonserthusetPlayIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'subtitles': subtitles, } From c60089c0222433775dcc1305d85b42fc6158c8df Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 25 
Jan 2017 07:38:17 +0100 Subject: [PATCH 71/93] [afreecatv:global] Add new extractor(closes #11807) --- youtube_dl/extractor/afreecatv.py | 92 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 75b366993..4f6cdb8a2 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -18,6 +18,7 @@ from ..utils import ( class AfreecaTVIE(InfoExtractor): + IE_NAME = 'afreecatv' IE_DESC = 'afreecatv.com' _VALID_URL = r'''(?x) https?:// @@ -143,3 +144,94 @@ class AfreecaTVIE(InfoExtractor): expected=True) return info + + +class AfreecaTVGlobalIE(AfreecaTVIE): + IE_NAME = 'afreecatv:global' + _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?' + _TESTS = [{ + 'url': 'http://afreeca.tv/36853014/v/58301', + 'info_dict': { + 'id': '58301', + 'title': 'tryhard top100', + 'uploader_id': '36853014', + 'uploader': 'makgi Hearthstone Live!', + }, + 'playlist_count': 3, + }] + + def _real_extract(self, url): + channel_id, video_id = re.match(self._VALID_URL, url).groups() + video_type = 'video' if video_id else 'live' + query = { + 'pt': 'view', + 'bid': channel_id, + } + if video_id: + query['vno'] = video_id + video_data = self._download_json( + 'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type), + video_id or channel_id, query=query)['channel'] + + if video_data.get('result') != 1: + raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg'])) + + title = video_data['title'] + + info = { + 'thumbnail': video_data.get('thumb'), + 'view_count': int_or_none(video_data.get('vcnt')), + 'age_limit': int_or_none(video_data.get('grade')), + 'uploader_id': channel_id, + 'uploader': video_data.get('cname'), + } + + if video_id: + entries = [] + for i, f in enumerate(video_data.get('flist', [])): + video_key = self.parse_video_key(f.get('key', 
'')) + f_url = f.get('file') + if not video_key or not f_url: + continue + entries.append({ + 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), + 'title': title, + 'upload_date': video_key.get('upload_date'), + 'duration': int_or_none(f.get('length')), + 'url': f_url, + 'protocol': 'm3u8_native', + 'ext': 'mp4', + }) + + info.update({ + 'id': video_id, + 'title': title, + 'duration': int_or_none(video_data.get('length')), + }) + if len(entries) > 1: + info['_type'] = 'multi_video' + info['entries'] = entries + elif len(entries) == 1: + i = entries[0].copy() + i.update(info) + info = i + else: + formats = [] + for s in video_data.get('strm', []): + s_url = s.get('purl') + if not s_url: + continue + # TODO: extract rtmp formats + if s.get('stype') == 'HLS': + formats.extend(self._extract_m3u8_formats( + s_url, channel_id, 'mp4', fatal=False)) + self._sort_formats(formats) + + info.update({ + 'id': channel_id, + 'title': self._live_title(title), + 'is_live': True, + 'formats': formats, + }) + + return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e23b5d0f6..f09b4cf2c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -30,7 +30,10 @@ from .aenetworks import ( AENetworksIE, HistoryTopicIE, ) -from .afreecatv import AfreecaTVIE +from .afreecatv import ( + AfreecaTVIE, + AfreecaTVGlobalIE, +) from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE From b8a03b66601f6af9e6b4009cba634dac6e0d30e6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 25 Jan 2017 07:39:11 +0100 Subject: [PATCH 72/93] [srgssr] fix rts video extraction(closes #11831) --- youtube_dl/extractor/srgssr.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 47aa887cc..319a48a7a 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ 
-48,9 +48,6 @@ class SRGSSRIE(InfoExtractor): def _real_extract(self, url): bu, media_type, media_id = re.match(self._VALID_URL, url).groups() - if bu == 'rts': - return self.url_result('rts:%s' % media_id, 'RTS') - media_data = self.get_media_data(bu, media_type, media_id) metadata = media_data['AssetMetadatas']['AssetMetadata'][0] From 17f8deeb481a7aa3079d7e11da2c255f893b9e8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:27:22 +0700 Subject: [PATCH 73/93] [extractor/generic] Add support for openload embeds (closes #11536, closes #11812) --- youtube_dl/extractor/generic.py | 7 +++++++ youtube_dl/extractor/openload.py | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40201f311..a23486620 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -80,6 +80,7 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE +from .openload import OpenloadIE class GenericIE(InfoExtractor): @@ -2431,6 +2432,12 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Look for Openload embeds + openload_urls = OpenloadIE._extract_urls(webpage) + if openload_urls: + return _playlist_from_matches( + openload_urls, ie=OpenloadIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 3d4ad7dca..4893ade5d 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_chr from ..utils import ( @@ -56,6 +58,12 @@ class OpenloadIE(InfoExtractor): 
'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) From c1fa3f46727ccbbb75389ce82753f2e63449ece6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:28:45 +0700 Subject: [PATCH 74/93] [openload] Fallback video extension to mp4 --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 4893ade5d..32289d897 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -101,7 +101,7 @@ class OpenloadIE(InfoExtractor): 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, # Seems all videos have extensions in their titles - 'ext': determine_ext(title), + 'ext': determine_ext(title, 'mp4'), 'subtitles': subtitles, } return info_dict From 2c302cf66b235aed6be5786489f259c0fa993fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:33:46 +0700 Subject: [PATCH 75/93] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4bc30cff7..e0af3f671 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version <unreleased> + +Extractors ++ [openload] Fallback video extension to mp4 ++ [extractor/generic] Add support for Openload embeds (#11536, #11812) +* [srgssr] Fix rts video extraction (#11831) ++ [afreecatv:global] Add support for afreeca.tv (#11807) ++ [crackle] Extract vtt subtitles ++ [crackle] Extract multiple resolutions for thumbnails ++ [crackle] Add support for mobile URLs ++ [konserthusetplay] Extract subtitles 
(#11823) ++ [konserthusetplay] Add support for HLS videos (#11823) +* [vimeo:review] Fix config URL extraction (#11821) + + version 2017.01.24 Extractors From 2417d41535a907a2da05a8b6490198916279d2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:36:03 +0700 Subject: [PATCH 76/93] release 2017.01.25 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f771d72c0..4d409f785 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.25** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.24 +[debug] youtube-dl version 2017.01.25 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e0af3f671..ff305d7e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.25 Extractors + [openload] Fallback video extension to mp4 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2d28b3f72..f640cfcaa 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -33,7 +33,8 @@ - **AdobeTVVideo** - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network - - **AfreecaTV**: afreecatv.com + - **afreecatv**: afreecatv.com + - **afreecatv:global**: afreecatv.com - **AirMozilla** - **AlJazeera** - **Allocine** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8a66c2fb9..c23fe85de 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.24' +__version__ = '2017.01.25' From 556dbe7fe35667cb061dbf0ee84d3a065ad11055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Jan 2017 21:43:14 +0700 Subject: [PATCH 77/93] [youtube] Add fallback for duration extraction (closes #11841) 
--- youtube_dl/extractor/youtube.py | 36 ++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5202beb3e..630586796 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -40,6 +40,7 @@ from ..utils import ( sanitized_Request, smuggle_url, str_to_int, + try_get, unescapeHTML, unified_strdate, unsmuggle_url, @@ -383,6 +384,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], + 'duration': 10, 'like_count': int, 'dislike_count': int, 'start_time': 1, @@ -402,6 +404,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', 'iconic ep', 'iconic', 'love', 'it'], + 'duration': 180, 'uploader': 'Icona Pop', 'uploader_id': 'IconaPop', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop', @@ -419,6 +422,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 'alt_title': 'Tunnel Vision', 'description': 'md5:64249768eec3bc4276236606ea996373', + 'duration': 419, 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', @@ -458,6 +462,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], + 'duration': 10, 'like_count': int, 
'dislike_count': int, }, @@ -493,6 +498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'm4a', 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson', 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d', + 'duration': 244, 'uploader': 'AfrojackVEVO', 'uploader_id': 'AfrojackVEVO', 'upload_date': '20131011', @@ -512,6 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Taylor Swift - Shake It Off', 'alt_title': 'Shake It Off', 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3', + 'duration': 242, 'uploader': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO', 'upload_date': '20140818', @@ -529,6 +536,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'T4XJQO3qol8', 'ext': 'mp4', + 'duration': 219, 'upload_date': '20100909', 'uploader': 'The Amazing Atheist', 'uploader_id': 'TheAmazingAtheist', @@ -546,6 +554,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', + 'duration': 142, 'uploader': 'The Witcher', 'uploader_id': 'WitcherGame', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', @@ -562,6 +571,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'Dedication To My Ex (Miss That) (Lyric Video)', 'description': 'md5:33765bb339e1b47e7e72b5490139bb41', + 'duration': 247, 'uploader': 'LloydVEVO', 'uploader_id': 'LloydVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO', @@ -576,6 +586,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': '__2ABJjxzNo', 'ext': 'mp4', + 'duration': 266, 'upload_date': '20100430', 'uploader_id': 'deadmau5', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', @@ -596,6 +607,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'lqQg6PlCWgI', 'ext': 'mp4', + 'duration': 6085, 'upload_date': '20150827', 'uploader_id': 
'olympic', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', @@ -615,6 +627,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': '_b-2C3KPAM0', 'ext': 'mp4', 'stretched_ratio': 16 / 9., + 'duration': 85, 'upload_date': '20110310', 'uploader_id': 'AllenMeow', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', @@ -649,6 +662,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'md5:7b81415841e02ecd4313668cde88737a', 'description': 'md5:116377fd2963b81ec4ce64b542173306', + 'duration': 220, 'upload_date': '20150625', 'uploader_id': 'dorappi2000', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', @@ -691,6 +705,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7335, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -703,6 +718,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7337, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -715,6 +731,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (grizzle)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7337, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -727,6 +744,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (zim)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7334, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -768,6 +786,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': '{dark walk}; Loki/AC/Dishonored; collab 
w/Elflover21', 'alt_title': 'Dark Walk', 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', + 'duration': 133, 'upload_date': '20151119', 'uploader_id': 'IronSoulElf', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', @@ -809,10 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'md5:e41008789470fc2533a3252216f1c1d1', 'description': 'md5:a677553cf0840649b731a3024aeff4cc', + 'duration': 721, 'upload_date': '20150127', 'uploader_id': 'BerkmanCenter', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', - 'uploader': 'BerkmanCenter', + 'uploader': 'The Berkman Klein Center for Internet & Society', 'license': 'Creative Commons Attribution license (reuse allowed)', }, 'params': { @@ -827,6 +847,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', 'description': 'md5:dda0d780d5a6e120758d1711d062a867', + 'duration': 4060, 'upload_date': '20151119', 'uploader': 'Bernie 2016', 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', @@ -871,7 +892,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'iqKdEhx-dD4', 'ext': 'mp4', 'title': 'Isolation - Mind Field (Ep 1)', - 'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822', + 'description': 'md5:8013b7ddea787342608f63a13ddc9492', + 'duration': 2085, 'upload_date': '20170118', 'uploader': 'Vsauce', 'uploader_id': 'Vsauce', @@ -1516,11 +1538,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_subtitles = self.extract_subtitles(video_id, video_webpage) automatic_captions = self.extract_automatic_captions(video_id, video_webpage) - if 'length_seconds' not in video_info: - self._downloader.report_warning('unable to extract video duration') - video_duration = None - else: - video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0])) + video_duration = try_get( + video_info, lambda x: int_or_none(x['length_seconds'][0])) + if not video_duration: + video_duration = 
parse_duration(self._html_search_meta( + 'duration', video_webpage, 'video duration')) # annotations video_annotations = None From cf0cabbe5011228c78a3d88c1a1b179b10333d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Jan 2017 21:49:34 +0700 Subject: [PATCH 78/93] [cmt,mtv,southpark] Add support for episode URLs (closes #11837) --- youtube_dl/extractor/cmt.py | 2 +- youtube_dl/extractor/mtv.py | 5 ++++- youtube_dl/extractor/southpark.py | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index f6b794fb3..e701fbeab 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -5,7 +5,7 @@ from .mtv import MTVIE class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e48ea2481..855c3996f 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -304,7 +304,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): IE_NAME = 'mtv' - _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P<id>[^/?#.]+)' + _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.mtv.com/feeds/mrss/' _TESTS = [{ @@ -321,6 +321,9 @@ class MTVIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101', 'only_matching': True, + }, { + 'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713', + 'only_matching': True, }] 
diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 08f8c5744..d8ce416fc 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' @@ -75,7 +75,7 @@ class SouthParkDeIE(SouthParkIE): class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' _TESTS = [{ From 9bccdc7004f48963da9a51b6fe24a398d59da725 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 26 Jan 2017 16:06:01 +0100 Subject: [PATCH 79/93] [vevo] remove request to old api and catch apiv2 errors --- youtube_dl/extractor/vevo.py | 267 +++++++++++------------------------ 1 file changed, 79 insertions(+), 188 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index f0a8075fb..c4e37f694 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -4,9 +4,9 @@ import re from .common import InfoExtractor from ..compat import ( - compat_etree_fromstring, compat_str, compat_urlparse, + compat_HTTPError, ) from ..utils import ( ExtractorError, @@ -140,21 +140,6 @@ class VevoIE(VevoBaseIE): 'url': 'http://www.vevo.com/watch/INS171400764', 'only_matching': True, }] - _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com' - _SOURCE_TYPES = { - 0: 'youtube', - 1: 'brightcove', - 2: 'http', - 3: 
'hls_ios', - 4: 'hls', - 5: 'smil', # http - 7: 'f4m_cc', - 8: 'f4m_ak', - 9: 'f4m_l3', - 10: 'ism', - 13: 'smil', # rtmp - 18: 'dash', - } _VERSIONS = { 0: 'youtube', # only in AuthenticateVideo videoVersions 1: 'level3', @@ -163,41 +148,6 @@ class VevoIE(VevoBaseIE): 4: 'amazon', } - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): - formats = [] - els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') - for el in els: - src = el.attrib['src'] - m = re.match(r'''(?xi) - (?P<ext>[a-z0-9]+): - (?P<path> - [/a-z0-9]+ # The directory and main part of the URL - _(?P<tbr>[0-9]+)k - _(?P<width>[0-9]+)x(?P<height>[0-9]+) - _(?P<vcodec>[a-z0-9]+) - _(?P<vbr>[0-9]+) - _(?P<acodec>[a-z0-9]+) - _(?P<abr>[0-9]+) - \.[a-z0-9]+ # File extension - )''', src) - if not m: - continue - - format_url = self._SMIL_BASE_URL + m.group('path') - formats.append({ - 'url': format_url, - 'format_id': 'smil_' + m.group('tbr'), - 'vcodec': m.group('vcodec'), - 'acodec': m.group('acodec'), - 'tbr': int(m.group('tbr')), - 'vbr': int(m.group('vbr')), - 'abr': int(m.group('abr')), - 'ext': m.group('ext'), - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - return formats - def _initialize_api(self, video_id): req = sanitized_Request( 'http://www.vevo.com/auth', data=b'') @@ -214,148 +164,91 @@ class VevoIE(VevoBaseIE): self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token'] def _call_api(self, path, *args, **kwargs): - return self._download_json(self._api_url_template % path, *args, **kwargs) + try: + data = self._download_json(self._api_url_template % path, *args, **kwargs) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + errors = self._parse_json(e.cause.read().decode(), None)['errors'] + error_message = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, 
error_message), expected=True) + raise + return data def _real_extract(self, url): video_id = self._match_id(url) - json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id - response = self._download_json( - json_url, video_id, 'Downloading video info', - 'Unable to download info', fatal=False) or {} - video_info = response.get('video') or {} + self._initialize_api(video_id) + + video_info = self._call_api( + 'video/%s' % video_id, video_id, 'Downloading api video info', + 'Failed to download video info') + + video_versions = self._call_api( + 'video/%s/streams' % video_id, video_id, + 'Downloading video versions info', + 'Failed to download video versions info', + fatal=False) + + # Some videos are only available via webpage (e.g. + # https://github.com/rg3/youtube-dl/issues/9366) + if not video_versions: + webpage = self._download_webpage(url, video_id) + video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0] + + uploader = None artist = None featured_artist = None - uploader = None - view_count = None + artists = video_info.get('artists') + for curr_artist in artists: + if curr_artist.get('role') == 'Featured': + featured_artist = curr_artist['name'] + else: + artist = uploader = curr_artist['name'] + formats = [] + for video_version in video_versions: + version = self._VERSIONS.get(video_version['version']) + version_url = video_version.get('url') + if not version_url: + continue - if not video_info: - try: - self._initialize_api(video_id) - except ExtractorError: - ytid = response.get('errorInfo', {}).get('ytid') - if ytid: - self.report_warning( - 'Video is geoblocked, trying with the YouTube video %s' % ytid) - return self.url_result(ytid, 'Youtube', ytid) - - raise - - video_info = self._call_api( - 'video/%s' % video_id, video_id, 'Downloading api video info', - 'Failed to download video info') - - video_versions = self._call_api( - 'video/%s/streams' % video_id, video_id, - 'Downloading video versions 
info', - 'Failed to download video versions info', - fatal=False) - - # Some videos are only available via webpage (e.g. - # https://github.com/rg3/youtube-dl/issues/9366) - if not video_versions: - webpage = self._download_webpage(url, video_id) - video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0] - - timestamp = parse_iso8601(video_info.get('releaseDate')) - artists = video_info.get('artists') - for curr_artist in artists: - if curr_artist.get('role') == 'Featured': - featured_artist = curr_artist['name'] - else: - artist = uploader = curr_artist['name'] - view_count = int_or_none(video_info.get('views', {}).get('total')) - - for video_version in video_versions: - version = self._VERSIONS.get(video_version['version']) - version_url = video_version.get('url') - if not version_url: + if '.ism' in version_url: + continue + elif '.mpd' in version_url: + formats.extend(self._extract_mpd_formats( + version_url, video_id, mpd_id='dash-%s' % version, + note='Downloading %s MPD information' % version, + errnote='Failed to download %s MPD information' % version, + fatal=False)) + elif '.m3u8' in version_url: + formats.extend(self._extract_m3u8_formats( + version_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls-%s' % version, + note='Downloading %s m3u8 information' % version, + errnote='Failed to download %s m3u8 information' % version, + fatal=False)) + else: + m = re.search(r'''(?xi) + _(?P<width>[0-9]+)x(?P<height>[0-9]+) + _(?P<vcodec>[a-z0-9]+) + _(?P<vbr>[0-9]+) + _(?P<acodec>[a-z0-9]+) + _(?P<abr>[0-9]+) + \.(?P<ext>[a-z0-9]+)''', version_url) + if not m: continue - if '.ism' in version_url: - continue - elif '.mpd' in version_url: - formats.extend(self._extract_mpd_formats( - version_url, video_id, mpd_id='dash-%s' % version, - note='Downloading %s MPD information' % version, - errnote='Failed to download %s MPD information' % version, - fatal=False)) - elif '.m3u8' in version_url: - formats.extend(self._extract_m3u8_formats( - 
version_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls-%s' % version, - note='Downloading %s m3u8 information' % version, - errnote='Failed to download %s m3u8 information' % version, - fatal=False)) - else: - m = re.search(r'''(?xi) - _(?P<width>[0-9]+)x(?P<height>[0-9]+) - _(?P<vcodec>[a-z0-9]+) - _(?P<vbr>[0-9]+) - _(?P<acodec>[a-z0-9]+) - _(?P<abr>[0-9]+) - \.(?P<ext>[a-z0-9]+)''', version_url) - if not m: - continue - - formats.append({ - 'url': version_url, - 'format_id': 'http-%s-%s' % (version, video_version['quality']), - 'vcodec': m.group('vcodec'), - 'acodec': m.group('acodec'), - 'vbr': int(m.group('vbr')), - 'abr': int(m.group('abr')), - 'ext': m.group('ext'), - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - else: - timestamp = int_or_none(self._search_regex( - r'/Date\((\d+)\)/', - video_info['releaseDate'], 'release date', fatal=False), - scale=1000) - artists = video_info.get('mainArtists') - if artists: - artist = uploader = artists[0]['artistName'] - - featured_artists = video_info.get('featuredArtists') - if featured_artists: - featured_artist = featured_artists[0]['artistName'] - - smil_parsed = False - for video_version in video_info['videoVersions']: - version = self._VERSIONS.get(video_version['version']) - if version == 'youtube': - continue - else: - source_type = self._SOURCE_TYPES.get(video_version['sourceType']) - renditions = compat_etree_fromstring(video_version['data']) - if source_type == 'http': - for rend in renditions.findall('rendition'): - attr = rend.attrib - formats.append({ - 'url': attr['url'], - 'format_id': 'http-%s-%s' % (version, attr['name']), - 'height': int_or_none(attr.get('frameheight')), - 'width': int_or_none(attr.get('frameWidth')), - 'tbr': int_or_none(attr.get('totalBitrate')), - 'vbr': int_or_none(attr.get('videoBitrate')), - 'abr': int_or_none(attr.get('audioBitrate')), - 'vcodec': attr.get('videoCodec'), - 'acodec': attr.get('audioCodec'), - }) - elif source_type == 'hls': - 
formats.extend(self._extract_m3u8_formats( - renditions.find('rendition').attrib['url'], video_id, - 'mp4', 'm3u8_native', m3u8_id='hls-%s' % version, - note='Downloading %s m3u8 information' % version, - errnote='Failed to download %s m3u8 information' % version, - fatal=False)) - elif source_type == 'smil' and version == 'level3' and not smil_parsed: - formats.extend(self._extract_smil_formats( - renditions.find('rendition').attrib['url'], video_id, False)) - smil_parsed = True + formats.append({ + 'url': version_url, + 'format_id': 'http-%s-%s' % (version, video_version['quality']), + 'vcodec': m.group('vcodec'), + 'acodec': m.group('acodec'), + 'vbr': int(m.group('vbr')), + 'abr': int(m.group('abr')), + 'ext': m.group('ext'), + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) self._sort_formats(formats) track = video_info['title'] @@ -376,17 +269,15 @@ class VevoIE(VevoBaseIE): else: age_limit = None - duration = video_info.get('duration') - return { 'id': video_id, 'title': title, 'formats': formats, 'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'), - 'timestamp': timestamp, + 'timestamp': parse_iso8601(video_info.get('releaseDate')), 'uploader': uploader, - 'duration': duration, - 'view_count': view_count, + 'duration': int_or_none(video_info.get('duration')), + 'view_count': int_or_none(video_info.get('views', {}).get('total')), 'age_limit': age_limit, 'track': track, 'artist': uploader, From b3277115a192b88df34692e42f62f39bd4a65bac Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 26 Jan 2017 16:14:42 +0100 Subject: [PATCH 80/93] [disney] Add new extractor(closes #7409)(closes #11801)(#4975)(#11000) --- youtube_dl/extractor/disney.py | 115 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 116 insertions(+) create mode 100644 youtube_dl/extractor/disney.py diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py new file mode 
100644 index 000000000..396873c6d --- /dev/null +++ b/youtube_dl/extractor/disney.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unified_strdate, + compat_str, + determine_ext, +) + + +class DisneyIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})''' + _TESTS = [{ + 'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', + 'info_dict': { + 'id': '545ed1857afee5a0ec239977', + 'ext': 'mp4', + 'title': 'Moana - Trailer', + 'description': 'A fun adventure for the entire Family! Bring home Moana on Digital HD Feb 21 & Blu-ray March 7', + 'upload_date': '20170112', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', + 'only_matching': True, + }, { + 'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114', + 'only_matching': True, + }, { + 'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2', + 'only_matching': True, + }, { + 'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d', + 'only_matching': True, + }, { + 'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097', + 'only_matching': True, + }] + + def _real_extract(self, url): + domain, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage( + 'http://%s/embed/%s' % (domain, video_id), video_id) + video_data = self._parse_json(self._search_regex( + r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video'] + + for external in video_data.get('externals', []): + if external.get('source') == 'vevo': + return 
self.url_result('vevo:' + external['data_id'], 'Vevo') + + title = video_data['title'] + + formats = [] + for flavor in video_data.get('flavors', []): + flavor_format = flavor.get('format') + flavor_url = flavor.get('url') + if not flavor_url or not re.match(r'https?://', flavor_url): + continue + tbr = int_or_none(flavor.get('bitrate')) + if tbr == 99999: + formats.extend(self._extract_m3u8_formats( + flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False)) + continue + format_id = [] + if flavor_format: + format_id.append(flavor_format) + if tbr: + format_id.append(compat_str(tbr)) + ext = determine_ext(flavor_url) + if flavor_format == 'applehttp' or ext == 'm3u8': + ext = 'mp4' + width = int_or_none(flavor.get('width')) + height = int_or_none(flavor.get('height')) + formats.append({ + 'format_id': '-'.join(format_id), + 'url': flavor_url, + 'width': width, + 'height': height, + 'tbr': tbr, + 'ext': ext, + 'vcodec': 'none' if (width == 0 and height == 0) else None, + }) + self._sort_formats(formats) + + subtitles = {} + for caption in video_data.get('captions', []): + caption_url = caption.get('url') + caption_format = caption.get('format') + if not caption_url or caption_format.startswith('unknown'): + continue + subtitles.setdefault(caption.get('language', 'en'), []).append({ + 'url': caption_url, + 'ext': { + 'webvtt': 'vtt', + }.get(caption_format, caption_format), + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description') or video_data.get('short_desc'), + 'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'), + 'duration': int_or_none(video_data.get('duration_sec')), + 'upload_date': unified_strdate(video_data.get('publish_date')), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f09b4cf2c..0c3e081ad 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -251,6 
+251,7 @@ from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from .discoverygo import DiscoveryGoIE +from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE from .dw import ( From c19ef77c3138ecf1ce5c988de2d94031f58b4f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20P=C3=B6schel?= <github@basicmaster.de> Date: Wed, 25 Jan 2017 20:44:03 +0100 Subject: [PATCH 81/93] [jamendo] Extract full title --- youtube_dl/extractor/jamendo.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 51d19e67d..3db07e79f 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -16,7 +16,7 @@ class JamendoIE(InfoExtractor): 'id': '196219', 'display_id': 'stories-from-emona-i', 'ext': 'flac', - 'title': 'Stories from Emona I', + 'title': 'Maya Filipič - Stories from Emona I', 'thumbnail': r're:^https?://.*\.jpg' } } @@ -28,7 +28,7 @@ class JamendoIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - title = self._html_search_meta('name', webpage, 'title') + title = self._search_regex(r'<title>(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -62,21 +62,21 @@ class JamendoAlbumIE(InfoExtractor): 'url': 'https://www.jamendo.com/album/121486/duck-on-cover', 'info_dict': { 'id': '121486', - 'title': 'Duck On Cover' + 'title': 'Shearer - Duck On Cover' }, 'playlist': [{ 'md5': 'e1a2fcb42bda30dfac990212924149a8', 'info_dict': { 'id': '1032333', 'ext': 'flac', - 'title': 'Warmachine' + 'title': 'Shearer - Warmachine' } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { 'id': '1032330', 'ext': 'flac', - 'title': 'Without Your Ghost' + 'title': 'Shearer - Without Your Ghost' } }], 'params': { @@ -90,7 +90,7 @@ class JamendoAlbumIE(InfoExtractor): webpage = 
self._download_webpage(url, mobj.group('display_id')) - title = self._html_search_meta('name', webpage, 'title') + title = self._search_regex(r'(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') entries = [ self.url_result( From 15846398ca0af9154b88a69f594557568c6a4782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 26 Jan 2017 23:23:08 +0700 Subject: [PATCH 82/93] [utils] Improve parse_duration --- test/test_utils.py | 1 + youtube_dl/utils.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index e99bf794e..a74d59f34 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -510,6 +510,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('1 hour 3 minutes'), 3780) self.assertEqual(parse_duration('87 Min.'), 5220) self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) + self.assertEqual(parse_duration('PT00H03M30SZ'), 210) def test_fix_xml_ampersands(self): self.assertEqual( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 98acc2b45..cf46711b9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1773,7 +1773,7 @@ def parse_duration(s): s = s.strip() days, hours, mins, secs, ms = [None] * 5 - m = re.match(r'(?:(?:(?:(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+)(?P\.[0-9]+)?$', s) + m = re.match(r'(?:(?:(?:(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+)(?P\.[0-9]+)?Z?$', s) if m: days, hours, mins, secs, ms = m.groups() else: @@ -1790,11 +1790,11 @@ def parse_duration(s): )? 
(?: (?P[0-9]+)(?P\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* - )?$''', s) + )?Z?$''', s) if m: days, hours, mins, secs, ms = m.groups() else: - m = re.match(r'(?i)(?:(?P[0-9.]+)\s*(?:hours?)|(?P[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s) + m = re.match(r'(?i)(?:(?P[0-9.]+)\s*(?:hours?)|(?P[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s) if m: hours, mins = m.groups() else: From 3cbecdd11121b9c7ff0284e481992f7230806399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 26 Jan 2017 23:25:40 +0700 Subject: [PATCH 83/93] [jamendo] Improve and extract more metadata (closes #11836) --- youtube_dl/extractor/jamendo.py | 65 ++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 3db07e79f..595d7a5b7 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -5,9 +5,27 @@ import re from ..compat import compat_urlparse from .common import InfoExtractor +from ..utils import parse_duration -class JamendoIE(InfoExtractor): +class JamendoBaseIE(InfoExtractor): + def _extract_meta(self, webpage, fatal=True): + title = self._og_search_title( + webpage, default=None) or self._search_regex( + r'([^<]+)', webpage, + 'title', default=None) + if title: + title = self._search_regex( + r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None) + if not title: + title = self._html_search_meta( + 'name', webpage, 'title', fatal=fatal) + mobj = re.search(r'(.+) - (.+)', title or '') + artist, second = mobj.groups() if mobj else [None] * 2 + return title, artist, second + + +class JamendoIE(JamendoBaseIE): _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)' _TEST = { 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', @@ -17,6 +35,9 @@ class JamendoIE(InfoExtractor): 'display_id': 'stories-from-emona-i', 'ext': 'flac', 'title': 'Maya Filipič - Stories from Emona I', + 'artist': 'Maya 
Filipič', + 'track': 'Stories from Emona I', + 'duration': 210, 'thumbnail': r're:^https?://.*\.jpg' } } @@ -28,7 +49,7 @@ class JamendoIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - title = self._search_regex(r'<title>(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') + title, artist, track = self._extract_meta(webpage) formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -46,17 +67,23 @@ class JamendoIE(InfoExtractor): thumbnail = self._html_search_meta( 'image', webpage, 'thumbnail', fatal=False) + duration = parse_duration(self._search_regex( + r']+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']', + webpage, 'duration', fatal=False)) return { 'id': track_id, 'display_id': display_id, 'thumbnail': thumbnail, 'title': title, + 'duration': duration, + 'artist': artist, + 'track': track, 'formats': formats } -class JamendoAlbumIE(InfoExtractor): +class JamendoAlbumIE(JamendoBaseIE): _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P[0-9]+)/(?P[\w-]+)' _TEST = { 'url': 'https://www.jamendo.com/album/121486/duck-on-cover', @@ -69,14 +96,18 @@ class JamendoAlbumIE(InfoExtractor): 'info_dict': { 'id': '1032333', 'ext': 'flac', - 'title': 'Shearer - Warmachine' + 'title': 'Shearer - Warmachine', + 'artist': 'Shearer', + 'track': 'Warmachine', } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { 'id': '1032330', 'ext': 'flac', - 'title': 'Shearer - Without Your Ghost' + 'title': 'Shearer - Without Your Ghost', + 'artist': 'Shearer', + 'track': 'Without Your Ghost', } }], 'params': { @@ -90,18 +121,18 @@ class JamendoAlbumIE(InfoExtractor): webpage = self._download_webpage(url, mobj.group('display_id')) - title = self._search_regex(r'(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') + title, artist, album = self._extract_meta(webpage, fatal=False) - entries = [ - self.url_result( - compat_urlparse.urljoin(url, m.group('path')), - ie=JamendoIE.ie_key(), - video_id=self._search_regex( - 
r'/track/(\d+)', m.group('path'), - 'track id', default=None)) - for m in re.finditer( - r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', - webpage) - ] + entries = [{ + '_type': 'url_transparent', + 'url': compat_urlparse.urljoin(url, m.group('path')), + 'ie_key': JamendoIE.ie_key(), + 'id': self._search_regex( + r'/track/(\d+)', m.group('path'), 'track id', default=None), + 'artist': artist, + 'album': album, + } for m in re.finditer( + r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', + webpage)] return self.playlist_result(entries, album_id, title) From 9463637887ba784e3499410ab0945dcd68002bc1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 26 Jan 2017 18:36:28 +0100 Subject: [PATCH 84/93] [tva] Add new extractor(closes #11842) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tva.py | 54 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/tva.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0c3e081ad..81366f933 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -985,6 +985,7 @@ from .tv2 import ( ) from .tv3 import TV3IE from .tv4 import TV4IE +from .tva import TVAIE from .tvanouvelles import ( TVANouvellesIE, TVANouvellesArticleIE, diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py new file mode 100644 index 000000000..3ced098f9 --- /dev/null +++ b/youtube_dl/extractor/tva.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + smuggle_url, +) + + +class TVAIE(InfoExtractor): + _VALID_URL = r'https?://videos\.tva\.ca/episode/(?P\d+)' + _TEST = { + 'url': 'http://videos.tva.ca/episode/85538', + 'info_dict': { + 'id': '85538', + 'ext': 'mp4', + 'title': 'Épisode du 25 janvier 2017', + 'description': 
'md5:e9e7fb5532ab37984d2dc87229cadf98', + 'upload_date': '20170126', + 'timestamp': 1485442329, + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + "https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id, + video_id, query={ + '$expand': 'Metadata,CustomId', + '$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName', + '$format': 'json', + }) + metadata = video_data.get('Metadata', {}) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'title': video_data['Title'], + 'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}), + 'description': video_data.get('LongDescription') or video_data.get('ShortDescription'), + 'series': video_data.get('ShowName'), + 'episode': metadata.get('EpisodeTitle'), + 'episode_number': int_or_none(metadata.get('EpisodeNumber')), + 'categories': video_data.get('Categories'), + 'average_rating': video_data.get('AverageUserRating'), + 'timestamp': parse_iso8601(video_data.get('CreatedDate')), + 'ie_key': 'Ooyala', + } From b51a4ebed45a3944c02bb3c36778630fd9306de7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 26 Jan 2017 19:15:43 +0100 Subject: [PATCH 85/93] [aenetworks] fix season episodes extraction(fixes #11669) --- youtube_dl/extractor/aenetworks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index c5e079a40..c97317400 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -87,7 +87,7 @@ class AENetworksIE(AENetworksBaseIE): self._html_search_meta('aetn:SeriesTitle', webpage)) elif url_parts_len == 2: entries = [] - for episode_item in re.findall(r'(?s)]+class="[^"]*episode-item[^"]*"[^>]*>', webpage): + for episode_item in 
re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage): episode_attributes = extract_attributes(episode_item) episode_url = compat_urlparse.urljoin( url, episode_attributes['data-canonical']) From 0b23c222ba099d73c287d024f45f90714c15f289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 21:31:26 +0700 Subject: [PATCH 86/93] [twitch:vod] Expand _VALID_URL (closes #11846) --- youtube_dl/extractor/twitch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 6d67bda86..1ca159a4d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -209,7 +209,7 @@ class TwitchVodIE(TwitchItemBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?twitch\.tv/[^/]+/v/| + (?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/| player\.twitch\.tv/\?.*?\bvideo=v ) (?P\d+) @@ -259,6 +259,9 @@ class TwitchVodIE(TwitchItemBaseIE): }, { 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877', 'only_matching': True, + }, { + 'url': 'https://www.twitch.tv/videos/6528877', + 'only_matching': True, }] def _real_extract(self, url): From 489ffc118232056537e86bd0281488e217fce7d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 22:55:42 +0700 Subject: [PATCH 87/93] [soundcloud] Fix track URL extraction (closes #11852) --- youtube_dl/extractor/soundcloud.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5a201eaa8..96bebeec5 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -173,11 +173,12 @@ class SoundcloudIE(InfoExtractor): }) # We have to retrieve the url - streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?' 
- 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token)) format_dict = self._download_json( - streams_url, - track_id, 'Downloading track url') + 'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id, + track_id, 'Downloading track url', query={ + 'client_id': self._CLIENT_ID, + 'secret_token': secret_token, + }) for key, stream_url in format_dict.items(): if key.startswith('http'): From 9b73471801d24cec678226c82cce9e9ece92732e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 23:08:32 +0700 Subject: [PATCH 88/93] [soundcloud] Extract hls formats --- youtube_dl/extractor/soundcloud.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 96bebeec5..55c80e1cc 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -198,6 +198,13 @@ class SoundcloudIE(InfoExtractor): 'ext': 'flv', 'vcodec': 'none', }) + elif key.startswith('hls'): + m3u8_formats = self._extract_m3u8_formats( + stream_url, track_id, 'mp3', entry_protocol='m3u8_native', + m3u8_id=key, fatal=False) + for f in m3u8_formats: + f['vcodec'] = 'none' + formats.extend(m3u8_formats) if not formats: # We fallback to the stream_url in the original info, this From 3a194cb4ecfa8c2590f22236dffc84e1b1565196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 23:16:30 +0700 Subject: [PATCH 89/93] [soundcloud] Improve formats extraction and extract audio bitrate --- youtube_dl/extractor/soundcloud.py | 48 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 55c80e1cc..b3aa4ce26 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -181,46 +181,46 @@ class SoundcloudIE(InfoExtractor): }) for key, stream_url in format_dict.items(): + abr = 
int_or_none(self._search_regex( + r'_(\d+)_url', key, 'audio bitrate', default=None)) if key.startswith('http'): - formats.append({ + stream_formats = [{ 'format_id': key, 'ext': ext, 'url': stream_url, - 'vcodec': 'none', - }) + }] elif key.startswith('rtmp'): # The url doesn't have an rtmp app, we have to extract the playpath url, path = stream_url.split('mp3:', 1) - formats.append({ + stream_formats = [{ 'format_id': key, 'url': url, 'play_path': 'mp3:' + path, 'ext': 'flv', - 'vcodec': 'none', - }) + }] elif key.startswith('hls'): - m3u8_formats = self._extract_m3u8_formats( + stream_formats = self._extract_m3u8_formats( stream_url, track_id, 'mp3', entry_protocol='m3u8_native', m3u8_id=key, fatal=False) - for f in m3u8_formats: - f['vcodec'] = 'none' - formats.extend(m3u8_formats) + else: + continue - if not formats: - # We fallback to the stream_url in the original info, this - # cannot be always used, sometimes it can give an HTTP 404 error - formats.append({ - 'format_id': 'fallback', - 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, - 'ext': ext, - 'vcodec': 'none', - }) + for f in stream_formats: + f['abr'] = abr - for f in formats: - if f['format_id'].startswith('http'): - f['protocol'] = 'http' - if f['format_id'].startswith('rtmp'): - f['protocol'] = 'rtmp' + formats.extend(stream_formats) + + if not formats: + # We fallback to the stream_url in the original info, this + # cannot be always used, sometimes it can give an HTTP 404 error + formats.append({ + 'format_id': 'fallback', + 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, + 'ext': ext, + }) + + for f in formats: + f['vcodec'] = 'none' self._check_formats(formats, track_id) self._sort_formats(formats) From e0b6e50ccd124c6f618bf25bc94361d83cbc8b86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 23:55:55 +0700 Subject: [PATCH 90/93] [crunchyroll] Improve series and season metadata extraction (closes #11832) --- 
youtube_dl/extractor/crunchyroll.py | 38 ++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 559044352..f811c7f33 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -166,6 +166,25 @@ class CrunchyrollIE(CrunchyrollBaseIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589', + 'info_dict': { + 'id': '727589', + 'ext': 'mp4', + 'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!", + 'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Kadokawa Pictures Inc.', + 'upload_date': '20170118', + 'series': "KONOSUBA -God's blessing on this wonderful world!", + 'season_number': 2, + 'episode': 'Give Me Deliverance from this Judicial Injustice!', + 'episode_number': 1, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697', 'only_matching': True, @@ -439,6 +458,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text subtitles = self.extract_subtitles(video_id, webpage) + # webpage provide more accurate data than series_title from XML + series = self._html_search_regex( + r'id=["\']showmedia_about_episode_num[^>]+>\s*]+>([^<]+)', + webpage, 'series', default=xpath_text(metadata, 'series_title')) + + episode = xpath_text(metadata, 'episode_title') + episode_number = int_or_none(xpath_text(metadata, 'episode_number')) + + season_number = int_or_none(self._search_regex( + r'(?s)]+id=["\']showmedia_about_episode_num[^>]+>.+?\s*

\s*Season (\d+)', + webpage, 'season number', default=None)) + return { 'id': video_id, 'title': video_title, @@ -446,9 +477,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'thumbnail': xpath_text(metadata, 'episode_image_url'), 'uploader': video_uploader, 'upload_date': video_upload_date, - 'series': xpath_text(metadata, 'series_title'), - 'episode': xpath_text(metadata, 'episode_title'), - 'episode_number': int_or_none(xpath_text(metadata, 'episode_number')), + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, 'subtitles': subtitles, 'formats': formats, } From 815d2a36d81c4cc6181d0536ce811b0e2e4a5021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 00:03:21 +0700 Subject: [PATCH 91/93] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index ff305d7e8..2c670c62e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version + +Core +* [utils] Improve parse_duration + +Extractors +* [crunchyroll] Improve series and season metadata extraction (#11832) +* [soundcloud] Improve formats extraction and extract audio bitrate ++ [soundcloud] Extract HLS formats +* [soundcloud] Fix track URL extraction (#11852) ++ [twitch:vod] Expand URL regular expressions (#11846) +* [aenetworks] Fix season episodes extraction (#11669) ++ [tva] Add support for videos.tva.ca (#11842) +* [jamendo] Improve and extract more metadata (#11836) ++ [disney] Add support for Disney sites (#7409, #11801, #4975, #11000) +* [vevo] Remove request to old API and catch API v2 errors ++ [cmt,mtv,southpark] Add support for episode URLs (#11837) ++ [youtube] Add fallback for duration extraction (#11841) + + version 2017.01.25 Extractors From d41ed6d243c2079db123963a7f65e91f24b390f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 00:33:55 +0700 Subject: 
[PATCH 92/93] release 2017.01.28 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4d409f785..693f3b745 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.25** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.28** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.25 +[debug] youtube-dl version 2017.01.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 2c670c62e..8e5a04b42 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.28 Core * [utils] Improve parse_duration diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f640cfcaa..6318a862f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -202,6 +202,7 @@ - **Digiteka** - **Discovery** - **DiscoveryGo** + - **Disney** - **Dotsub** - **DouyuTV**: 斗鱼 - **DPlay** @@ -785,6 +786,7 @@ - **TV2Article** - **TV3** - **TV4**: tv4.se and tv4play.se + - **TVA** - **TVANouvelles** - **TVANouvellesArticle** - **TVC** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c23fe85de..c22c410a8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.25' +__version__ = '2017.01.28' From 99a0baf370c7652f6103cff71f878872229b4129 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Tue, 24 Jan 2017 17:42:00 +0100 Subject: [PATCH 93/93] [konserthusetplay] Add support for rspoplay.se --- youtube_dl/extractor/konserthusetplay.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 
deletions(-) diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index 3ae2aa317..c11cbcf47 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -11,22 +11,22 @@ from ..utils import ( class KonserthusetPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P[^&]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P[^&]+)' + _TESTS = [{ 'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A', + 'md5': 'e3fd47bf44e864bd23c08e487abe1967', 'info_dict': { 'id': 'CKDDnlCY-dhWAAqiMERd-A', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Orkesterns instrument: Valthornen', 'description': 'md5:f10e1f0030202020396a4d712d2fa827', 'thumbnail': 're:^https?://.*$', - 'duration': 398.8, + 'duration': 398.76, }, - 'params': { - # rtmp download - 'skip_download': True, - }, - } + }, { + 'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url)