From 21160a179256cfbb859948138905ffe67cb970a8 Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Sun, 23 Sep 2018 16:34:47 +0200 Subject: [PATCH 01/32] [zattoo] Fix extraction (closes #17175) --- youtube_dl/extractor/zattoo.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index fb167c198..9c9024799 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -93,28 +93,30 @@ class ZattooBaseIE(InfoExtractor): def _extract_cid_and_video_info(self, video_id): data = self._download_json( - '%s/zapi/program/details' % self._HOST_URL, + '%s/zapi/v2/cached/program/power_details/%s' % ( + self._HOST_URL, self._power_guide_hash), video_id, 'Downloading video information', query={ - 'program_id': video_id, - 'complete': True + 'program_ids': video_id, + 'complete': True, }) - p = data['program'] + p = data['programs'][0] cid = p['cid'] info_dict = { 'id': video_id, - 'title': p.get('title') or p['episode_title'], - 'description': p.get('description'), - 'thumbnail': p.get('image_url'), + 'title': p.get('t') or p['et'], + 'description': p.get('d'), + 'thumbnail': p.get('i_url'), 'creator': p.get('channel_name'), - 'episode': p.get('episode_title'), - 'episode_number': int_or_none(p.get('episode_number')), - 'season_number': int_or_none(p.get('season_number')), + 'episode': p.get('et'), + 'episode_number': int_or_none(p.get('e_no')), + 'season_number': int_or_none(p.get('s_no')), 'release_year': int_or_none(p.get('year')), - 'categories': try_get(p, lambda x: x['categories'], list), + 'categories': try_get(p, lambda x: x['c'], list), + 'tags': try_get(p, lambda x: x['g'], list) } return cid, info_dict From f6d7f7b474075955afb02c373b8d2a442faaa0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Sep 2018 23:30:18 +0700 Subject: [PATCH 02/32] [zattoo] Add support for more zattoo platform sites --- youtube_dl/extractor/extractors.py | 12 ++ youtube_dl/extractor/zattoo.py | 178 ++++++++++++++++++++++++++--- 2 files changed, 172 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7dc569724..464c8d690 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1455,8 +1455,20 @@ from .youtube import ( from .zapiks import ZapiksIE from .zaq1 import Zaq1IE from .zattoo import ( + BBVTVIE, + EinsUndEinsTVIE, + EWETVIE, + GlattvisionTVIE, + MNetTVIE, + MyVisionTVIE, + NetPlusIE, + OsnatelTVIE, + QuantumTVIE, QuicklineIE, QuicklineLiveIE, + SAKTVIE, + VTXTVIE, + WalyTVIE, ZattooIE, ZattooLiveIE, ) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index 9c9024799..bbe0aecb6 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -18,12 +18,12 @@ from ..utils import ( ) -class ZattooBaseIE(InfoExtractor): - _NETRC_MACHINE = 'zattoo' - _HOST_URL = 'https://zattoo.com' - +class ZattooPlatformBaseIE(InfoExtractor): _power_guide_hash = None + def _host_url(self): + return 'https://%s' % self._HOST + def _login(self): username, password = self._get_login_info() if not username or not password: @@ -33,13 +33,13 @@ class ZattooBaseIE(InfoExtractor): try: data = self._download_json( - '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in', + '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in', data=urlencode_postdata({ 'login': username, 'password': password, 'remember': 'true', }), headers={ - 'Referer': '%s/login' % self._HOST_URL, + 'Referer': '%s/login' % self._host_url(), 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', }) except ExtractorError as e: @@ -53,7 +53,7 @@ class ZattooBaseIE(InfoExtractor): def _real_initialize(self): webpage = self._download_webpage( - self._HOST_URL, None, 'Downloading app token') + self._host_url(), None, 'Downloading app token') app_token = self._html_search_regex( r'appToken\s*=\s*(["\'])(?P(?:(?!\1).)+?)\1', webpage, 'app token', group='token') @@ -62,7 +62,7 @@ class ZattooBaseIE(InfoExtractor): # Will setup appropriate cookies self._request_webpage( - '%s/zapi/v2/session/hello' % self._HOST_URL, None, + '%s/zapi/v2/session/hello' % self._host_url(), None, 'Opening session', data=urlencode_postdata({ 'client_app_token': app_token, 'uuid': compat_str(uuid4()), @@ -75,7 +75,7 @@ class ZattooBaseIE(InfoExtractor): def _extract_cid(self, video_id, channel_name): channel_groups = self._download_json( - '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL, + '%s/zapi/v2/cached/channels/%s' % (self._host_url(), self._power_guide_hash), video_id, 'Downloading channel list', query={'details': False})['channel_groups'] @@ -94,7 +94,7 @@ class ZattooBaseIE(InfoExtractor): def _extract_cid_and_video_info(self, video_id): data = self._download_json( '%s/zapi/v2/cached/program/power_details/%s' % ( - self._HOST_URL, self._power_guide_hash), + self._host_url(), self._power_guide_hash), video_id, 'Downloading video information', query={ @@ -128,11 +128,11 @@ class ZattooBaseIE(InfoExtractor): if is_live: postdata_common.update({'timeshift': 10800}) - url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid) + url = '%s/zapi/watch/live/%s' % (self._host_url(), cid) elif record_id: - url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id) + url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id) else: - url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id) + url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id) formats = [] for stream_type in ('dash', 'hls', 'hls5', 'hds'): @@ -203,13 +203,13 @@ class ZattooBaseIE(InfoExtractor): return info_dict -class QuicklineBaseIE(ZattooBaseIE): +class QuicklineBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'quickline' - _HOST_URL = 'https://mobiltv.quickline.com' + _HOST = 'mobiltv.quickline.com' class QuicklineIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P[^/]+)/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P[^/]+)/(?P[0-9]+)' % re.escape(QuicklineBaseIE._HOST) _TEST = { 'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste', @@ -222,7 +222,7 @@ class QuicklineIE(QuicklineBaseIE): class QuicklineLiveIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P[^/]+)' % re.escape(QuicklineBaseIE._HOST) _TEST = { 'url': 'https://mobiltv.quickline.com/watch/srf1', @@ -238,8 +238,18 @@ class QuicklineLiveIE(QuicklineBaseIE): return self._extract_video(channel_name, video_id, is_live=True) +class ZattooBaseIE(ZattooPlatformBaseIE): + _NETRC_MACHINE = 'zattoo' + _HOST = 'zattoo.com' + + +def _make_valid_url(tmpl, host): + return tmpl % re.escape(host) + + class ZattooIE(ZattooBaseIE): - _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P[^/]+?)/(?P[0-9]+)[^/]+(?:/(?P[0-9]+))?' + _VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P[^/]+?)/(?P[0-9]+)[^/]+(?:/(?P[0-9]+))?' + _VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST) # Since regular videos are only available for 7 days and recorded videos # are only available for a specific user, we cannot have detailed tests. @@ -271,3 +281,135 @@ class ZattooLiveIE(ZattooBaseIE): def _real_extract(self, url): channel_name = video_id = self._match_id(url) return self._extract_video(channel_name, video_id, is_live=True) + + +class NetPlusIE(ZattooIE): + _NETRC_MACHINE = 'netplus' + _HOST = 'netplus.tv' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.netplus.tv/watch/abc/123-abc', + 'only_matching': True, + }] + + +class MNetTVIE(ZattooIE): + _NETRC_MACHINE = 'mnettv' + _HOST = 'tvplus.m-net.de' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc', + 'only_matching': True, + }] + + +class WalyTVIE(ZattooIE): + _NETRC_MACHINE = 'walytv' + _HOST = 'player.waly.tv' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.player.waly.tv/watch/abc/123-abc', + 'only_matching': True, + }] + + +class BBVTVIE(ZattooIE): + _NETRC_MACHINE = 'bbvtv' + _HOST = 'bbv-tv.net' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.bbv-tv.net/watch/abc/123-abc', + 'only_matching': True, + }] + + +class VTXTVIE(ZattooIE): + _NETRC_MACHINE = 'vtxtv' + _HOST = 'vtxtv.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.vtxtv.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class MyVisionTVIE(ZattooIE): + _NETRC_MACHINE = 'myvisiontv' + _HOST = 'myvisiontv.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.myvisiontv.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class GlattvisionTVIE(ZattooIE): + _NETRC_MACHINE = 'glattvisiontv' + _HOST = 'iptv.glattvision.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class SAKTVIE(ZattooIE): + _NETRC_MACHINE = 'saktv' + _HOST = 'saktv.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.saktv.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class EWETVIE(ZattooIE): + _NETRC_MACHINE = 'ewetv' + _HOST = 'tvonline.ewe.de' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc', + 'only_matching': True, + }] + + +class QuantumTVIE(ZattooIE): + _NETRC_MACHINE = 'quantumtv' + _HOST = 'quantum-tv.com' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.quantum-tv.com/watch/abc/123-abc', + 'only_matching': True, + }] + + +class OsnatelTVIE(ZattooIE): + _NETRC_MACHINE = 'osnateltv' + _HOST = 'onlinetv.osnatel.de' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc', + 'only_matching': True, + }] + + +class EinsUndEinsTVIE(ZattooIE): + _NETRC_MACHINE = '1und1tv' + _HOST = '1und1.tv' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.1und1.tv/watch/abc/123-abc', + 'only_matching': True, + }] From cd5a74a28e7102e877d00b1e3cffba82d28b2f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Sep 2018 00:14:49 +0700 Subject: [PATCH 03/32] [youtube] Add support for invidio.us (closes #17613) --- youtube_dl/extractor/youtube.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e80e36f98..78203ef84 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -349,6 +349,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| + (?:www\.)?invidio\.us/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -1068,6 +1069,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', 'only_matching': True, }, + { + 'url': 'https://invidio.us/watch?v=BaW_jenozKc', + 'only_matching': True, + }, ] def __init__(self, *args, **kwargs): @@ -2419,7 +2424,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com channels' - _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P[0-9A-Za-z_-]+)' + _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P[0-9A-Za-z_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos' _VIDEO_RE = r'(?:title="(?P[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?' IE_NAME = 'youtube:channel' @@ -2440,6 +2445,9 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', }, + }, { + 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', + 'only_matching': True, }] @classmethod From 60ce0c67fd1ef71463af2c036bbabf06ec26bd98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 25 Sep 2018 23:43:41 +0700 Subject: [PATCH 04/32] [README.md] Document channel meta fields for output template --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index dd068a462..fdd115c9b 100644 --- a/README.md +++ b/README.md @@ -511,6 +511,8 @@ The basic usage is not to set any template arguments when downloading a single f - `timestamp` (numeric): UNIX timestamp of the moment the video became available - `upload_date` (string): Video upload date (YYYYMMDD) - `uploader_id` (string): Nickname or id of the video uploader + - `channel` (string): Full name of the channel the video is uploaded on + - `channel_id` (string): Id of the channel - `location` (string): Physical location where the video was filmed - `duration` (numeric): Length of the video in seconds - `view_count` (numeric): How many users have watched the video on the platform From 8fd12a083131550476fb771c180a0734794d0b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 05:38:41 +0700 Subject: [PATCH 05/32] [mediaset] Improve embed support (closes #17668) --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/mediaset.py | 38 +++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 76ef01332..2a48667f0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3023,7 +3023,7 @@ class GenericIE(InfoExtractor): wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key()) # Look for Mediaset embeds - mediaset_urls = MediasetIE._extract_urls(webpage) + mediaset_urls = MediasetIE._extract_urls(self, webpage) if mediaset_urls: return self.playlist_from_matches( mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index 57f97409d..df3748798 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -4,6 +4,11 @@ from __future__ import unicode_literals import re from .theplatform import ThePlatformBaseIE +from ..compat import ( + compat_parse_qs, + compat_str, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, int_or_none, @@ -76,12 +81,33 @@ class MediasetIE(ThePlatformBaseIE): }] @staticmethod - def _extract_urls(webpage): - return [ - mobj.group('url') - for mobj in re.finditer( - r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1', - webpage)] + def _extract_urls(ie, webpage): + def _qs(url): + return compat_parse_qs(compat_urllib_parse_urlparse(url).query) + + def _program_guid(qs): + return qs.get('programGuid', [None])[0] + + entries = [] + for mobj in re.finditer( + r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1', + webpage): + embed_url = mobj.group('url') + embed_qs = _qs(embed_url) + program_guid = _program_guid(embed_qs) + if program_guid: + entries.append(embed_url) + continue + video_id = embed_qs.get('id', [None])[0] + if not video_id: + continue + urlh = ie._request_webpage( + embed_url, video_id, note='Following embed URL redirect') + embed_url = compat_str(urlh.geturl()) + program_guid = _program_guid(_qs(embed_url)) + if program_guid: + entries.append(embed_url) + return entries def _real_extract(self, url): guid = self._match_id(url) From c17e100b96e3a1c1d2115226c9a2f166c8338031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 09:27:40 +0700 Subject: [PATCH 06/32] [pluralsight] Fix subtitles extraction (closes #17671) --- youtube_dl/extractor/pluralsight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 1257841e4..ec67381bb 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -213,7 +213,7 @@ query viewClip { def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id): captions_post = { 'a': author, - 'cn': clip_idx, + 'cn': int(clip_idx), 'lc': lang, 'm': name, } From 3d3499742c88ce89de5b49dff4e96483b089fcaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 11:56:15 +0700 Subject: [PATCH 07/32] [ChangeLog] Actualize [ci skip] --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 800ece790..15b875297 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +version <unreleased> + +Extractors +* [pluralsight] Fix subtitles extraction (#17671) +* [mediaset] Improve embed support (#17668) ++ [youtube] Add support for invidio.us (#17613) ++ [zattoo] Add support for more zattoo platform sites +* [zattoo] Fix extraction (#17175, #17542) + + version 2018.09.18 Core From 4c89a675dd50429a1a5f5903e624db0204d2435a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 11:58:25 +0700 Subject: [PATCH 08/32] release 2018.09.26 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 12 ++++++++++++ youtube_dl/version.py | 2 +- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a4602287a..ed3e0a157 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.18 +[debug] youtube-dl version 2018.09.26 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 15b875297..241712037 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.09.26 Extractors * [pluralsight] Fix subtitles extraction (#17671) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9b8601751..736ab6da7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -98,6 +98,7 @@ - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:iplayer:playlist** - **bbc.co.uk:playlist** + - **BBVTV** - **Beatport** - **Beeg** - **BehindKink** @@ -251,6 +252,7 @@ - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson - **eHow** + - **EinsUndEinsTV** - **Einthusan** - **eitb.tv** - **EllenTube** @@ -268,6 +270,7 @@ - **EsriVideo** - **Europa** - **EveryonesMixtape** + - **EWETV** - **ExpoTV** - **Expressen** - **ExtremeTube** @@ -327,6 +330,7 @@ - **Gfycat** - **GiantBomb** - **Giga** + - **GlattvisionTV** - **Glide**: Glide mobile video messages (glide.me) - **Globo** - **GloboArticle** @@ -494,6 +498,7 @@ - **Mixer:vod** - **MLB** - **Mnet** + - **MNetTV** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** - **Mojvideo** @@ -525,6 +530,7 @@ - **Myvi** - **MyVidster** - **MyviEmbed** + - **MyVisionTV** - **n-tv.de** - **natgeo** - **natgeo:episodeguide** @@ -550,6 +556,7 @@ - **netease:program**: 网易云音乐 - 电台节目 - **netease:singer**: 网易云音乐 - 歌手 - **netease:song**: 网易云音乐 + - **NetPlus** - **Netzkino** - **Newgrounds** - **NewgroundsPlaylist** @@ -626,6 +633,7 @@ - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek + - **OsnatelTV** - **PacktPub** - **PacktPubCourse** - **PandaTV**: 熊猫TV @@ -686,6 +694,7 @@ - **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 + - **QuantumTV** - **Quickline** - **QuicklineLive** - **R7** @@ -753,6 +762,7 @@ - **safari**: safaribooksonline.com online video - **safari:api** - **safari:course**: safaribooksonline.com online courses + - **SAKTV** - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au @@ -1035,12 +1045,14 @@ - **vrv** - **vrv:series** - **VShare** + - **VTXTV** - **vube**: Vube.com - **VuClip** - **VVVVID** - **VyboryMos** - **Vzaar** - **Walla** + - **WalyTV** - **washingtonpost** - **washingtonpost:article** - **wat.tv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2b3b584a4..6f2cc31df 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.18' +__version__ = '2018.09.26' From 85cd69adcb41454f30d4db85eeb4d5cc26b6bb5d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 26 Sep 2018 08:13:16 +0100 Subject: [PATCH 09/32] [hotstar] fix extraction(closes #14694)(closes #14931)(closes #17637) --- youtube_dl/extractor/hotstar.py | 159 +++++++++++++++----------------- 1 file changed, 73 insertions(+), 86 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index d28af36ec..354ac00dc 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -1,10 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import hashlib +import hmac +import time from .common import InfoExtractor -from ..compat import compat_str +from ..compat import compat_HTTPError from ..utils import ( determine_ext, ExtractorError, @@ -13,37 +15,40 @@ from ..utils import ( class HotStarBaseIE(InfoExtractor): - _GEO_COUNTRIES = ['IN'] + _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' - def _download_json(self, *args, **kwargs): - response = super(HotStarBaseIE, self)._download_json(*args, **kwargs) - if response['resultCode'] != 'OK': - if kwargs.get('fatal'): - raise ExtractorError( - response['errorDescription'], expected=True) - return None - return response['resultObj'] - - def _download_content_info(self, content_id): - return self._download_json( - 'https://account.hotstar.com/AVS/besc', content_id, query={ - 'action': 'GetAggregatedContentDetails', - 'appVersion': '5.0.40', - 'channel': 'PCTV', - 'contentId': content_id, - })['contentInfo'][0] + def _call_api(self, path, video_id, query_name='contentId'): + st = int(time.time()) + exp = st + 6000 + auth = 'st=%d~exp=%d~acl=/*' % (st, exp) + auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() + response = self._download_json( + 'https://api.hotstar.com/' + path, + video_id, headers={ + 'hotstarauth': auth, + 'x-country-code': 'IN', + 'x-platform-code': 'JIO', + }, query={ + query_name: video_id, + 'tas': 10000, + }) + if response['statusCode'] != 'OK': + raise ExtractorError( + response['body']['message'], expected=True) + return response['body']['results'] class HotStarIE(HotStarBaseIE): + IE_NAME = 'hotstar' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' _TESTS = [{ - 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', + 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', 'info_dict': { 'id': '1000076273', 'ext': 'mp4', - 'title': 'On Air With AIB', + 'title': 'Can You Not Spread Rumours?', 'description': 'md5:c957d8868e9bc793ccb813691cc4c434', - 'timestamp': 1447227000, + 'timestamp': 1447248600, 'upload_date': '20151111', 'duration': 381, }, @@ -58,47 +63,43 @@ class HotStarIE(HotStarBaseIE): 'url': 'http://www.hotstar.com/1000000515', 'only_matching': True, }] + _GEO_BYPASS = False def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_content_info(video_id) + webpage = self._download_webpage(url, video_id) + app_state = self._parse_json(self._search_regex( + r'<script>window\.APP_STATE\s*=\s*({.+?})</script>', + webpage, 'app state'), video_id) + video_data = list(app_state.values())[0]['initialState']['contentData']['content'] - title = video_data['episodeTitle'] + title = video_data['title'] - if video_data.get('encrypted') == 'Y': + if video_data.get('drmProtected'): raise ExtractorError('This video is DRM protected.', expected=True) formats = [] - for f in ('JIO',): - format_data = self._download_json( - 'http://getcdn.hotstar.com/AVS/besc', - video_id, 'Downloading %s JSON metadata' % f, - fatal=False, query={ - 'action': 'GetCDN', - 'asJson': 'Y', - 'channel': f, - 'id': video_id, - 'type': 'VOD', - }) - if format_data: - format_url = format_data.get('src') - if not format_url: - continue - ext = determine_ext(format_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - m3u8_id='hls', fatal=False)) - elif ext == 'f4m': - # produce broken files - continue - else: - formats.append({ - 'url': format_url, - 'width': int_or_none(format_data.get('width')), - 'height': int_or_none(format_data.get('height')), - }) + format_data = self._call_api('h/v1/play', video_id)['item'] + format_url = format_data['playbackUrl'] + ext = determine_ext(format_url) + if ext == 'm3u8': + try: + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id='hls')) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self.raise_geo_restricted(countries=['IN']) + raise + elif ext == 'f4m': + # produce broken files + pass + else: + formats.append({ + 'url': format_url, + 'width': int_or_none(format_data.get('width')), + 'height': int_or_none(format_data.get('height')), + }) self._sort_formats(formats) return { @@ -106,57 +107,43 @@ class HotStarIE(HotStarBaseIE): 'title': title, 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), - 'timestamp': int_or_none(video_data.get('broadcastDate')), + 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), 'formats': formats, + 'channel': video_data.get('channelName'), + 'channel_id': video_data.get('channelId'), + 'series': video_data.get('showName'), + 'season': video_data.get('seasonName'), + 'season_number': int_or_none(video_data.get('seasonNo')), + 'season_id': video_data.get('seasonId'), 'episode': title, - 'episode_number': int_or_none(video_data.get('episodeNumber')), - 'series': video_data.get('contentTitle'), + 'episode_number': int_or_none(video_data.get('episodeNo')), } class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' - _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' _TESTS = [{ - 'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993', + 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', 'info_dict': { - 'id': '14812', + 'id': '3_2_26', }, - 'playlist_mincount': 75, + 'playlist_mincount': 20, }, { - 'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998', + 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', 'only_matching': True, }] - _ITEM_TYPES = { - 'episodes': 'EPISODE', - 'popular-clips': 'CLIPS', - } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - base_url = mobj.group('url') - content_id = mobj.group('content_id') - playlist_type = mobj.group('type') + playlist_id = self._match_id(url) - content_info = self._download_content_info(content_id) - playlist_id = compat_str(content_info['categoryId']) - - collection = self._download_json( - 'https://search.hotstar.com/AVS/besc', playlist_id, query={ - 'action': 'SearchContents', - 'appVersion': '5.0.40', - 'channel': 'PCTV', - 'moreFilters': 'series:%s;' % playlist_id, - 'query': '*', - 'searchOrder': 'last_broadcast_date desc,year desc,title asc', - 'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'), - }) + collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId') entries = [ self.url_result( - '%s/_/%s' % (base_url, video['contentId']), + 'https://www.hotstar.com/%s' % video['contentId'], ie=HotStarIE.ie_key(), video_id=video['contentId']) - for video in collection['response']['docs'] + for video in collection['assets']['items'] if video.get('contentId')] return self.playlist_result(entries, playlist_id) From 245cbb33bcb7d519c897277e65acdcbc45335233 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 28 Sep 2018 15:13:25 +0100 Subject: [PATCH 10/32] [spike] fix Paramount Network extraction(closes #17677) --- youtube_dl/extractor/spike.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index e76522b45..6090e0066 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -44,3 +44,10 @@ class ParamountNetworkIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/' _GEO_COUNTRIES = ['US'] + + def _extract_mgid(self, webpage): + cs = self._parse_json(self._search_regex( + r'window\.__DATA__\s*=\s*({.+})', + webpage, 'data'), None)['children'] + c = next(c for c in cs if c.get('type') == 'VideoPlayer') + return c['props']['media']['video']['config']['uri'] From 85fa80d5f9d867bab706b92d15a6c9ad5b862b47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 21:13:43 +0700 Subject: [PATCH 11/32] [vimeo] Add another config regex (closes #17690) --- youtube_dl/extractor/vimeo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 0a9239b62..88f4d9979 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -551,6 +551,7 @@ class VimeoIE(VimeoBaseInfoExtractor): else: config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;') + config_re.append(r'\bconfig\s*=\s*({.+?})\s*;') config = self._search_regex(config_re, webpage, 'info section', flags=re.DOTALL) config = json.loads(config) From 365343131d752bece96d2279a3e0bcd7e9f0000f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 21:45:24 +0700 Subject: [PATCH 12/32] [pluralsight] Fix subtitles extraction (closes #17726, closes #17728) --- youtube_dl/extractor/pluralsight.py | 34 ++++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index ec67381bb..daf172570 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -210,18 +210,26 @@ query viewClip { raise ExtractorError('Unable to log in') - def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id): - captions_post = { - 'a': author, - 'cn': int(clip_idx), - 'lc': lang, - 'm': name, - } - captions = self._download_json( - '%s/player/retrieve-captions' % self._API_BASE, video_id, - 'Downloading captions JSON', 'Unable to download captions JSON', - fatal=False, data=json.dumps(captions_post).encode('utf-8'), - headers={'Content-Type': 'application/json;charset=utf-8'}) + def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id): + captions = None + if clip_id: + captions = self._download_json( + '%s/transcript/api/v1/caption/json/%s/%s' + % (self._API_BASE, clip_id, lang), video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False) + if not captions: + captions_post = { + 'a': author, + 'cn': int(clip_idx), + 'lc': lang, + 'm': name, + } + captions = self._download_json( + '%s/player/retrieve-captions' % self._API_BASE, video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False, data=json.dumps(captions_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) if captions: return { lang: [{ @@ -413,7 +421,7 @@ query viewClip { # TODO: other languages? subtitles = self.extract_subtitles( - author, clip_idx, 'en', name, duration, display_id) + author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id) return { 'id': clip_id, From 9795d93316cb63e0e2a73ee840600be7f13e19f5 Mon Sep 17 00:00:00 2001 From: Enes <enessolak99@gmail.com> Date: Mon, 1 Oct 2018 17:48:59 +0300 Subject: [PATCH 13/32] [openload] Add support for oload.cloud (closes #17710) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index d264fe206..dc01b6346 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -307,6 +307,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.download/f/kUEfGclsU9o', 'only_matching': True, + }, { + 'url': 'https://oload.cloud/f/4ZDnBXRWiB8', + 'only_matching': True, }, { # Its title has not got its extension but url has it 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', From 3c7da54c92b7560c3ceeb8b58a67d4759ed843d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 22:05:18 +0700 Subject: [PATCH 14/32] [jamendo] Add support for licensing.jamendo.com (closes #17724) --- youtube_dl/extractor/jamendo.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 595d7a5b7..c21827618 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -26,8 +26,15 @@ class JamendoBaseIE(InfoExtractor): class JamendoIE(JamendoBaseIE): - _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + licensing\.jamendo\.com/[^/]+| + (?:www\.)?jamendo\.com + ) + /track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+) + ''' + _TESTS = [{ 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', 'md5': '6e9e82ed6db98678f171c25a8ed09ffd', 'info_dict': { @@ -40,14 +47,19 @@ class JamendoIE(JamendoBaseIE): 'duration': 210, 'thumbnail': r're:^https?://.*\.jpg' } - } + }, { + 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock', + 'only_matching': True, + }] def _real_extract(self, url): mobj = self._VALID_URL_RE.match(url) track_id = mobj.group('id') display_id = mobj.group('display_id') - webpage = self._download_webpage(url, display_id) + webpage = self._download_webpage( + 'https://www.jamendo.com/track/%s/%s' % (track_id, display_id), + display_id) title, artist, track = self._extract_meta(webpage) From 66d106f2705d565675b6a94fb0e7e0bdfe132065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 23:29:24 +0700 Subject: [PATCH 15/32] [philharmoniedeparis] Fix extraction and add support for pad.philharmoniedeparis.fr (closes #17705) --- youtube_dl/extractor/philharmoniedeparis.py | 118 ++++++++++++-------- 1 file changed, 70 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index f1008ae51..f723a2b3b 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -2,31 +2,38 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - float_or_none, - int_or_none, - parse_iso8601, - xpath_text, + try_get, + urljoin, ) class PhilharmonieDeParisIE(InfoExtractor): IE_DESC = 'Philharmonie de Paris' - _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' + _VALID_URL = r'''(?x) + https?:// + (?: + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)| + pad\.philharmoniedeparis\.fr/doc/CIMU/ + ) + (?P<id>\d+) + ''' _TESTS = [{ + 'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower', + 'md5': 'a0a4b195f544645073631cbec166a2c2', + 'info_dict': { + 'id': '1086697', + 'ext': 'mp4', + 'title': 'Jazz à la Villette : Knower', + }, + }, { 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', 'info_dict': { 'id': '1032066', - 'ext': 'flv', - 'title': 'md5:d1f5585d87d041d07ce9434804bc8425', - 'timestamp': 1428179400, - 'upload_date': '20150404', - 'duration': 6592.278, + 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0', }, - 'params': { - # rtmp download - 'skip_download': True, - } + 'playlist_mincount': 2, }, { 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', 'only_matching': True, @@ -34,45 +41,60 @@ class PhilharmonieDeParisIE(InfoExtractor): 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', 'only_matching': True, }] + _LIVE_URL = 'https://live.philharmoniedeparis.fr' def _real_extract(self, url): video_id = self._match_id(url) - concert = self._download_xml( - 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id, - video_id).find('./concert') + config = self._download_json( + '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={ + 'id': video_id, + 'lang': 'fr-FR', + }) - formats = [] - info_dict = { - 'id': video_id, - 'title': xpath_text(concert, './titre', 'title', fatal=True), - 'formats': formats, - } - - fichiers = concert.find('./fichiers') - stream = fichiers.attrib['serveurstream'] - for fichier in fichiers.findall('./fichier'): - info_dict['duration'] = float_or_none(fichier.get('timecodefin')) - for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]): - format_url = fichier.get('url%s' % suffix) - if not format_url: + def extract_entry(source): + if not isinstance(source, dict): + return + title = source.get('title') + if not title: + return + files = source.get('files') + if not isinstance(files, dict): + return + format_urls = set() + formats = [] + for format_id in ('mobile', 'desktop'): + format_url = try_get( + files, lambda x: x[format_id]['file'], compat_str) + if not format_url or format_url in format_urls: continue - formats.append({ - 'url': stream, - 'play_path': format_url, - 'ext': 'flv', - 'format_id': format_id, - 'width': int_or_none(concert.get('largeur%s' % suffix)), - 'height': int_or_none(concert.get('hauteur%s' % suffix)), - 'quality': quality, - }) - self._sort_formats(formats) + format_urls.add(format_url) + m3u8_url = urljoin(self._LIVE_URL, format_url) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + if not formats: + return + self._sort_formats(formats) + return { + 'title': title, + 'formats': formats, + } - date, hour = concert.get('date'), concert.get('heure') - if date and hour: - info_dict['timestamp'] = parse_iso8601( - '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour)) - elif date: - info_dict['upload_date'] = date + thumbnail = urljoin(self._LIVE_URL, config.get('image')) - return info_dict + info = extract_entry(config) + if info: + info.update({ + 'id': video_id, + 'thumbnail': thumbnail, + }) + return info + + entries = [] + for num, chapter in enumerate(config['chapters'], start=1): + entry = extract_entry(chapter) + entry['id'] = '%s-%d' % (video_id, num) + entries.append(entry) + + return self.playlist_result(entries, video_id, config.get('title')) From 05e7c184da85f83b254bc3d138f89b11da802bdb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 2 Oct 2018 06:07:06 +0100 Subject: [PATCH 16/32] [hotstar] fix extraction in python 2(closes #17696) --- youtube_dl/extractor/hotstar.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 354ac00dc..bf5717f1b 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -11,6 +11,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + try_get, ) @@ -72,7 +73,11 @@ class HotStarIE(HotStarBaseIE): app_state = self._parse_json(self._search_regex( r'<script>window\.APP_STATE\s*=\s*({.+?})</script>', webpage, 'app state'), video_id) - video_data = list(app_state.values())[0]['initialState']['contentData']['content'] + video_data = {} + for v in app_state.values(): + content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict) + if content and content.get('contentId') == video_id: + video_data = content title = video_data['title'] From d98cb62e552ee74079fda2e4173a40b14faac3fe Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 2 Oct 2018 19:43:06 +0100 Subject: [PATCH 17/32] [crunchyroll] switch to HTTPS for RpcApi(closes #17749) --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index af786d096..045be0ab5 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -45,7 +45,7 @@ class CrunchyrollBaseIE(InfoExtractor): data['req'] = 'RpcApi' + method data = compat_urllib_parse_urlencode(data).encode('utf-8') return self._download_xml( - 'http://www.crunchyroll.com/xml/', + 'https://www.crunchyroll.com/xml/', video_id, note, fatal=False, data=data, headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) From f60b9803a473da8e324313d01af91e5676792c77 Mon Sep 17 00:00:00 2001 From: Enes <enessolak99@gmail.com> Date: Sat, 29 Sep 2018 13:28:56 +0300 Subject: [PATCH 18/32] [dailymotion] Fix extraction (closes #17699) --- youtube_dl/extractor/dailymotion.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 040f0bd02..842d9a259 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -24,6 +24,7 @@ from ..utils import ( str_to_int, unescapeHTML, urlencode_postdata, + try_get, ) @@ -172,7 +173,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor): webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) - metadata = player['metadata'] + metadata = try_get( + player, lambda x: x['metadata'], dict) or self._download_json( + 'http://www.dailymotion.com/player/metadata/video/%s' % video_id, video_id, query={ + 'integration': 'inline', + 'GK_PV5_NEON': '1', + }) if metadata.get('error', {}).get('type') == 'password_protected': password = self._downloader.params.get('videopassword') From 0082f44a08e33712fcd33ceabab15215c962eaac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:02:58 +0700 Subject: [PATCH 19/32] [dailymotion] Improve metadata extraction (closes #17706) --- youtube_dl/extractor/dailymotion.py | 32 ++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 842d9a259..1816c559e 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -22,9 +22,11 @@ from ..utils import ( parse_iso8601, sanitized_Request, str_to_int, - unescapeHTML, - urlencode_postdata, try_get, + unescapeHTML, + update_url_query, + url_or_none, + urlencode_postdata, ) @@ -172,15 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor): r'__PLAYER_CONFIG__\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: - player = self._parse_json(player_v5, video_id) - metadata = try_get( - player, lambda x: x['metadata'], dict) or self._download_json( - 'http://www.dailymotion.com/player/metadata/video/%s' % video_id, video_id, query={ - 'integration': 'inline', - 'GK_PV5_NEON': '1', - }) + player = self._parse_json(player_v5, video_id, fatal=False) or {} + metadata = try_get(player, lambda x: x['metadata'], dict) + if not metadata: + metadata_url = url_or_none(try_get( + player, lambda x: x['context']['metadata_template_url1'])) + if metadata_url: + metadata_url = metadata_url.replace(':videoId', video_id) + else: + metadata_url = update_url_query( + 'https://www.dailymotion.com/player/metadata/video/%s' + % video_id, { + 'embedder': url, + 'integration': 'inline', + 'GK_PV5_NEON': '1', + }) + metadata = self._download_json( + metadata_url, video_id, 'Downloading metadata JSON') - if metadata.get('error', {}).get('type') == 'password_protected': + if try_get(metadata, lambda x: x['error']['type']) == 'password_protected': password = self._downloader.params.get('videopassword') if password: r = int(metadata['id'][1:], 36) From 21c1a00dd7dbb9f7551ca9809a194f6380dee7a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:27:14 +0700 Subject: [PATCH 20/32] [pluralsight] Improve authentication (closes #17762) --- youtube_dl/extractor/pluralsight.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index daf172570..eafe56897 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -4,6 +4,7 @@ import collections import json import os import random +import re from .common import InfoExtractor from ..compat import ( @@ -196,7 +197,10 @@ query viewClip { if error: raise ExtractorError('Unable to login: %s' % error, expected=True) - if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')): + if all(not re.search(p, response) for p in ( + r'__INITIAL_STATE__', r'["\']currentUser["\']', + # new layout? + r'>\s*Sign out\s*<')): BLOCKED = 'Your account has been blocked due to suspicious activity' if BLOCKED in response: raise ExtractorError( From 2e7ed29e3429c20e735f3f7dfceb1b13cf757037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:29:52 +0700 Subject: [PATCH 21/32] [ChangeLog] Actualize [ci skip] --- ChangeLog | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index 241712037..e2757f891 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version <unreleased> + +Extractors +* [pluralsight] Improve authentication (#17762) +* [dailymotion] Fix extraction (#17699) +* [crunchyroll] Switch to HTTPS for RpcApi (#17749) ++ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705) +* [philharmoniedeparis] Fix extraction (#17705) ++ [jamendo] Add support for licensing.jamendo.com (#17724) ++ [openload] Add support for oload.cloud (#17710) +* [pluralsight] Fix subtitles extraction (#17726, #17728) ++ [vimeo] Add another config regular expression (#17690) +* [spike] Fix Paramount Network extraction (#17677) +* [hotstar] Fix extraction (#14694, #14931, #17637) + + version 2018.09.26 Extractors From d96f976b0c36f65894380d3d831b0520d6260c20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:31:30 +0700 Subject: [PATCH 22/32] release 2018.10.05 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ed3e0a157..058eb4321 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.26 +[debug] youtube-dl version 2018.10.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e2757f891..86cf489b1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.10.05 Extractors * [pluralsight] Improve authentication (#17762) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 736ab6da7..f167a6ddc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -360,7 +360,7 @@ - **HitRecord** - **HornBunny** - **HotNewHipHop** - - **HotStar** + - **hotstar** - **hotstar:playlist** - **Howcast** - **HowStuffWorks** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6f2cc31df..7d3f25019 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.26' +__version__ = '2018.10.05' From c9d891f19a923f53132b49a1f5b97f344d92503c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 5 Oct 2018 20:11:01 +0100 Subject: [PATCH 23/32] [patreon] fix extraction(closes #14502)(closes #10471) --- youtube_dl/extractor/patreon.py | 160 ++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 72 deletions(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 9eb027679..6f73ed68d 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -2,52 +2,63 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + clean_html, + determine_ext, + int_or_none, + parse_iso8601, +) class PatreonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)' - _TESTS = [ - { - 'url': 'http://www.patreon.com/creation?hid=743933', - 'md5': 'e25505eec1053a6e6813b8ed369875cc', - 'info_dict': { - 'id': '743933', - 'ext': 'mp3', - 'title': 'Episode 166: David Smalley of Dogma Debate', - 'uploader': 'Cognitive Dissonance Podcast', - 'thumbnail': 're:^https?://.*$', - }, + _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.patreon.com/creation?hid=743933', + 'md5': 'e25505eec1053a6e6813b8ed369875cc', + 'info_dict': { + 'id': '743933', + 'ext': 'mp3', + 'title': 'Episode 166: David Smalley of Dogma Debate', + 'description': 'md5:713b08b772cd6271b9f3906683cfacdf', + 'uploader': 'Cognitive Dissonance Podcast', + 'thumbnail': 're:^https?://.*$', + 'timestamp': 1406473987, + 'upload_date': '20140727', }, - { - 'url': 'http://www.patreon.com/creation?hid=754133', - 'md5': '3eb09345bf44bf60451b8b0b81759d0a', - 'info_dict': { - 'id': '754133', - 'ext': 'mp3', - 'title': 'CD 167 Extra', - 'uploader': 'Cognitive Dissonance Podcast', - 'thumbnail': 're:^https?://.*$', - }, + }, { + 'url': 'http://www.patreon.com/creation?hid=754133', + 'md5': '3eb09345bf44bf60451b8b0b81759d0a', + 'info_dict': { + 'id': '754133', + 'ext': 'mp3', + 'title': 'CD 167 Extra', + 'uploader': 'Cognitive Dissonance Podcast', + 'thumbnail': 're:^https?://.*$', }, - { - 'url': 'https://www.patreon.com/creation?hid=1682498', - 'info_dict': { - 'id': 'SU4fj_aEMVw', - 'ext': 'mp4', - 'title': 'I\'m on Patreon!', - 'uploader': 'TraciJHines', - 'thumbnail': 're:^https?://.*$', - 'upload_date': '20150211', - 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', - 'uploader_id': 'TraciJHines', - }, - 'params': { - 'noplaylist': True, - 'skip_download': True, - } + 'skip': 'Patron-only content', + }, { + 'url': 'https://www.patreon.com/creation?hid=1682498', + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'uploader': 'TraciJHines', + 'thumbnail': 're:^https?://.*$', + 'upload_date': '20150211', + 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', + 'uploader_id': 'TraciJHines', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, } - ] + }, { + 'url': 'https://www.patreon.com/posts/episode-166-of-743933', + 'only_matching': True, + }, { + 'url': 'https://www.patreon.com/posts/743933', + 'only_matching': True, + }] # Currently Patreon exposes download URL via hidden CSS, so login is not # needed. Keeping this commented for when this inevitably changes. @@ -78,38 +89,43 @@ class PatreonIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage).strip() - - attach_fn = self._html_search_regex( - r'<div class="attach"><a target="_blank" href="([^"]+)">', - webpage, 'attachment URL', default=None) - embed = self._html_search_regex( - r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"', - webpage, 'embedded URL', default=None) - - if attach_fn is not None: - video_url = 'http://www.patreon.com' + attach_fn - thumbnail = self._og_search_thumbnail(webpage) - uploader = self._html_search_regex( - r'<strong>(.*?)</strong> is creating', webpage, 'uploader') - elif embed is not None: - return self.url_result(embed) - else: - playlist = self._parse_json(self._search_regex( - r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', - webpage, 'playlist JSON'), - video_id, transform_source=js_to_json) - data = playlist[0] - video_url = self._proto_relative_url(data['mp3']) - thumbnail = self._proto_relative_url(data.get('cover')) - uploader = data.get('artist') - - return { + post = self._download_json( + 'https://www.patreon.com/api/posts/' + video_id, video_id) + attributes = post['data']['attributes'] + title = attributes['title'].strip() + image = attributes.get('image') or {} + info = { 'id': video_id, - 'url': video_url, - 'ext': 'mp3', 'title': title, - 'uploader': uploader, - 'thumbnail': thumbnail, + 'description': clean_html(attributes.get('content')), + 'thumbnail': image.get('large_url') or image.get('url'), + 'timestamp': parse_iso8601(attributes.get('published_at')), + 'like_count': int_or_none(attributes.get('like_count')), + 'comment_count': int_or_none(attributes.get('comment_count')), } + + for i in post.get('included', []): + i_type = i.get('type') + if i_type == 'attachment': + attachment_attributes = i.get('attributes') or {} + attachment_url = attachment_attributes.get('url') + if attachment_url: + info.update({ + 'url': attachment_url, + 'ext': determine_ext(attachment_attributes.get('name'), 'mp3'), + }) + elif i_type == 'user': + user_attributes = i.get('attributes') + if user_attributes: + info.update({ + 'uploader': user_attributes.get('full_name'), + 'uploader_url': user_attributes.get('url'), + }) + + if not info.get('url'): + info.update({ + '_type': 'url', + 'url': attributes['embed']['url'], + }) + + return info From 19a352854f5143b7cd120e990433d0fd40f617b0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 5 Oct 2018 22:45:04 +0100 Subject: [PATCH 24/32] [patreon] extract post_file url(#17792) --- youtube_dl/extractor/patreon.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 6f73ed68d..426dd8121 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -104,16 +104,18 @@ class PatreonIE(InfoExtractor): 'comment_count': int_or_none(attributes.get('comment_count')), } + def add_file(file_data): + file_url = file_data.get('url') + if file_url: + info.update({ + 'url': file_url, + 'ext': determine_ext(file_data.get('name'), 'mp3'), + }) + for i in post.get('included', []): i_type = i.get('type') if i_type == 'attachment': - attachment_attributes = i.get('attributes') or {} - attachment_url = attachment_attributes.get('url') - if attachment_url: - info.update({ - 'url': attachment_url, - 'ext': determine_ext(attachment_attributes.get('name'), 'mp3'), - }) + add_file(i.get('attributes') or {}) elif i_type == 'user': user_attributes = i.get('attributes') if user_attributes: @@ -122,6 +124,9 @@ class PatreonIE(InfoExtractor): 'uploader_url': user_attributes.get('url'), }) + if not info.get('url'): + add_file(attributes.get('post_file') or {}) + if not info.get('url'): info.update({ '_type': 'url', From 5d90a8a5f3fef73bb4ccbecd8c61583522b88d79 Mon Sep 17 00:00:00 2001 From: yonaikerlol <39972049+yonaikerlol@users.noreply.github.com> Date: Sun, 7 Oct 2018 09:05:45 -0400 Subject: [PATCH 25/32] [openload] Add support for oload.cc --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index dc01b6346..c652603a5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -314,6 +314,9 @@ class OpenloadIE(InfoExtractor): # Its title has not got its extension but url has it 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', 'only_matching': True, + }, { + 'url': 'https://oload.cc/embed/5NEAbI2BDSk', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From a94e7c195e261137461b546c6446033b371dfbbe Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 11:51:40 +0100 Subject: [PATCH 26/32] [ted] fix extraction for http and rtmp formats(closes #5941)(closes #17572)(closes #17894) --- youtube_dl/extractor/ted.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 212ac80ab..f9b6aa48f 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -212,8 +212,6 @@ class TEDIE(InfoExtractor): http_url = None for format_id, resources in resources_.items(): - if not isinstance(resources, dict): - continue if format_id == 'h264': for resource in resources: h264_url = resource.get('file') @@ -242,6 +240,8 @@ class TEDIE(InfoExtractor): 'tbr': int_or_none(resource.get('bitrate')), }) elif format_id == 'hls': + if not isinstance(resources, dict): + continue stream_url = url_or_none(resources.get('stream')) if not stream_url: continue From f0ee386851bb0d53801a27dafbe4e8fee5b43d88 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 16:26:29 +0100 Subject: [PATCH 27/32] [tv3] remove extractor(closes #10461)(closes #15339) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/tv3.py | 34 ------------------------------ 2 files changed, 35 deletions(-) delete mode 100644 youtube_dl/extractor/tv3.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 464c8d690..17b576df3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1153,7 +1153,6 @@ from .tv2 import ( TV2ArticleIE, ) from .tv2hu import TV2HuIE -from .tv3 import TV3IE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tva import TVAIE diff --git a/youtube_dl/extractor/tv3.py b/youtube_dl/extractor/tv3.py deleted file mode 100644 index 3867ec90d..000000000 --- a/youtube_dl/extractor/tv3.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class TV3IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx' - _TEST = { - 'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx', - 'info_dict': { - 'id': '4659127992001', - 'ext': 'mp4', - 'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3', - 'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.', - 'uploader_id': '3812193411001', - 'upload_date': '20151213', - 'timestamp': 1449975272, - }, - 'expected_warnings': [ - 'Failed to download MPD manifest' - ], - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id') - return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From ee5fe42e442f04e1c9a388a46e0b4552be4a56d7 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 17:54:38 +0100 Subject: [PATCH 28/32] [brightcove:legacy] fall back to brightcove:new(#13912) --- youtube_dl/extractor/brightcove.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 14f9a14ed..5dbd71e12 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -356,7 +356,9 @@ class BrightcoveLegacyIE(InfoExtractor): def _extract_video_info(self, video_info): video_id = compat_str(video_info['id']) + publisher_id = video_info.get('publisherId') + info = { 'id': video_id, 'title': video_info['displayName'].strip(), @@ -444,8 +446,16 @@ class BrightcoveLegacyIE(InfoExtractor): else: return ad_info - if 'url' not in info and not info.get('formats'): - raise ExtractorError('Unable to extract video url for %s' % video_id) + if not info.get('url') and not info.get('formats'): + uploader_id = info.get('uploader_id') + if uploader_id: + info.update({ + '_type': 'url', + 'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (uploader_id, video_id), + 'ie_key': BrightcoveNewIE.ie_key(), + }) + else: + raise ExtractorError('Unable to extract video url for %s' % video_id) return info From 160c2773f63c72686635533bc2553634b22e7e2e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 18:41:57 +0100 Subject: [PATCH 29/32] [brightcove:legacy] add another fall back to brightcove:new --- youtube_dl/extractor/brightcove.py | 39 ++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 5dbd71e12..40c3959fd 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,8 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import base64 import json +import re +import struct from .common import InfoExtractor from .adobepass import AdobePassIE @@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor): 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?', expected=True) + def _brightcove_new_url_result(self, publisher_id, video_id): + brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) + def _get_video_info(self, video_id, query, referer=None): headers = {} linkBase = query.get('linkBaseURL') @@ -323,6 +329,29 @@ class BrightcoveLegacyIE(InfoExtractor): r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage, 'error message', default=None) if error_msg is not None: + publisher_id = query.get('publisherId') + if publisher_id and publisher_id[0].isdigit(): + publisher_id = publisher_id[0] + if not publisher_id: + valid_key = lambda key: key and ',' in key + player_key = query.get('playerKey') + if player_key and ',' in player_key[0]: + player_key = player_key[0] + else: + player_id = query.get('playerID') + if player_id and player_id[0].isdigit(): + player_page = self._download_webpage( + 'http://link.brightcove.com/services/player/bcpid' + player_id[0], + video_id, headers=headers, fatal=False) + if player_page: + player_key = self._search_regex( + r'<param\s+name="playerKey"\s+value="([\w~,-]+)"', + player_page, 'player key', fatal=False) + if player_key: + enc_pub_id = player_key.split(',')[1].replace('~', '=') + publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] + if publisher_id: + return self._brightcove_new_url_result(publisher_id, video_id) raise ExtractorError( 'brightcove said: %s' % error_msg, expected=True) @@ -356,9 +385,7 @@ class BrightcoveLegacyIE(InfoExtractor): def _extract_video_info(self, video_info): video_id = compat_str(video_info['id']) - publisher_id = video_info.get('publisherId') - info = { 'id': video_id, 'title': video_info['displayName'].strip(), @@ -449,11 +476,7 @@ class BrightcoveLegacyIE(InfoExtractor): if not info.get('url') and not info.get('formats'): uploader_id = info.get('uploader_id') if uploader_id: - info.update({ - '_type': 'url', - 'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (uploader_id, video_id), - 'ie_key': BrightcoveNewIE.ie_key(), - }) + info.update(self._brightcove_new_url_result(uploader_id, video_id)) else: raise ExtractorError('Unable to extract video url for %s' % video_id) return info From 582797d780b6f4857a5b8b6ca8c63915242c0ab9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 20:47:12 +0100 Subject: [PATCH 30/32] [brightcove] remove unused variable --- youtube_dl/extractor/brightcove.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 40c3959fd..465ae396e 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -333,7 +333,6 @@ class BrightcoveLegacyIE(InfoExtractor): if publisher_id and publisher_id[0].isdigit(): publisher_id = publisher_id[0] if not publisher_id: - valid_key = lambda key: key and ',' in key player_key = query.get('playerKey') if player_key and ',' in player_key[0]: player_key = player_key[0] From baeabf77428ad1a6bd5a910e7be07100fcb1eadd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 16 Oct 2018 23:19:44 +0700 Subject: [PATCH 31/32] [rutube] Use geo verification headers (closes #17897) --- youtube_dl/extractor/rutube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 261bcbb83..10ac8ed1f 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -103,7 +103,8 @@ class RutubeIE(RutubeBaseIE): options = self._download_json( 'http://rutube.ru/api/play/options/%s/?format=json' % video_id, - video_id, 'Downloading options JSON') + video_id, 'Downloading options JSON', + headers=self.geo_verification_headers()) formats = [] for format_id, format_url in options['video_balancer'].items(): From b99b0bcfa079a15a988cf931a3ce44bb480dfbdb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 17 Oct 2018 06:22:07 +0100 Subject: [PATCH 32/32] [cwtv] handle api errors(closes #17905) --- youtube_dl/extractor/cwtv.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index 224a1fb5d..f9bd535f6 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_iso8601, @@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( + data = self._download_json( 'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id, - video_id)['video'] + video_id) + if data.get('result') != 'ok': + raise ExtractorError(data['msg'], expected=True) + video_data = data['video'] title = video_data['title'] mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id