From 95be29e1c6b7a06ac444d5142582ebece79698ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 20:58:02 +0700 Subject: [PATCH 01/16] [twitch] Fix api calls (Closes #10654, closes #10660) --- youtube_dl/extractor/twitch.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 359a8859c..af6d890b0 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -32,6 +32,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'https://usher.ttvnw.net' _LOGIN_URL = 'http://www.twitch.tv/login' + _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6' _NETRC_MACHINE = 'twitch' def _handle_error(self, response): @@ -44,15 +45,9 @@ class TwitchBaseIE(InfoExtractor): expected=True) def _call_api(self, path, item_id, note): - headers = { - 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2', - 'X-Requested-With': 'XMLHttpRequest', - } - for cookie in self._downloader.cookiejar: - if cookie.name == 'api_token': - headers['Twitch-Api-Token'] = cookie.value response = self._download_json( - '%s/%s' % (self._API_BASE, path), item_id, note) + '%s/%s' % (self._API_BASE, path), item_id, note, + headers={'Client-ID': self._CLIENT_ID}) self._handle_error(response) return response From eb5b1fc0211e89f386c4f5563cc1d5d4edeb3c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 21:53:35 +0700 Subject: [PATCH 02/16] [crunchyroll] Fix authentication (Closes #10655) --- youtube_dl/extractor/crunchyroll.py | 47 +++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 6d3abb52f..1b69bd0b6 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -34,22 +34,51 @@ from ..aes import ( class CrunchyrollBaseIE(InfoExtractor): + 
_LOGIN_URL = 'https://www.crunchyroll.com/login' + _LOGIN_FORM = 'login_form' _NETRC_MACHINE = 'crunchyroll' def _login(self): (username, password) = self._get_login_info() if username is None: return - self.report_login() - login_url = 'https://www.crunchyroll.com/?a=formhandler' - data = urlencode_postdata({ - 'formname': 'RpcApiUser_Login', - 'name': username, - 'password': password, + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form_str = self._search_regex( + r'(?P<form><form[^>
]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, + login_page, 'login form', group='form') + + post_url = extract_attributes(login_form_str).get('action') + if not post_url: + post_url = self._LOGIN_URL + elif not post_url.startswith('http'): + post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + + login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page) + + login_form.update({ + 'login_form[name]': username, + 'login_form[password]': password, }) - login_request = sanitized_Request(login_url, data) - login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - self._download_webpage(login_request, None, False, 'Wrong login info') + + response = self._download_webpage( + post_url, None, 'Logging in', 'Wrong login info', + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + # Successful login + if 'Redirecting' in response: + return + + error = self._html_search_regex( + '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + raise ExtractorError('Unable to log in') def _real_initialize(self): self._login() From c8498368549048a578d5f30773aaa9760454983c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 21:54:48 +0700 Subject: [PATCH 03/16] [utils] Improve _hidden_inputs --- youtube_dl/extractor/common.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ff19270ae..e413799f9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -888,16 +888,16 @@ class InfoExtractor(object): def _hidden_inputs(html): html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) hidden_inputs = {} - for input in re.findall(r'(?i)<input([^>]+)>', html): - if not 
re.search(r'type=(["\'])(?:hidden|submit)\1', input): + for input in re.findall(r'(?i)(<input[^>]+>)', html): + attrs = extract_attributes(input) + if not input: continue - name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input) - if not name: + if attrs.get('type') not in ('hidden', 'submit'): continue - value = re.search(r'value=(["\'])(?P<value>.*?)\1', input) - if not value: - continue - hidden_inputs[name.group('value')] = value.group('value') + name = attrs.get('name') or attrs.get('id') + value = attrs.get('value') + if name and value is not None: + hidden_inputs[name] = value return hidden_inputs def _form_hidden_inputs(self, form_id, html): From 537f753399ed9fd07fcb9285a2a3330010394c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:17:17 +0700 Subject: [PATCH 04/16] [options] Improve Adobe Pass wording --- youtube_dl/options.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b2e863119..100d21310 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -355,19 +355,19 @@ def parseOpts(overrideArguments=None): adobe_pass.add_option( '--ap-mso', dest='ap_mso', metavar='MSO', - help='Adobe Pass Multiple-system operator Identifier') + help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs') adobe_pass.add_option( '--ap-username', dest='ap_username', metavar='USERNAME', - help='TV Provider Login with this account ID') + help='Multiple-system operator account login') adobe_pass.add_option( '--ap-password', dest='ap_password', metavar='PASSWORD', - help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') + help='Multiple-system operator account password. 
If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( '--ap-list-mso', action='store_true', dest='ap_list_mso', default=False, - help='List all supported TV Providers') + help='List all supported multiple-system operators') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From d2522b86ac7d1eff1f00e21bcd976a2616b6a6d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:18:31 +0700 Subject: [PATCH 05/16] [options] Actually print Adobe Pass options sections in --help --- youtube_dl/options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 100d21310..53497fbc6 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -831,6 +831,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(video_format) parser.add_option_group(subtitles) parser.add_option_group(authentication) + parser.add_option_group(adobe_pass) parser.add_option_group(postproc) if overrideArguments is not None: From 1da50aa34e9fa0fd927de8197dcf2884551dd800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:24:55 +0700 Subject: [PATCH 06/16] [YoutubeDL] Improve Adobe Pass options' wording --- youtube_dl/YoutubeDL.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 29d8517a3..442aa663b 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,9 +131,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso: Adobe Pass Multiple-system operator Identifier. - ap_username: TV Provider username for authentication purposes. - ap_password: TV Provider password for authentication purposes. 
+ ap_mso: Adobe Pass multiple-system operator identifier. + ap_username: Multiple-system operator account username. + ap_password: Multiple-system operator account password. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. From 2133565cec3646680600d314b93e535f6fa52339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:26:37 +0700 Subject: [PATCH 07/16] [extractor/common] Simplify _get_login_info --- youtube_dl/extractor/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e413799f9..9627816b4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -689,8 +689,6 @@ class InfoExtractor(object): if self._downloader is None: return (None, None) - username = None - password = None downloader_params = self._downloader.params # Attempt to use provided username and password or .netrc data @@ -700,7 +698,7 @@ class InfoExtractor(object): else: username, password = self._get_netrc_login_info(netrc_machine) - return (username, password) + return username, password def _get_tfa_info(self, note='two-factor verification code'): """ From 32443dd346594d64b579af714f4828287492c464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:34:29 +0700 Subject: [PATCH 08/16] [extractor/common] Update _get_login_info's comment --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9627816b4..95ea3fca5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -683,7 +683,10 @@ class InfoExtractor(object): def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): """ Get the login info as 
(username, password) - It will look in the netrc file using the _NETRC_MACHINE value + First look for the manually specified credentials using username_option + and password_option as keys in params dictionary. If no such credentials + available look in the netrc file using the netrc_machine or _NETRC_MACHINE + value. If there's no info available, return (None, None) """ if self._downloader is None: From dcce092e0aa92799f1e3a51ce5aae611af4d70d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:35:12 +0700 Subject: [PATCH 09/16] [extractor/common] Simplify _get_netrc_login_info and carry long lines --- youtube_dl/extractor/common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 95ea3fca5..4f738b9fc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -674,11 +674,13 @@ class InfoExtractor(object): username = info[0] password = info[2] else: - raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine) + raise netrc.NetrcParseError( + 'No authenticators for %s' % netrc_machine) except (IOError, netrc.NetrcParseError) as err: - self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err)) + self._downloader.report_warning( + 'parsing .netrc: %s' % error_to_compat_str(err)) - return (username, password) + return username, password def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): """ From 1dec2c8a0e00e8ed53ddd030347ce9225df9964e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:47:45 +0700 Subject: [PATCH 10/16] [adobepass] Change mvpd cache section name In order to better emphasize it's relation to Adobe Pass --- youtube_dl/extractor/adobepass.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 8ef5a96ce..01932e5e6 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -32,6 +32,7 @@ MSO_INFO = { class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + _MVPD_CACHE = 'ap-mvpd' @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): @@ -85,7 +86,7 @@ class AdobePassIE(InfoExtractor): guid = xml_text(resource, 'guid') count = 0 while count < 2: - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): authn_token = None @@ -125,12 +126,12 @@ class AdobePassIE(InfoExtractor): 'requestor_id': requestor_id, }), headers=mvpd_headers) if '<pendingLogout' in session: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue authn_token = unescapeHTML(xml_text(session, 'authnToken')) requestor_info['authn_token'] = authn_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) authz_token = requestor_info.get(guid) if authz_token and is_expired(authz_token, 'simpleTokenTTL'): @@ -146,12 +147,12 @@ class AdobePassIE(InfoExtractor): 'userMeta': '1', }), headers=mvpd_headers) if '<pendingLogout' in authorize: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) requestor_info[guid] = authz_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) + 
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) mvpd_headers.update({ 'ap_19': xml_text(authn_token, 'simpleSamlNameID'), @@ -167,7 +168,7 @@ class AdobePassIE(InfoExtractor): 'hashed_guid': 'false', }), headers=mvpd_headers) if '<pendingLogout' in short_authorize: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue return short_authorize From 490b755769a364ca0624390453e36321d5182d3e Mon Sep 17 00:00:00 2001 From: stepshal <nessento@openmailbox.org> Date: Wed, 14 Sep 2016 23:03:26 +0700 Subject: [PATCH 11/16] Improve some id regexes --- youtube_dl/extractor/canvas.py | 2 +- youtube_dl/extractor/nfl.py | 2 +- youtube_dl/extractor/npo.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index ef0691dcd..d183d5d52 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -71,7 +71,7 @@ class CanvasIE(InfoExtractor): webpage)).strip() video_id = self._html_search_regex( - r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id') + r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id') data = self._download_json( 'https://mediazone.vrt.be/api/v1/%s/assets/%s' diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index 200874d68..3930d16f1 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -165,7 +165,7 @@ class NFLIE(InfoExtractor): group='config')) # For articles, the id in the url is not the video id video_id = self._search_regex( - r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>.+?)\1', + r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', default=video_id, group='id') config = self._download_json(config_url, video_id, 'Downloading player config') 
url_template = NFLIE.prepend_host( diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 87f5675c7..3293bdb17 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -429,7 +429,7 @@ class SchoolTVIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id') + r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id') return { '_type': 'url_transparent', 'ie_key': 'NPO', From e6bf3621e703a7cd0d62736a1765b0ccff5adfe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 23:31:16 +0700 Subject: [PATCH 12/16] [ChangeLog] Actualize --- ChangeLog | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c3c8bf037..cd1f2fdf1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,26 @@ version <unreleased> +Core +* Improve _hidden_inputs ++ Introduce improved explicit Adobe Pass support ++ Add --ap-mso to provide multiple-system operator identifier ++ Add --ap-username to provide MSO account username ++ Add --ap-password to provide MSO account password ++ Add --ap-list-mso to list all supported MSOs ++ Add support for Rogers Cable multiple-system operator (#10606) + Extractors -* [kwuo] Improve error detection (#10650) +* [crunchyroll] Fix authentication (#10655) +* [twitch] Fix API calls (#10654, #10660) ++ [bellmedia] Add support for more Bell Media Television sites +* [franceinter] Fix extraction (#10538, #2105) +* [kuwo] Improve error detection (#10650) ++ [go] Add support for free full episodes (#10439) * [bilibili] Fix extraction for specific videos (#10647) +* [nhk] Fix extraction (#10633) +* [kaltura] Improve audio detection +* [kaltura] Skip chun format ++ [vimeo:ondemand] Pass Referer along with embed URL (#10624) + [nbc] Add support for NBC Olympics (#10361) 
From f5e008d134f5e69920829cfd7a5ce5ae57d275c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 23:46:11 +0700 Subject: [PATCH 13/16] release 2016.09.15 --- .github/ISSUE_TEMPLATE.md | 8 ++++---- ChangeLog | 2 +- README.md | 11 +++++++++++ docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e87fed573..61cea757c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.11.1 +[debug] youtube-dl version 2016.09.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} @@ -55,4 +55,4 @@ $ youtube-dl -v <your command line> ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* required an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires account credentials please provide them or explain how one can obtain them. diff --git a/ChangeLog b/ChangeLog index cd1f2fdf1..4583537ac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.15 Core * Improve _hidden_inputs diff --git a/README.md b/README.md index 7543f81ac..4debe15fe 100644 --- a/README.md +++ b/README.md @@ -358,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like. 
-n, --netrc Use .netrc authentication data --video-password PASSWORD Video password (vimeo, smotri, youku) +## Adobe Pass Options: + --ap-mso MSO Adobe Pass multiple-system operator (TV + provider) identifier, use --ap-list-mso for + a list of available MSOs + --ap-username USERNAME Multiple-system operator account login + --ap-password PASSWORD Multiple-system operator account password. + If this option is left out, youtube-dl will + ask interactively. + --ap-list-mso List all supported multiple-system + operators + ## Post-processing Options: -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7a7b268d3..fcb618561 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -89,6 +89,7 @@ - **BeatportPro** - **Beeg** - **BehindKink** + - **BellMedia** - **Bet** - **Bigflix** - **Bild**: Bild.de @@ -169,7 +170,6 @@ - **CSNNE** - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 - - **CTV** - **CTVNews** - **culturebox.francetvinfo.fr** - **CultureUnplugged** @@ -445,6 +445,7 @@ - **NBA** - **NBC** - **NBCNews** + - **NBCOlympics** - **NBCSports** - **NBCSportsVPlayer** - **ndr**: NDR.de - Norddeutscher Rundfunk diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 903aede58..081fd6ef0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.11.1' +__version__ = '2016.09.15' From 9d8985a165ebdc9fd8d72e7536253c42162b58a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 16 Sep 2016 00:54:34 +0700 Subject: [PATCH 14/16] [tv4] Fix hls and hds formats (Closes #10659) --- youtube_dl/extractor/tv4.py | 49 ++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 343edf206..5d2d8f132 100644 --- 
a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -2,9 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, + int_or_none, parse_iso8601, + try_get, + update_url_query, ) @@ -65,36 +69,47 @@ class TV4IE(InfoExtractor): video_id = self._match_id(url) info = self._download_json( - 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') + 'http://www.tv4play.se/player/assets/%s.json' % video_id, + video_id, 'Downloading video info JSON') # If is_geo_restricted is true, it doesn't necessarily mean we can't download it - if info['is_geo_restricted']: + if info.get('is_geo_restricted'): self.report_warning('This content might not be available in your country due to licensing restrictions.') - if info['requires_subscription']: + if info.get('requires_subscription'): raise ExtractorError('This content requires subscription.', expected=True) - sources_data = self._download_json( - 'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON') - sources = sources_data['playback'] + title = info['title'] formats = [] - for item in sources.get('items', {}).get('item', []): - ext, bitrate = item['mediaFormat'], item['bitrate'] - formats.append({ - 'format_id': '%s_%s' % (ext, bitrate), - 'tbr': bitrate, - 'ext': ext, - 'url': item['url'], - }) + # http formats are linked with unresolvable host + for kind in ('hls', ''): + data = self._download_json( + 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id, + video_id, 'Downloading sources JSON', query={ + 'protocol': kind, + 'videoFormat': 'MP4+WEBVTTS+WEBVTT', + }) + item = try_get(data, lambda x: x['playback']['items']['item'], dict) + manifest_url = item.get('url') + if not isinstance(manifest_url, compat_str): + continue + if kind == 'hls': + formats.extend(self._extract_m3u8_formats( + 
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=kind, fatal=False)) + else: + formats.extend(self._extract_f4m_formats( + update_url_query(manifest_url, {'hdcore': '3.8.0'}), + video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) return { 'id': video_id, - 'title': info['title'], + 'title': title, 'formats': formats, 'description': info.get('description'), 'timestamp': parse_iso8601(info.get('broadcast_date_time')), - 'duration': info.get('duration'), + 'duration': int_or_none(info.get('duration')), 'thumbnail': info.get('image'), - 'is_live': sources.get('live'), + 'is_live': info.get('is_live') is True, } From 52dc8a9b3f1af7abda6652a75b906d70809c475d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 16 Sep 2016 22:02:59 +0700 Subject: [PATCH 15/16] [franceinter] Fix upload date extraction --- youtube_dl/extractor/franceinter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 0d58f89c5..1a1232ade 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -10,14 +10,14 @@ class FranceInterIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' _TEST = { - 'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', - 'md5': '4764932e466e6f6c79c317d2e74f6884', + 'url': 'https://www.franceinter.fr/emissions/la-tete-au-carre/la-tete-au-carre-14-septembre-2016', + 'md5': '4e3aeb58fe0e83d7b0581fa213c409d0', 'info_dict': { - 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', + 'id': 'la-tete-au-carre/la-tete-au-carre-14-septembre-2016', 'ext': 'mp3', - 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', - 'description': 'md5:7f2ce449894d1e585932273080fb410d', - 'upload_date': '20131218', + 'title': 'Et si les 
rêves pouvaient nous aider à agir dans notre vie quotidienne ?', + 'description': 'md5:a245dd62cf5bf51de915f8d9956d180a', + 'upload_date': '20160914', }, } @@ -39,7 +39,7 @@ class FranceInterIE(InfoExtractor): if upload_date_str: upload_date_list = upload_date_str.split() upload_date_list.reverse() - upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr')) + upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) upload_date = ''.join(upload_date_list) else: upload_date = None From 98b7506e96b5ac107a777d8bb8900623d832fba4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 17:36:22 +0100 Subject: [PATCH 16/16] [toutv] add support for authentication(closes #10669) --- youtube_dl/extractor/radiocanada.py | 55 ++++++++++++++++---------- youtube_dl/extractor/toutv.py | 60 ++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 8ec402646..6751270ee 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -13,6 +13,7 @@ from ..utils import ( xpath_element, ExtractorError, determine_protocol, + unsmuggle_url, ) @@ -35,28 +36,51 @@ class RadioCanadaIE(InfoExtractor): } def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) app_code, video_id = re.match(self._VALID_URL, url).groups() - device_types = ['ipad', 'android'] + metadata = self._download_xml( + 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', + video_id, note='Downloading metadata XML', query={ + 'appCode': app_code, + 'idMedia': video_id, + }) + + def get_meta(name): + el = find_xpath_attr(metadata, './/Meta', 'name', name) + return el.text if el is not None else None + + if get_meta('protectionType'): + raise ExtractorError('This video is DRM protected.', expected=True) + + device_types = ['ipad'] if app_code != 'toutv': 
device_types.append('flash') + if not smuggled_data: + device_types.append('android') formats = [] # TODO: extract f4m formats # f4m formats can be extracted using flashhd device_type but they produce unplayable file for device_type in device_types: - v_data = self._download_xml( - 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', - video_id, note='Downloading %s XML' % device_type, query={ - 'appCode': app_code, - 'idMedia': video_id, - 'connectionType': 'broadband', - 'multibitrate': 'true', - 'deviceType': device_type, + validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' + query = { + 'appCode': app_code, + 'idMedia': video_id, + 'connectionType': 'broadband', + 'multibitrate': 'true', + 'deviceType': device_type, + } + if smuggled_data: + validation_url = 'https://services.radio-canada.ca/media/validation/v2/' + query.update(smuggled_data) + else: + query.update({ # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction 'paysJ391wsHjbOJwvCs26toz': 'CA', 'bypasslock': 'NZt5K62gRqfc', - }, fatal=False) + }) + v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False) v_url = xpath_text(v_data, 'url') if not v_url: continue @@ -101,17 +125,6 @@ class RadioCanadaIE(InfoExtractor): f4m_id='hds', fatal=False)) self._sort_formats(formats) - metadata = self._download_xml( - 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', - video_id, note='Downloading metadata XML', query={ - 'appCode': app_code, - 'idMedia': video_id, - }) - - def get_meta(name): - el = find_xpath_attr(metadata, './/Meta', 'name', name) - return el.text if el is not None else None - return { 'id': video_id, 'title': get_meta('Title'), diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 54c2d0aa6..d2d5c1171 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -2,12 +2,22 @@ from __future__ import unicode_literals 
from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + js_to_json, + ExtractorError, + urlencode_postdata, + extract_attributes, + smuggle_url, +) class TouTvIE(InfoExtractor): + _NETRC_MACHINE = 'toutv' IE_NAME = 'tou.tv' _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' + _access_token = None + _claims = None _TEST = { 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', @@ -22,18 +32,64 @@ class TouTvIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', } + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + state = 'http://ici.tou.tv//' + webpage = self._download_webpage(state, None, 'Downloading homepage') + toutvlogin = self._parse_json(self._search_regex( + r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) + authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize' + login_webpage = self._download_webpage( + authorize_url, None, 'Downloading login page', query={ + 'client_id': toutvlogin['clientId'], + 'redirect_uri': 'https://ici.tou.tv/login/loginCallback', + 'response_type': 'token', + 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', + 'state': state, + }) + login_form = self._search_regex( + r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form') + form_data = self._hidden_inputs(login_form) + form_data.update({ + 'login-email': email, + 'login-password': password, + }) + post_url = extract_attributes(login_form).get('action') or authorize_url + _, urlh = self._download_webpage_handle( + post_url, None, 'Logging in', data=urlencode_postdata(form_data)) + self._access_token = self._search_regex( + r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', + urlh.geturl(), 'access token') + self._claims = self._download_json( + 
'https://services.radio-canada.ca/media/validation/v2/getClaims', + None, 'Extracting Claims', query={ + 'token': self._access_token, + 'access_token': self._access_token, + })['claims'] + def _real_extract(self, url): path = self._match_id(url) metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) + if metadata.get('IsDrm'): + raise ExtractorError('This video is DRM protected.', expected=True) video_id = metadata['IdMedia'] details = metadata['Details'] title = details['OriginalTitle'] + video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id) + if self._access_token and self._claims: + video_url = smuggle_url(video_url, { + 'access_token': self._access_token, + 'claims': self._claims, + }) return { '_type': 'url_transparent', - 'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id), + 'url': video_url, 'id': video_id, 'title': title, 'thumbnail': details.get('ImageUrl'),