Merge remote-tracking branch 'rg3/master'

2016-09-16 15:42:43 +02:00 · 2016-09-16 15:42:43 +02:00 · b6a73938c5
commit b6a73938c5
parent 04167e247c 9d8985a165
15 changed files with 147 additions and 73 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.09.11.1
+[debug] youtube-dl version 2016.09.15
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
@@ -55,4 +55,4 @@ $ youtube-dl -v <your command line>
 ### Description of your *issue*, suggested solution and other information

 Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
-If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
+If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,26 @@
-version <unreleased>
+version 2016.09.15
+
+Core
+* Improve _hidden_inputs
++ Introduce improved explicit Adobe Pass support
++ Add --ap-mso to provide multiple-system operator identifier
++ Add --ap-username to provide MSO account username
++ Add --ap-password to provide MSO account password
++ Add --ap-list-mso to list all supported MSOs
++ Add support for Rogers Cable multiple-system operator (#10606)

 Extractors
-* [kwuo] Improve error detection (#10650)
+* [crunchyroll] Fix authentication (#10655)
+* [twitch] Fix API calls (#10654, #10660)
++ [bellmedia] Add support for more Bell Media Television sites
+* [franceinter] Fix extraction (#10538, #2105)
+* [kuwo] Improve error detection (#10650)
++ [go] Add support for free full episodes (#10439)
 * [bilibili] Fix extraction for specific videos (#10647)
+* [nhk] Fix extraction (#10633)
+* [kaltura] Improve audio detection
+* [kaltura] Skip chun format
++ [vimeo:ondemand] Pass Referer along with embed URL (#10624)
 + [nbc] Add support for NBC Olympics (#10361)


--- a/README.md
+++ b/README.md
@@ -358,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like.
    -n, --netrc                      Use .netrc authentication data
    --video-password PASSWORD        Video password (vimeo, smotri, youku)

+## Adobe Pass Options:
+    --ap-mso MSO                     Adobe Pass multiple-system operator (TV
+                                     provider) identifier, use --ap-list-mso for
+                                     a list of available MSOs
+    --ap-username USERNAME           Multiple-system operator account login
+    --ap-password PASSWORD           Multiple-system operator account password.
+                                     If this option is left out, youtube-dl will
+                                     ask interactively.
+    --ap-list-mso                    List all supported multiple-system
+                                     operators
+
 ## Post-processing Options:
    -x, --extract-audio              Convert video files to audio-only files
                                     (requires ffmpeg or avconv and ffprobe or
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -89,6 +89,7 @@
 - **BeatportPro**
 - **Beeg**
 - **BehindKink**
+ - **BellMedia**
 - **Bet**
 - **Bigflix**
 - **Bild**: Bild.de
@@ -169,7 +170,6 @@
 - **CSNNE**
 - **CSpan**: C-SPAN
 - **CtsNews**: 華視新聞
- - **CTV**
 - **CTVNews**
 - **culturebox.francetvinfo.fr**
 - **CultureUnplugged**
@@ -445,6 +445,7 @@
 - **NBA**
 - **NBC**
 - **NBCNews**
+ - **NBCOlympics**
 - **NBCSports**
 - **NBCSportsVPlayer**
 - **ndr**: NDR.de - Norddeutscher Rundfunk
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -131,9 +131,9 @@ class YoutubeDL(object):
    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
-    ap_mso:            Adobe Pass Multiple-system operator Identifier.
-    ap_username:       TV Provider username for authentication purposes.
-    ap_password:       TV Provider password for authentication purposes.
+    ap_mso:            Adobe Pass multiple-system operator identifier.
+    ap_username:       Multiple-system operator account username.
+    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
--- a/youtube_dl/extractor/adobepass.py
+++ b/youtube_dl/extractor/adobepass.py
@@ -32,6 +32,7 @@ MSO_INFO = {
 class AdobePassIE(InfoExtractor):
    _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
    _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
+    _MVPD_CACHE = 'ap-mvpd'

    @staticmethod
    def _get_mvpd_resource(provider_id, title, guid, rating):
@@ -85,7 +86,7 @@ class AdobePassIE(InfoExtractor):
        guid = xml_text(resource, 'guid')
        count = 0
        while count < 2:
-            requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
+            requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
            authn_token = requestor_info.get('authn_token')
            if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
                authn_token = None
@@ -125,12 +126,12 @@ class AdobePassIE(InfoExtractor):
                        'requestor_id': requestor_id,
                    }), headers=mvpd_headers)
                if '<pendingLogout' in session:
-                    self._downloader.cache.store('mvpd', requestor_id, {})
+                    self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
                    count += 1
                    continue
                authn_token = unescapeHTML(xml_text(session, 'authnToken'))
                requestor_info['authn_token'] = authn_token
-                self._downloader.cache.store('mvpd', requestor_id, requestor_info)
+                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)

            authz_token = requestor_info.get(guid)
            if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
@@ -146,12 +147,12 @@ class AdobePassIE(InfoExtractor):
                        'userMeta': '1',
                    }), headers=mvpd_headers)
                if '<pendingLogout' in authorize:
-                    self._downloader.cache.store('mvpd', requestor_id, {})
+                    self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
                    count += 1
                    continue
                authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
                requestor_info[guid] = authz_token
-                self._downloader.cache.store('mvpd', requestor_id, requestor_info)
+                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)

            mvpd_headers.update({
                'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
@@ -167,7 +168,7 @@ class AdobePassIE(InfoExtractor):
                    'hashed_guid': 'false',
                }), headers=mvpd_headers)
            if '<pendingLogout' in short_authorize:
-                self._downloader.cache.store('mvpd', requestor_id, {})
+                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
                count += 1
                continue
            return short_authorize
--- a/youtube_dl/extractor/canvas.py
+++ b/youtube_dl/extractor/canvas.py
@@ -71,7 +71,7 @@ class CanvasIE(InfoExtractor):
            webpage)).strip()

        video_id = self._html_search_regex(
-            r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
+            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')

        data = self._download_json(
            'https://mediazone.vrt.be/api/v1/%s/assets/%s'
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -674,23 +674,26 @@ class InfoExtractor(object):
                    username = info[0]
                    password = info[2]
                else:
-                    raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine)
+                    raise netrc.NetrcParseError(
+                        'No authenticators for %s' % netrc_machine)
            except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
+                self._downloader.report_warning(
+                    'parsing .netrc: %s' % error_to_compat_str(err))

-        return (username, password)
+        return username, password

    def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
        """
        Get the login info as (username, password)
-        It will look in the netrc file using the _NETRC_MACHINE value
+        First look for the manually specified credentials using username_option
+        and password_option as keys in params dictionary. If no such credentials
+        available look in the netrc file using the netrc_machine or _NETRC_MACHINE
+        value.
        If there's no info available, return (None, None)
        """
        if self._downloader is None:
            return (None, None)

-        username = None
-        password = None
        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
@@ -700,7 +703,7 @@ class InfoExtractor(object):
        else:
            username, password = self._get_netrc_login_info(netrc_machine)

-        return (username, password)
+        return username, password

    def _get_tfa_info(self, note='two-factor verification code'):
        """
@@ -888,16 +891,16 @@ class InfoExtractor(object):
    def _hidden_inputs(html):
        html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
        hidden_inputs = {}
-        for input in re.findall(r'(?i)<input([^>]+)>', html):
-            if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
+        for input in re.findall(r'(?i)(<input[^>]+>)', html):
+            attrs = extract_attributes(input)
+            if not input:
                continue
-            name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input)
-            if not name:
+            if attrs.get('type') not in ('hidden', 'submit'):
                continue
-            value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
-            if not value:
-                continue
-            hidden_inputs[name.group('value')] = value.group('value')
+            name = attrs.get('name') or attrs.get('id')
+            value = attrs.get('value')
+            if name and value is not None:
+                hidden_inputs[name] = value
        return hidden_inputs

    def _form_hidden_inputs(self, form_id, html):
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -34,22 +34,51 @@ from ..aes import (


 class CrunchyrollBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://www.crunchyroll.com/login'
+    _LOGIN_FORM = 'login_form'
    _NETRC_MACHINE = 'crunchyroll'

    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            return
-        self.report_login()
-        login_url = 'https://www.crunchyroll.com/?a=formhandler'
-        data = urlencode_postdata({
-            'formname': 'RpcApiUser_Login',
-            'name': username,
-            'password': password,
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        login_form_str = self._search_regex(
+            r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
+            login_page, 'login form', group='form')
+
+        post_url = extract_attributes(login_form_str).get('action')
+        if not post_url:
+            post_url = self._LOGIN_URL
+        elif not post_url.startswith('http'):
+            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+        login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
+
+        login_form.update({
+            'login_form[name]': username,
+            'login_form[password]': password,
        })
-        login_request = sanitized_Request(login_url, data)
-        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+        response = self._download_webpage(
+            post_url, None, 'Logging in', 'Wrong login info',
+            data=urlencode_postdata(login_form),
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+        # Successful login
+        if '<title>Redirecting' in response:
+            return
+
+        error = self._html_search_regex(
+            '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
+            response, 'error message', default=None)
+        if error:
+            raise ExtractorError('Unable to login: %s' % error, expected=True)
+
+        raise ExtractorError('Unable to log in')

    def _real_initialize(self):
        self._login()
--- a/youtube_dl/extractor/nfl.py
+++ b/youtube_dl/extractor/nfl.py
@@ -165,7 +165,7 @@ class NFLIE(InfoExtractor):
            group='config'))
        # For articles, the id in the url is not the video id
        video_id = self._search_regex(
-            r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>.+?)\1',
+            r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'video id', default=video_id, group='id')
        config = self._download_json(config_url, video_id, 'Downloading player config')
        url_template = NFLIE.prepend_host(
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -429,7 +429,7 @@ class SchoolTVIE(InfoExtractor):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(
-            r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id')
+            r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id')
        return {
            '_type': 'url_transparent',
            'ie_key': 'NPO',
--- a/youtube_dl/extractor/tv4.py
+++ b/youtube_dl/extractor/tv4.py
@@ -2,9 +2,13 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
    ExtractorError,
+    int_or_none,
    parse_iso8601,
+    try_get,
+    update_url_query,
 )


@@ -65,36 +69,47 @@ class TV4IE(InfoExtractor):
        video_id = self._match_id(url)

        info = self._download_json(
-            'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')
+            'http://www.tv4play.se/player/assets/%s.json' % video_id,
+            video_id, 'Downloading video info JSON')

        # If is_geo_restricted is true, it doesn't necessarily mean we can't download it
-        if info['is_geo_restricted']:
+        if info.get('is_geo_restricted'):
            self.report_warning('This content might not be available in your country due to licensing restrictions.')
-        if info['requires_subscription']:
+        if info.get('requires_subscription'):
            raise ExtractorError('This content requires subscription.', expected=True)

-        sources_data = self._download_json(
-            'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON')
-        sources = sources_data['playback']
+        title = info['title']

        formats = []
-        for item in sources.get('items', {}).get('item', []):
-            ext, bitrate = item['mediaFormat'], item['bitrate']
-            formats.append({
-                'format_id': '%s_%s' % (ext, bitrate),
-                'tbr': bitrate,
-                'ext': ext,
-                'url': item['url'],
+        # http formats are linked with unresolvable host
+        for kind in ('hls', ''):
+            data = self._download_json(
+                'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
+                video_id, 'Downloading sources JSON', query={
+                    'protocol': kind,
+                    'videoFormat': 'MP4+WEBVTTS+WEBVTT',
                })
+            item = try_get(data, lambda x: x['playback']['items']['item'], dict)
+            manifest_url = item.get('url')
+            if not isinstance(manifest_url, compat_str):
+                continue
+            if kind == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id=kind, fatal=False))
+            else:
+                formats.extend(self._extract_f4m_formats(
+                    update_url_query(manifest_url, {'hdcore': '3.8.0'}),
+                    video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
-            'title': info['title'],
+            'title': title,
            'formats': formats,
            'description': info.get('description'),
            'timestamp': parse_iso8601(info.get('broadcast_date_time')),
-            'duration': info.get('duration'),
+            'duration': int_or_none(info.get('duration')),
            'thumbnail': info.get('image'),
-            'is_live': sources.get('live'),
+            'is_live': info.get('is_live') is True,
        }
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -32,6 +32,7 @@ class TwitchBaseIE(InfoExtractor):
    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'https://usher.ttvnw.net'
    _LOGIN_URL = 'http://www.twitch.tv/login'
+    _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
    _NETRC_MACHINE = 'twitch'

    def _handle_error(self, response):
@@ -44,15 +45,9 @@ class TwitchBaseIE(InfoExtractor):
                expected=True)

    def _call_api(self, path, item_id, note):
-        headers = {
-            'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
-            'X-Requested-With': 'XMLHttpRequest',
-        }
-        for cookie in self._downloader.cookiejar:
-            if cookie.name == 'api_token':
-                headers['Twitch-Api-Token'] = cookie.value
        response = self._download_json(
-            '%s/%s' % (self._API_BASE, path), item_id, note)
+            '%s/%s' % (self._API_BASE, path), item_id, note,
+            headers={'Client-ID': self._CLIENT_ID})
        self._handle_error(response)
        return response

--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -355,19 +355,19 @@ def parseOpts(overrideArguments=None):
    adobe_pass.add_option(
        '--ap-mso',
        dest='ap_mso', metavar='MSO',
-        help='Adobe Pass Multiple-system operator Identifier')
+        help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
    adobe_pass.add_option(
        '--ap-username',
        dest='ap_username', metavar='USERNAME',
-        help='TV Provider Login with this account ID')
+        help='Multiple-system operator account login')
    adobe_pass.add_option(
        '--ap-password',
        dest='ap_password', metavar='PASSWORD',
-        help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.')
+        help='Multiple-system operator account password. If this option is left out, youtube-dl will ask interactively.')
    adobe_pass.add_option(
        '--ap-list-mso',
        action='store_true', dest='ap_list_mso', default=False,
-        help='List all supported TV Providers')
+        help='List all supported multiple-system operators')

    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    video_format.add_option(
@@ -831,6 +831,7 @@ def parseOpts(overrideArguments=None):
    parser.add_option_group(video_format)
    parser.add_option_group(subtitles)
    parser.add_option_group(authentication)
+    parser.add_option_group(adobe_pass)
    parser.add_option_group(postproc)

    if overrideArguments is not None:
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.09.11.1'
+__version__ = '2016.09.15'