Merge remote-tracking branch 'upstream/master' into myversion

2018-05-13 03:19:01 -04:00 · 2018-05-13 03:19:01 -04:00 · 4ce6a125fc
commit 4ce6a125fc
parent 1004198099 07acdc5afc
12 changed files with 225 additions and 147 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.01**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.09**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.05.01
+[debug] youtube-dl version 2018.05.09
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/18
+++ b/18
@ -1,3 +1,21 @@
 version 2018.05.09
 Core
 * [YoutubeDL] Ensure ext exists for automatic captions
 * Introduce --geo-bypass-ip-block
 Extractors
 + [udemy] Extract asset captions
 + [udemy] Extract stream URLs (#16372)
 + [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389)
 + [cloudflarestream] Add support for cloudflarestream.com (#16375)
 * [watchbox] Fix extraction (#16356)
 * [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954)
 + [itv:btcc] Add support for itv.com/btcc (#16139)
 * [tunein] Use live title for live streams (#16347)
 * [itv] Improve extraction (#16253)
 version 2018.05.01
 Core
--- a/README.md
+++ b/README.md
@ -116,6 +116,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
    --geo-bypass-country CODE        Force bypass geographic restriction with
                                     explicitly provided two-letter ISO 3166-2
                                     country code (experimental)
    --geo-bypass-ip-block IP_BLOCK   Force bypass geographic restriction with
                                     explicitly provided IP block in CIDR
                                     notation (experimental)
 ## Video Selection:
    --playlist-start NUMBER          Playlist video to start at (default is 1)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -122,6 +122,7 @@
 - **BRMediathek**: Bayerischer Rundfunk Mediathek
 - **bt:article**: Bergens Tidende Articles
 - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
 - **BusinessInsider**
 - **BuzzFeed**
 - **BYUtv**
 - **Camdemy**
@ -163,6 +164,7 @@
 - **ClipRs**
 - **Clipsyndicate**
 - **CloserToTruth**
 - **CloudflareStream**
 - **cloudtime**: CloudTime
 - **Cloudy**
 - **Clubic**
@ -373,6 +375,7 @@
 - **Ir90Tv**
 - **ITTF**
 - **ITV**
 - **ITVBTCC**
 - **ivi**: ivi.ru
 - **ivi:compilation**: ivi.ru compilations
 - **ivideon**: Ivideon TV
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -1482,23 +1482,28 @@ class YoutubeDL(object):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
-        subtitles = info_dict.get('subtitles')
+        for cc_kind in ('subtitles', 'automatic_captions'):
-        if subtitles:
+            cc = info_dict.get(cc_kind)
-            for _, subtitle in subtitles.items():
+            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
-                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
-            info_dict['id'], subtitles,
+            info_dict['id'], subtitles, automatic_captions)
-            info_dict.get('automatic_captions'))
        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@ -179,6 +179,10 @@ class MixcloudIE(InfoExtractor):
                    formats.append({
                        'format_id': 'http',
                        'url': decrypted,
                        'downloader_options': {
                            # Mixcloud starts throttling at >~5M
                            'http_chunk_size': 5242880,
                        },
                    })
            self._sort_formats(formats)
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dl/extractor/nick.py
@ -85,7 +85,7 @@ class NickBrIE(MTVServicesInfoExtractor):
                    https?://
                        (?:
                            (?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
-                            (?:www\.)?nickjr\.nl
+                            (?:www\.)?nickjr\.[a-z]{2}
                        )
                        /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
                    '''
@ -98,6 +98,9 @@ class NickBrIE(MTVServicesInfoExtractor):
    }, {
        'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
        'only_matching': True,
    }, {
        'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
--- a/youtube_dl/extractor/reddit.py
+++ b/youtube_dl/extractor/reddit.py
@ -47,7 +47,7 @@ class RedditIE(InfoExtractor):
 class RedditRIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:(?:www|old)\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
        'info_dict': {
@ -86,6 +86,10 @@ class RedditRIE(InfoExtractor):
        # youtube
        'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
        'only_matching': True,
    }, {
        # reddit video @ nm reddit
        'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@ -1,35 +1,34 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import binascii
 import re
 import json
 from .common import InfoExtractor
 from ..compat import (
    compat_b64decode,
    compat_ord,
 )
 from ..utils import (
    ExtractorError,
    qualities,
    determine_ext,
    ExtractorError,
    int_or_none,
    mimetype2ext,
    parse_duration,
    parse_iso8601,
    qualities,
 )
 class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
+    _VALID_URL = r'https?://teamcoco\.com/video/(?P<id>[^/?#]+)'
    _TESTS = [
        {
-            'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
+            'url': 'http://teamcoco.com/video/mary-kay-remote',
-            'md5': '3f7746aa0dc86de18df7539903d399ea',
+            'md5': '55d532f81992f5c92046ad02fec34d7d',
            'info_dict': {
                'id': '80187',
                'ext': 'mp4',
                'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
-                'duration': 504,
+                'duration': 495.0,
-                'age_limit': 0,
+                'upload_date': '20140402',
                'timestamp': 1396407600,
            }
        }, {
            'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
@ -40,7 +39,8 @@ class TeamcocoIE(InfoExtractor):
                'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
                'title': 'Louis C.K. Interview Pt. 1 11/3/11',
                'duration': 288,
-                'age_limit': 0,
+                'upload_date': '20111104',
                'timestamp': 1320405840,
            }
        }, {
            'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
@ -49,6 +49,8 @@ class TeamcocoIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Timothy Olyphant Raises A Toast To “Justified”',
                'description': 'md5:15501f23f020e793aeca761205e42c24',
                'upload_date': '20150415',
                'timestamp': 1429088400,
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
@ -63,110 +65,93 @@ class TeamcocoIE(InfoExtractor):
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
-            }
+            },
            'skip': 'This video is no longer available.',
        }
    ]
-    _VIDEO_ID_REGEXES = (
+
-        r'"eVar42"\s*:\s*(\d+)',
+    def _graphql_call(self, query_template, object_type, object_id):
-        r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
+        find_object = 'find' + object_type
-        r'"id_not"\s*:\s*(\d+)'
+        return self._download_json(
-    )
+            'http://teamcoco.com/graphql/', object_id, data=json.dumps({
                'query': query_template % (find_object, object_id)
            }))['data'][find_object]
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        display_id = self._match_id(url)
-        display_id = mobj.group('display_id')
+        response = self._graphql_call('''{
-        webpage, urlh = self._download_webpage_handle(url, display_id)
+  %s(slug: "video/%s") {
-        if 'src=expired' in urlh.geturl():
+    ... on RecordSlug {
-            raise ExtractorError('This video is expired.', expected=True)
+      record {
        id
        title
        teaser
        publishOn
        thumb {
          preview
        }
        tags {
          name
        }
        duration
      }
    }
    ... on NotFoundSlug {
      status
    }
  }
 }''', 'Slug', display_id)
        if response.get('status'):
            raise ExtractorError('This video is no longer available.', expected=True)
-        video_id = mobj.group('video_id')
+        record = response['record']
-        if not video_id:
+        video_id = record['id']
-            video_id = self._html_search_regex(
-                self._VIDEO_ID_REGEXES, webpage, 'video id')
-        data = None
+        srcs = self._graphql_call('''{
-
+  %s(id: "%s") {
-        preload_codes = self._html_search_regex(
+    src
-            r'(function.+)setTimeout\(function\(\)\{playlist',
+  }
-            webpage, 'preload codes')
+}''', 'RecordVideoSource', video_id)['src']
        base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
        base64_fragments.remove('init')
        def _check_sequence(cur_fragments):
            if not cur_fragments:
                return
            for i in range(len(cur_fragments)):
                cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
                try:
                    raw_data = compat_b64decode(cur_sequence)
                    if compat_ord(raw_data[0]) == compat_ord('{'):
                        return json.loads(raw_data.decode('utf-8'))
                except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
                    continue
        def _check_data():
            for i in range(len(base64_fragments) + 1):
                for j in range(i, len(base64_fragments) + 1):
                    data = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
                    if data:
                        return data
        self.to_screen('Try to compute possible data sequence. This may take some time.')
        data = _check_data()
        if not data:
            raise ExtractorError(
                'Preload information could not be extracted', expected=True)
        formats = []
-        get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
+        get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
-        for filed in data['files']:
+        for format_id, src in srcs.items():
-            if determine_ext(filed['url']) == 'm3u8':
+            if not isinstance(src, dict):
                # compat_urllib_parse.urljoin does not work here
                if filed['url'].startswith('/'):
                    m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url']
                else:
                    m3u8_url = filed['url']
                m3u8_formats = self._extract_m3u8_formats(
                    m3u8_url, video_id, ext='mp4')
                for m3u8_format in m3u8_formats:
                    if m3u8_format not in formats:
                        formats.append(m3u8_format)
            elif determine_ext(filed['url']) == 'f4m':
                # TODO Correct f4m extraction
                continue
            src_url = src.get('src')
            if not src_url:
                continue
            ext = determine_ext(src_url, mimetype2ext(src.get('type')))
            if format_id == 'hls' or ext == 'm3u8':
                # compat_urllib_parse.urljoin does not work here
                if src_url.startswith('/'):
                    src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
                formats.extend(self._extract_m3u8_formats(
                    src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
            else:
-                if filed['url'].startswith('/mp4:protected/'):
+                if src_url.startswith('/mp4:protected/'):
                    # TODO Correct extraction for these files
                    continue
-                m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
+                tbr = int_or_none(self._search_regex(
-                if m_format is not None:
+                    r'(\d+)k\.mp4', src_url, 'tbr', default=None))
                    format_id = m_format.group(1)
                else:
                    format_id = filed['bitrate']
                tbr = (
                    int(filed['bitrate'])
                    if filed['bitrate'].isdigit()
                    else None)
                formats.append({
-                    'url': filed['url'],
+                    'url': src_url,
-                    'ext': 'mp4',
+                    'ext': ext,
                    'tbr': tbr,
                    'format_id': format_id,
                    'quality': get_quality(format_id),
                })
        self._sort_formats(formats)
        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
-            'title': data['title'],
+            'title': record['title'],
-            'thumbnail': data.get('thumb', {}).get('href'),
+            'thumbnail': record.get('thumb', {}).get('preview'),
-            'description': data.get('teaser'),
+            'description': record.get('teaser'),
-            'duration': data.get('duration'),
+            'duration': parse_duration(record.get('duration')),
-            'age_limit': self._family_friendly_search(webpage),
+            'timestamp': parse_iso8601(record.get('publishOn')),
        }
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@ -8,6 +8,7 @@ import random
 from .common import InfoExtractor
 from ..compat import (
    compat_HTTPError,
    compat_kwargs,
    compat_parse_qs,
    compat_str,
    compat_urllib_parse_urlencode,
@ -16,11 +17,14 @@ from ..compat import (
 from ..utils import (
    clean_html,
    ExtractorError,
    float_or_none,
    int_or_none,
    js_to_json,
    orderedSet,
    parse_duration,
    parse_iso8601,
    qualities,
    try_get,
    unified_timestamp,
    update_url_query,
    urlencode_postdata,
    urljoin,
@ -45,10 +49,11 @@ class TwitchBaseIE(InfoExtractor):
                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
                expected=True)
-    def _call_api(self, path, item_id, note):
+    def _call_api(self, path, item_id, *args, **kwargs):
        kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID
        response = self._download_json(
-            '%s/%s' % (self._API_BASE, path), item_id, note,
+            '%s/%s' % (self._API_BASE, path), item_id,
-            headers={'Client-ID': self._CLIENT_ID})
+            *args, **compat_kwargs(kwargs))
        self._handle_error(response)
        return response
@ -622,21 +627,23 @@ class TwitchStreamIE(TwitchBaseIE):
        }
-class TwitchClipsIE(InfoExtractor):
+class TwitchClipsIE(TwitchBaseIE):
    IE_NAME = 'twitch:clips'
    _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
-        'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
+        'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
        'md5': '761769e1eafce0ffebfb4089cb3847cd',
        'info_dict': {
-            'id': 'AggressiveCobraPoooound',
+            'id': '42850523',
            'ext': 'mp4',
            'title': 'EA Play 2016 Live from the Novo Theatre',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1465767393,
            'upload_date': '20160612',
            'creator': 'EA',
            'uploader': 'stereotype_',
-            'uploader_id': 'stereotype_',
+            'uploader_id': '43566419',
        },
    }, {
        # multiple formats
@ -647,34 +654,63 @@ class TwitchClipsIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        status = self._download_json(
            'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id,
            video_id)
-        clip = self._parse_json(
+        formats = []
            self._search_regex(
                r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
            video_id, transform_source=js_to_json)
-        title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
+        for option in status['quality_options']:
-
+            if not isinstance(option, dict):
-        formats = [{
+                continue
-            'url': option['source'],
+            source = option.get('source')
            if not source or not isinstance(source, compat_str):
                continue
            formats.append({
                'url': source,
                'format_id': option.get('quality'),
                'height': int_or_none(option.get('quality')),
-        } for option in clip.get('quality_options', []) if option.get('source')]
+                'fps': int_or_none(option.get('frame_rate')),
-
+            })
        if not formats:
            formats = [{
                'url': clip['clip_video_url'],
            }]
        self._sort_formats(formats)
-        return {
+        info = {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
            'uploader': clip.get('curator_login'),
            'uploader_id': clip.get('curator_display_name'),
            'formats': formats,
        }
        clip = self._call_api(
            'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
                'Accept': 'application/vnd.twitchtv.v5+json',
            })
        if clip:
            quality_key = qualities(('tiny', 'small', 'medium'))
            thumbnails = []
            thumbnails_dict = clip.get('thumbnails')
            if isinstance(thumbnails_dict, dict):
                for thumbnail_id, thumbnail_url in thumbnails_dict.items():
                    thumbnails.append({
                        'id': thumbnail_id,
                        'url': thumbnail_url,
                        'preference': quality_key(thumbnail_id),
                    })
            info.update({
                'id': clip.get('tracking_id') or video_id,
                'title': clip.get('title') or video_id,
                'duration': float_or_none(clip.get('duration')),
                'views': int_or_none(clip.get('views')),
                'timestamp': unified_timestamp(clip.get('created_at')),
                'thumbnails': thumbnails,
                'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
                'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
                'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
            })
        else:
            info.update({
                'title': video_id,
                'id': video_id,
            })
        return info
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@ -18,6 +18,7 @@ from ..utils import (
    int_or_none,
    js_to_json,
    sanitized_Request,
    try_get,
    unescapeHTML,
    urlencode_postdata,
 )
@ -105,7 +106,7 @@ class UdemyIE(InfoExtractor):
            % (course_id, lecture_id),
            lecture_id, 'Downloading lecture JSON', query={
                'fields[lecture]': 'title,description,view_html,asset',
-                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
+                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
            })
    def _handle_error(self, response):
@ -303,9 +304,25 @@ class UdemyIE(InfoExtractor):
                    'url': src,
                })
-        download_urls = asset.get('download_urls')
+        for url_kind in ('download', 'stream'):
-        if isinstance(download_urls, dict):
+            urls = asset.get('%s_urls' % url_kind)
-            extract_formats(download_urls.get('Video'))
+            if isinstance(urls, dict):
                extract_formats(urls.get('Video'))
        captions = asset.get('captions')
        if isinstance(captions, list):
            for cc in captions:
                if not isinstance(cc, dict):
                    continue
                cc_url = cc.get('url')
                if not cc_url or not isinstance(cc_url, compat_str):
                    continue
                lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
                sub_dict = (automatic_captions if cc.get('source') == 'auto'
                            else subtitles)
                sub_dict.setdefault(lang or 'en', []).append({
                    'url': cc_url,
                })
        view_html = lecture.get('view_html')
        if view_html:
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals
-__version__ = '2018.05.01'
+__version__ = '2018.05.09'
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2018.05.01'
+__version__ = '2018.05.09'