Merge pull request #328 from ytdl-org/master

[pull] master from ytdl-org:master
2020-06-05 18:03:38 +00:00 · 2020-06-05 18:03:38 +00:00 · 407a988445
commit 407a988445
parent cd781f1518 b37e47a3f9
2 changed files with 62 additions and 25 deletions
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -5,32 +5,34 @@ import base64
 import re
 import struct
 from .common import InfoExtractor
 from .adobepass import AdobePassIE
 from .common import InfoExtractor
 from ..compat import (
    compat_etree_fromstring,
    compat_HTTPError,
    compat_parse_qs,
    compat_urllib_parse_urlparse,
    compat_urlparse,
    compat_xml_parse_error,
    compat_HTTPError,
 )
 from ..utils import (
-    ExtractorError,
+    clean_html,
    extract_attributes,
    ExtractorError,
    find_xpath_attr,
    fix_xml_ampersands,
    float_or_none,
    js_to_json,
    int_or_none,
    js_to_json,
    mimetype2ext,
    parse_iso8601,
    smuggle_url,
    str_or_none,
    unescapeHTML,
    unsmuggle_url,
    update_url_query,
    clean_html,
    mimetype2ext,
    UnsupportedError,
    update_url_query,
    url_or_none,
 )
@@ -553,10 +555,16 @@ class BrightcoveNewIE(AdobePassIE):
        subtitles = {}
        for text_track in json_data.get('text_tracks', []):
-            if text_track.get('src'):
+            if text_track.get('kind') != 'captions':
-                subtitles.setdefault(text_track.get('srclang'), []).append({
+                continue
-                    'url': text_track['src'],
+            text_track_url = url_or_none(text_track.get('src'))
-                })
+            if not text_track_url:
                continue
            lang = (str_or_none(text_track.get('srclang'))
                    or str_or_none(text_track.get('label')) or 'en').lower()
            subtitles.setdefault(lang, []).append({
                'url': text_track_url,
            })
        is_live = False
        duration = float_or_none(json_data.get('duration'), 1000)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -21,6 +21,8 @@ from ..utils import (
    orderedSet,
    parse_duration,
    parse_iso8601,
    qualities,
    str_or_none,
    try_get,
    unified_timestamp,
    update_url_query,
@@ -50,8 +52,14 @@ class TwitchBaseIE(InfoExtractor):
    def _call_api(self, path, item_id, *args, **kwargs):
        headers = kwargs.get('headers', {}).copy()
-        headers['Client-ID'] = self._CLIENT_ID
+        headers.update({
-        kwargs['headers'] = headers
+            'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8',
            'Client-ID': self._CLIENT_ID,
        })
        kwargs.update({
            'headers': headers,
            'expected_status': (400, 410),
        })
        response = self._download_json(
            '%s/%s' % (self._API_BASE, path), item_id,
            *args, **compat_kwargs(kwargs))
@@ -186,12 +194,27 @@ class TwitchItemBaseIE(TwitchBaseIE):
            is_live = False
        else:
            is_live = None
        _QUALITIES = ('small', 'medium', 'large')
        quality_key = qualities(_QUALITIES)
        thumbnails = []
        preview = info.get('preview')
        if isinstance(preview, dict):
            for thumbnail_id, thumbnail_url in preview.items():
                thumbnail_url = url_or_none(thumbnail_url)
                if not thumbnail_url:
                    continue
                if thumbnail_id not in _QUALITIES:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'preference': quality_key(thumbnail_id),
                })
        return {
            'id': info['_id'],
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('length')),
-            'thumbnail': info.get('preview'),
+            'thumbnails': thumbnails,
            'uploader': info.get('channel', {}).get('display_name'),
            'uploader_id': info.get('channel', {}).get('name'),
            'timestamp': parse_iso8601(info.get('recorded_at')),
@@ -583,10 +606,18 @@ class TwitchStreamIE(TwitchBaseIE):
                else super(TwitchStreamIE, cls).suitable(url))
    def _real_extract(self, url):
-        channel_id = self._match_id(url)
+        channel_name = self._match_id(url)
        access_token = self._call_api(
            'api/channels/%s/access_token' % channel_name, channel_name,
            'Downloading access token JSON')
        token = access_token['token']
        channel_id = compat_str(self._parse_json(
            token, channel_name)['channel_id'])
        stream = self._call_api(
-            'kraken/streams/%s?stream_type=all' % channel_id.lower(),
+            'kraken/streams/%s?stream_type=all' % channel_id,
            channel_id, 'Downloading stream JSON').get('stream')
        if not stream:
@@ -596,11 +627,9 @@ class TwitchStreamIE(TwitchBaseIE):
        # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
        # an invalid m3u8 URL. Working around by use of original channel name from stream
        # JSON and fallback to lowercase if it's not available.
-        channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
+        channel_name = try_get(
-
+            stream, lambda x: x['channel']['name'],
-        access_token = self._call_api(
+            compat_str) or channel_name.lower()
            'api/channels/%s/access_token' % channel_id, channel_id,
            'Downloading channel access token')
        query = {
            'allow_source': 'true',
@@ -611,11 +640,11 @@ class TwitchStreamIE(TwitchBaseIE):
            'playlist_include_framerate': 'true',
            'segment_preference': '4',
            'sig': access_token['sig'].encode('utf-8'),
-            'token': access_token['token'].encode('utf-8'),
+            'token': token.encode('utf-8'),
        }
        formats = self._extract_m3u8_formats(
            '%s/api/channel/hls/%s.m3u8?%s'
-            % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)),
+            % (self._USHER_BASE, channel_name, compat_urllib_parse_urlencode(query)),
            channel_id, 'mp4')
        self._prefer_source(formats)
@@ -638,8 +667,8 @@ class TwitchStreamIE(TwitchBaseIE):
            })
        return {
-            'id': compat_str(stream['_id']),
+            'id': str_or_none(stream.get('_id')) or channel_id,
-            'display_id': channel_id,
+            'display_id': channel_name,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,