From b4eb0bc7bd2524a63e3a6441fe82a6cfd8ebc365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 Jun 2020 23:33:14 +0700 Subject: [PATCH 1/5] [brightcove] Fix subtitles extraction (closes #25540) --- youtube_dl/extractor/brightcove.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 85001b3ad..462815317 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -25,9 +25,11 @@ from ..utils import ( int_or_none, parse_iso8601, smuggle_url, + str_or_none, unescapeHTML, unsmuggle_url, update_url_query, + url_or_none, clean_html, mimetype2ext, UnsupportedError, @@ -553,10 +555,16 @@ class BrightcoveNewIE(AdobePassIE): subtitles = {} for text_track in json_data.get('text_tracks', []): - if text_track.get('src'): - subtitles.setdefault(text_track.get('srclang'), []).append({ - 'url': text_track['src'], - }) + if text_track.get('kind') != 'captions': + continue + text_track_url = url_or_none(text_track.get('src')) + if not text_track_url: + continue + lang = (str_or_none(text_track.get('srclang')) + or str_or_none(text_track.get('label')) or 'en').lower() + subtitles.setdefault(lang, []).append({ + 'url': text_track_url, + }) is_live = False duration = float_or_none(json_data.get('duration'), 1000) From c8b232cc48858713d9f5c88300ffcbd022d740b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 Jun 2020 23:35:57 +0700 Subject: [PATCH 2/5] [brightcove] Sort imports --- youtube_dl/extractor/brightcove.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 462815317..5c22a730d 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -5,34 +5,34 @@ import base64 import re import struct -from .common import InfoExtractor from .adobepass import AdobePassIE +from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, + compat_HTTPError, compat_parse_qs, compat_urllib_parse_urlparse, compat_urlparse, compat_xml_parse_error, - compat_HTTPError, ) from ..utils import ( - ExtractorError, + clean_html, extract_attributes, + ExtractorError, find_xpath_attr, fix_xml_ampersands, float_or_none, - js_to_json, int_or_none, + js_to_json, + mimetype2ext, parse_iso8601, smuggle_url, str_or_none, unescapeHTML, unsmuggle_url, + UnsupportedError, update_url_query, url_or_none, - clean_html, - mimetype2ext, - UnsupportedError, ) From a0455d0ffd93b069b8ab1aa95b7fa7d0bc526302 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 00:12:47 +0700 Subject: [PATCH 3/5] [twitch] Pass v5 accept header and fix thumbnails extraction (closes #25531) --- youtube_dl/extractor/twitch.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 78ee0115c..45b8a7236 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -21,6 +21,7 @@ from ..utils import ( orderedSet, parse_duration, parse_iso8601, + qualities, try_get, unified_timestamp, update_url_query, @@ -50,7 +51,10 @@ class TwitchBaseIE(InfoExtractor): def _call_api(self, path, item_id, *args, **kwargs): headers = kwargs.get('headers', {}).copy() - headers['Client-ID'] = self._CLIENT_ID + headers.update({ + 'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8', + 'Client-ID': self._CLIENT_ID, + }) kwargs['headers'] = headers response = self._download_json( '%s/%s' % (self._API_BASE, path), item_id, @@ -186,12 +190,27 @@ class TwitchItemBaseIE(TwitchBaseIE): is_live = False else: is_live = None + _QUALITIES = ('small', 'medium', 'large') + quality_key = qualities(_QUALITIES) + thumbnails = [] + preview = info.get('preview') + if isinstance(preview, dict): + for thumbnail_id, thumbnail_url in preview.items(): + thumbnail_url = url_or_none(thumbnail_url) + if not thumbnail_url: + continue + if thumbnail_id not in _QUALITIES: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'preference': quality_key(thumbnail_id), + }) return { 'id': info['_id'], 'title': info.get('title') or 'Untitled Broadcast', 'description': info.get('description'), 'duration': int_or_none(info.get('length')), - 'thumbnail': info.get('preview'), + 'thumbnails': thumbnails, 'uploader': info.get('channel', {}).get('display_name'), 'uploader_id': info.get('channel', {}).get('name'), 'timestamp': parse_iso8601(info.get('recorded_at')), From ce3735df0270ef4dfd86a527c4d0edff822dd920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 00:55:29 +0700 Subject: [PATCH 4/5] [twitch:stream] Fix extraction (closes #25528) --- youtube_dl/extractor/twitch.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 45b8a7236..4cd5f0db4 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -22,6 +22,7 @@ from ..utils import ( parse_duration, parse_iso8601, qualities, + str_or_none, try_get, unified_timestamp, update_url_query, @@ -591,10 +592,18 @@ class TwitchStreamIE(TwitchBaseIE): else super(TwitchStreamIE, cls).suitable(url)) def _real_extract(self, url): - channel_id = self._match_id(url) + channel_name = self._match_id(url) + + access_token = self._call_api( + 'api/channels/%s/access_token' % channel_name, channel_name, + 'Downloading access token JSON') + + token = access_token['token'] + channel_id = compat_str(self._parse_json( + token, channel_name)['channel_id']) stream = self._call_api( - 'kraken/streams/%s?stream_type=all' % channel_id.lower(), + 'kraken/streams/%s?stream_type=all' % channel_id, channel_id, 'Downloading stream JSON').get('stream') if not stream: @@ -604,11 +613,9 @@ class TwitchStreamIE(TwitchBaseIE): # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing # an invalid m3u8 URL. Working around by use of original channel name from stream # JSON and fallback to lowercase if it's not available. - channel_id = stream.get('channel', {}).get('name') or channel_id.lower() - - access_token = self._call_api( - 'api/channels/%s/access_token' % channel_id, channel_id, - 'Downloading channel access token') + channel_name = try_get( + stream, lambda x: x['channel']['name'], + compat_str) or channel_name.lower() query = { 'allow_source': 'true', @@ -619,11 +626,11 @@ class TwitchStreamIE(TwitchBaseIE): 'playlist_include_framerate': 'true', 'segment_preference': '4', 'sig': access_token['sig'].encode('utf-8'), - 'token': access_token['token'].encode('utf-8'), + 'token': token.encode('utf-8'), } formats = self._extract_m3u8_formats( '%s/api/channel/hls/%s.m3u8?%s' - % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)), + % (self._USHER_BASE, channel_name, compat_urllib_parse_urlencode(query)), channel_id, 'mp4') self._prefer_source(formats) @@ -646,8 +653,8 @@ class TwitchStreamIE(TwitchBaseIE): }) return { - 'id': compat_str(stream['_id']), - 'display_id': channel_id, + 'id': str_or_none(stream.get('_id')) or channel_id, + 'display_id': channel_name, 'title': title, 'description': description, 'thumbnails': thumbnails, From b37e47a3f980c2470882ec83dda43c8166ddb3cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 00:57:40 +0700 Subject: [PATCH 5/5] [twitch:stream] Expect 400 and 410 HTTP errors from API --- youtube_dl/extractor/twitch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 4cd5f0db4..e211cd4c8 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -56,7 +56,10 @@ class TwitchBaseIE(InfoExtractor): 'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8', 'Client-ID': self._CLIENT_ID, }) - kwargs['headers'] = headers + kwargs.update({ + 'headers': headers, + 'expected_status': (400, 410), + }) response = self._download_json( '%s/%s' % (self._API_BASE, path), item_id, *args, **compat_kwargs(kwargs))