diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py index 2179133ca..aa4013952 100644 --- a/youtube_dl/extractor/redbulltv.py +++ b/youtube_dl/extractor/redbulltv.py @@ -2,11 +2,9 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - float_or_none, - ExtractorError, -) +from ..utils import RegexNotFoundError +import json +import time class RedBullTVIE(InfoExtractor): @@ -47,8 +45,52 @@ class RedBullTVIE(InfoExtractor): }] def _real_extract(self, url): + # video_id is "AP-..." ID video_id = self._match_id(url) + # Try downloading the webpage multiple times in order to get a repsonse + # cache which will contain the result of a query to + # 'https://www.redbull.com/v3/api/composition/v3/query/en-INT?rb3Schema=v1:pageConfig&filter[uriSlug]=%s' % video_id + # We use the response cache to get the rrn ID and other metadata. We do + # this instead of simply querying the API in order to preserve the + # provided URL's locale. (Annoyingly, the locale in the input URL + # ('en-us', for example) is of a different format than the locale + # required for the API request.) + tries = 3 + for i in range(tries): + try: + if i == 0: + webpage = self._download_webpage(url, video_id) + else: + webpage = self._download_webpage(url, video_id, note='Redownloading webpage') + # extract response cache + response_cache = json.loads(self._html_search_regex(r'', webpage, 'response-cache')) + break + except RegexNotFoundError: + if i < tries - 1: + self.to_screen('Waiting before redownloading webpage') + time.sleep(2) + else: + raise + + # select the key that includes the string 'pageConfig' + metadata = json.loads( + response_cache[ + [key for key in response_cache.keys() if 'pageConfig' in key][0] + ]['response'] + )['data'] + + # extract rrn ID + rrn_id_ext = metadata['analytics']['asset']['trackingDimensions']['masterID'] + # trim locale from the end of rrn_id_ext + rrn_id = ':'.join(rrn_id_ext.split(':')[:-1]) + + # extract metadata + title = metadata['analytics']['asset']['title'] + short_description = metadata['pageMeta']['og:title'] + long_description = metadata['pageMeta']['og:description'] + + # get access token for download session = self._download_json( 'https://api.redbull.tv/v3/session', video_id, note='Downloading access token', query={ @@ -60,55 +102,16 @@ class RedBullTVIE(InfoExtractor): self.IE_NAME, session['message'])) token = session['token'] - try: - video = self._download_json( - 'https://api.redbull.tv/v3/products/' + video_id, - video_id, note='Downloading video information', - headers={'Authorization': token} - ) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - error_message = self._parse_json( - e.cause.read().decode(), video_id)['error'] - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, error_message), expected=True) - raise - - title = video['title'].strip() - - # use an 'rrn:...' ID instead of an 'AP-...' ID if necessary - content_path = video.get('status', {}).get('play') - if content_path: - # trim '/content/' from '/content/rrn:...' - video_id = content_path[9:] - formats = self._extract_m3u8_formats( - 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token), + 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (rrn_id, token), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') self._sort_formats(formats) - subtitles = {} - for resource in video.get('resources', []): - if resource.startswith('closed_caption_'): - splitted_resource = resource.split('_') - if splitted_resource[2]: - subtitles.setdefault('en', []).append({ - 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource), - 'ext': splitted_resource[2], - }) - - subheading = video.get('subheading') - if subheading: - title += ' - %s' % subheading - return { 'id': video_id, 'title': title, - 'description': video.get('long_description') or video.get( - 'short_description'), - 'duration': float_or_none(video.get('duration'), scale=1000), + 'description': long_description or short_description, 'formats': formats, - 'subtitles': subtitles, }