l1ving_youtube-dl/youtube_dl/extractor/redbulltv.py

# coding: utf-8
from __future__ import unicode_literals

import json
import time

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    RegexNotFoundError,
    float_or_none,
)


class RedBullTVIE(InfoExtractor):
    # Extractor for redbull.com video pages identified by an 'AP-...' ID.
    #
    # Extraction flow (see _real_extract):
    #   1. download the page and read its embedded "response-cache" JSON to
    #      obtain the rrn ID of the video,
    #   2. request a session token from api.redbull.tv,
    #   3. build the HLS playlist URL from the rrn ID and the token,
    #   4. fetch title/description/duration from the products API, falling
    #      back to the response cache for fields that are missing there.
    _VALID_URL = r"""(?x)^
                     https?://
                     (?:www\.)?redbull\.com/
                     [^/]+/                                                   # locale/language code
                     (?:videos|recap-videos|events|episodes|films)/
                     (?P<id>AP-\w{13})(?:/live/AP-\w{13})?
                     (?:\?playlist)?(?:\?playlistId=rrn:content:collections:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[\w-]+)?
                     $"""
    _TESTS = [{
        # videos
        'url': 'https://www.redbull.com/int-en/videos/AP-1YM911N612111',
        'md5': 'e2d92baecce184ecd521fa3d72f36aa8',
        'info_dict': {
            'id': 'AP-1YM911N612111',
            'ext': 'mp4',
            'title': 'md5:fa027630eb511593fe91e4323762e95d',
            'description': 'md5:7f769874c63e45f9b6f43315a99094c7',
            'duration': 255.0,
            'release_date': '20190809',
        },
    }, {
        # recap-videos
        'url': 'https://www.redbull.com/int-en/recap-videos/AP-1YM8YXTC52111?playlistId=rrn:content:collections:e916768e-7b47-413d-a254-bc97d7f808f7:en-INT',
        'md5': 'aa7c6ab92ea6103f61d5fc5cbb85fd53',
        'info_dict': {
            'id': 'AP-1YM8YXTC52111',
            'ext': 'mp4',
            'title': 'md5:dc9aec63e687a534a6bb13adbb86571c',
            'description': 'md5:3774af48bf6fbc5fb6c8ebad6891f728',
            'duration': 1560.0,
            'release_date': '20190808',
        },
    }, {
        # events
        'url': 'https://www.redbull.com/int-en/recap-videos/AP-1ZYQN7WNW2111',
        'md5': '0f2043deef92405249c8ca96ba197901',
        'info_dict': {
            'id': 'AP-1ZYQN7WNW2111',
            'ext': 'mp4',
            'title': 'md5:c2a490a9db25823c2c9790093e3563ab',
            'description': 'md5:fb7e7a8cfaa72f7dc139238186d69800',
            'duration': 933.0,
            'release_date': '20190727',
        },
    }, {
        # episodes
        'url': 'https://www.redbull.com/int-en/episodes/AP-1PMHKJFCW1W11',
        'md5': 'db8271a7200d40053a1809ed0dd574ff',
        'info_dict': {
            'id': 'AP-1PMHKJFCW1W11',
            'ext': 'mp4',
            'title': 'md5:f767c9809c12c3411632cb7de9d30608',
            'description': 'md5:b5f522b89b72e1e23216e5018810bb25',
            'duration': 904.0,
            'release_date': '20170221',
        },
    }, {
        # films
        'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111',
        'md5': '3a753f7c3c1f9966ae660e05c3c7862b',
        'info_dict': {
            'id': 'AP-1ZSMAW8FH2111',
            'ext': 'mp4',
            'title': 'md5:47478de1e62dadcda748c2b58ae7e343',
            'description': 'md5:9a885f6f5344b98c684f8aaf6bdfbc38',
            'duration': 4837.0,
            'release_date': '20190801',
        },
    }]

    # How many times the webpage is fetched while looking for the response
    # cache, and how many seconds to wait between two attempts.
    _CACHE_TRIES = 3
    _CACHE_RETRY_DELAY = 2

    def _download_response_cache(self, url, video_id):
        """Download the webpage and return its parsed response cache.

        The page is downloaded up to _CACHE_TRIES times because the
        '<script id="response-cache">' element is not always found in the
        returned HTML. The cache contains the result of a query to
        'https://www.redbull.com/v3/api/composition/v3/query/en-INT?rb3Schema=v1:pageConfig&filter[uriSlug]=%s' % video_id
        It is used instead of querying the API directly in order to preserve
        the provided URL's locale. (Annoyingly, the locale in the input URL
        ('en-us', for example) is of a different format than the locale
        required for the API request.)

        Raises RegexNotFoundError if the cache is missing on every attempt.
        """
        for attempt in range(self._CACHE_TRIES):
            if attempt == 0:
                webpage = self._download_webpage(url, video_id)
            else:
                webpage = self._download_webpage(
                    url, video_id, note='Redownloading webpage')
            try:
                raw_cache = self._html_search_regex(
                    r'<script type="application/json" id="response-cache">(.+?)</script>',
                    webpage, 'response-cache')
            except RegexNotFoundError:
                if attempt < self._CACHE_TRIES - 1:
                    self.to_screen('Waiting before redownloading webpage')
                    time.sleep(self._CACHE_RETRY_DELAY)
                    continue
                self.to_screen('Failed to download/locate response cache. Wait a few seconds and try running the command again.')
                raise
            return json.loads(raw_cache)

    @staticmethod
    def _extract_release_date(asset):
        """Return the release date as a digits-only string, or None.

        'publishDate' is preferred: its first 10 characters are taken with
        the dashes removed (presumably an ISO 'YYYY-MM-DD...' timestamp).
        Otherwise 'trackingDimensions.publishingDate' is used with its
        dash-separated parts reversed (presumably 'DD-MM-YYYY').
        """
        publish_date = asset.get('publishDate')
        if publish_date:
            return publish_date[:10].replace('-', '')
        publishing_date = (
            asset.get('trackingDimensions') or {}).get('publishingDate')
        if publishing_date:
            return ''.join(publishing_date.split('-')[::-1])
        return None

    def _real_extract(self, url):
        """Extract formats and metadata for the video at the given URL.

        Raises ExtractorError when the response cache holds no 'pageConfig'
        entry or when the session API reports an error.
        """
        # video_id is 'AP-...' ID
        video_id = self._match_id(url)

        response_cache = self._download_response_cache(url, video_id)

        # select the first key that includes the string 'pageConfig'
        page_config_key = next(
            (key for key in response_cache if 'pageConfig' in key), None)
        if page_config_key is None:
            raise ExtractorError(
                'Unable to find pageConfig entry in response cache')
        metadata = json.loads(
            response_cache[page_config_key]['response'])['data']

        # extract rrn ID (mandatory: the playlist URL cannot be built
        # without it, so a missing key is allowed to raise)
        rrn_id_ext = metadata['analytics']['asset']['trackingDimensions']['masterID']
        # trim locale (the last ':'-separated part) from the end of rrn_id_ext
        rrn_id = rrn_id_ext.rpartition(':')[0]

        # get access token for download
        session = self._download_json(
            'https://api.redbull.tv/v3/session', video_id,
            note='Downloading access token', query={
                'category': 'personal_computer',
                'os_family': 'http',
            })
        if session.get('code') == 'error':
            raise ExtractorError('%s said: %s' % (
                self.IE_NAME, session['message']))
        token = session['token']

        # extract formats from m3u8
        # subtitle tracks are also listed in this m3u8, but yt-dl does not
        # currently implement an easy way to download m3u8 VTT subtitles
        formats = self._extract_m3u8_formats(
            'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (rrn_id, token),
            video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        # download more metadata
        metadata2 = self._download_json(
            'https://api.redbull.tv/v3/products/%s' % rrn_id,
            video_id, note='Downloading video information',
            headers={'Authorization': token}
        )

        # extract metadata; the products API is preferred and the response
        # cache is only used as a fallback for missing fields
        asset = (metadata.get('analytics') or {}).get('asset') or {}

        title = (metadata2.get('title') or '').strip() or asset.get('title')

        subheading = metadata2.get('subheading')
        if title and subheading:
            title += ' - %s' % subheading

        long_description = metadata2.get('long_description')
        short_description = (
            metadata2.get('short_description')
            or (metadata.get('pageMeta') or {}).get('og:description'))

        # scale=1000 divides the API value by 1000
        duration = float_or_none(metadata2.get('duration'), scale=1000)

        return {
            'id': video_id,
            'title': title,
            'description': long_description or short_description,
            'duration': duration,
            'release_date': self._extract_release_date(asset),
            'formats': formats,
        }
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`from ..utils import (`
			`RegexNotFoundError,`
			`float_or_none,`
			`)`
[redbulltv] Use rrn ID for all video downloads All tests from 8682f76 work. 2019-08-09 21:38:14 -07:00			`import json`
			`import time`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00

			`class RedBullTVIE(InfoExtractor):`
[redbulltv] Rewrite _VALID_URL regex for readability 2019-08-10 14:05:48 -07:00			`_VALID_URL = r"""(?x)^`
			`https?://`
			`(?:www\.)?redbull\.com/`
			`[^/]+/ # locale/language code`
			`(?:videos\|recap-videos\|events\|episodes\|films)/`
			`(?P<id>AP-\w{13})(?:/live/AP-\w{13})?`
			`(?:\?playlist)?(?:\?playlistId=rrn:content:collections:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[\w-]+)?`
			`$"""`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`_TESTS = [{`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`# videos`
			`'url': 'https://www.redbull.com/int-en/videos/AP-1YM911N612111',`
			`'md5': 'e2d92baecce184ecd521fa3d72f36aa8',`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`'info_dict': {`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`'id': 'AP-1YM911N612111',`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`'ext': 'mp4',`
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`'title': 'md5:fa027630eb511593fe91e4323762e95d',`
			`'description': 'md5:7f769874c63e45f9b6f43315a99094c7',`
			`'duration': 255.0,`
			`'release_date': '20190809',`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`},`
			`}, {`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`# recap-videos`
			`'url': 'https://www.redbull.com/int-en/recap-videos/AP-1YM8YXTC52111?playlistId=rrn:content:collections:e916768e-7b47-413d-a254-bc97d7f808f7:en-INT',`
			`'md5': 'aa7c6ab92ea6103f61d5fc5cbb85fd53',`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`'info_dict': {`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`'id': 'AP-1YM8YXTC52111',`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`'ext': 'mp4',`
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`'title': 'md5:dc9aec63e687a534a6bb13adbb86571c',`
			`'description': 'md5:3774af48bf6fbc5fb6c8ebad6891f728',`
			`'duration': 1560.0,`
			`'release_date': '20190808',`
[redbulltv] Add support for lives and segments (closes #13486)) 2017-06-25 01:09:12 +07:00			`},`
[redbulltv] add support redbull.com tv URLs(closes #17218) 2018-08-12 05:31:18 +01:00			`}, {`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`# events`
			`'url': 'https://www.redbull.com/int-en/recap-videos/AP-1ZYQN7WNW2111',`
			`'md5': '0f2043deef92405249c8ca96ba197901',`
			`'info_dict': {`
			`'id': 'AP-1ZYQN7WNW2111',`
			`'ext': 'mp4',`
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`'title': 'md5:c2a490a9db25823c2c9790093e3563ab',`
			`'description': 'md5:fb7e7a8cfaa72f7dc139238186d69800',`
			`'duration': 933.0,`
			`'release_date': '20190727',`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`},`
[redbulltv] Extend _VALID_URL (closes #20922) 2019-05-01 21:36:19 +07:00			`}, {`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`# episodes`
			`'url': 'https://www.redbull.com/int-en/episodes/AP-1PMHKJFCW1W11',`
			`'md5': 'db8271a7200d40053a1809ed0dd574ff',`
			`'info_dict': {`
			`'id': 'AP-1PMHKJFCW1W11',`
			`'ext': 'mp4',`
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`'title': 'md5:f767c9809c12c3411632cb7de9d30608',`
			`'description': 'md5:b5f522b89b72e1e23216e5018810bb25',`
			`'duration': 904.0,`
			`'release_date': '20170221',`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`},`
[redbulltv] Extend _VALID_URL (closes #20922) 2019-05-01 21:36:19 +07:00			`}, {`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`# films`
			`'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111',`
			`'md5': '3a753f7c3c1f9966ae660e05c3c7862b',`
			`'info_dict': {`
			`'id': 'AP-1ZSMAW8FH2111',`
			`'ext': 'mp4',`
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`'title': 'md5:47478de1e62dadcda748c2b58ae7e343',`
			`'description': 'md5:9a885f6f5344b98c684f8aaf6bdfbc38',`
			`'duration': 4837.0,`
			`'release_date': '20190801',`
[redbulltv] Fix/add tests 2019-08-09 23:49:19 -07:00			`},`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`}]`

			`def _real_extract(self, url):`
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`# video_id is 'AP-...' ID`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`video_id = self._match_id(url)`

[redbulltv] Use rrn ID for all video downloads All tests from 8682f76 work. 2019-08-09 21:38:14 -07:00			`# Try downloading the webpage multiple times in order to get a response`
			`# cache which will contain the result of a query to`
			`# 'https://www.redbull.com/v3/api/composition/v3/query/en-INT?rb3Schema=v1:pageConfig&filter[uriSlug]=%s' % video_id`
			`# We use the response cache to get the rrn ID and other metadata. We do`
			`# this instead of simply querying the API in order to preserve the`
			`# provided URL's locale. (Annoyingly, the locale in the input URL`
			`# ('en-us', for example) is of a different format than the locale`
			`# required for the API request.)`
			`tries = 3`
			`for i in range(tries):`
			`try:`
			`if i == 0:`
			`webpage = self._download_webpage(url, video_id)`
			`else:`
[redbulltv] Adhere to soft 80 char limit 2019-08-10 14:41:28 -07:00			`webpage = self._download_webpage(url, video_id,`
			`note='Redownloading webpage')`
[redbulltv] Use rrn ID for all video downloads All tests from 8682f76 work. 2019-08-09 21:38:14 -07:00			`# extract response cache`
[redbulltv] Adhere to soft 80 char limit 2019-08-10 14:41:28 -07:00			`response_cache = json.loads(self._html_search_regex(`
			`r'<script type="application/json" id="response-cache">(.+?)</script>',`
			`webpage, 'response-cache'))`
[redbulltv] Use rrn ID for all video downloads All tests from 8682f76 work. 2019-08-09 21:38:14 -07:00			`except RegexNotFoundError:`
			`if i < tries - 1:`
			`self.to_screen('Waiting before redownloading webpage')`
			`time.sleep(2)`
[redbulltv] Fix cache retry system printing error multiple times 2019-08-10 13:14:39 -07:00			`continue`
[redbulltv] Use rrn ID for all video downloads All tests from 8682f76 work. 2019-08-09 21:38:14 -07:00			`else:`
[redbulltv] Add user message on failing to dl/locate response cache 2019-08-10 12:42:02 -07:00			`self.to_screen('Failed to download/locate response cache. Wait a few seconds and try running the command again.')`
[redbulltv] Use rrn ID for all video downloads All tests from 8682f76 work. 2019-08-09 21:38:14 -07:00			`raise`
[redbulltv] Fix cache retry system printing error multiple times 2019-08-10 13:14:39 -07:00			`break`
[redbulltv] Use rrn ID for all video downloads All tests from 8682f76 work. 2019-08-09 21:38:14 -07:00
			`# select the key that includes the string 'pageConfig'`
			`metadata = json.loads(`
			`response_cache[`
			`[key for key in response_cache.keys() if 'pageConfig' in key][0]`
			`]['response']`
			`)['data']`

			`# extract rrn ID`
			`rrn_id_ext = metadata['analytics']['asset']['trackingDimensions']['masterID']`
			`# trim locale from the end of rrn_id_ext`
			`rrn_id = ':'.join(rrn_id_ext.split(':')[:-1])`

			`# get access token for download`
[redbull] improve extraction - extract 1080p quality - correct ttml subtitle ext - catch api errors - reduce request size 2017-03-15 01:40:54 +01:00			`session = self._download_json(`
[redbulltv] fix extraction(closes #15481) 2018-02-03 15:42:57 +01:00			`'https://api.redbull.tv/v3/session', video_id,`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`note='Downloading access token', query={`
[redbull] improve extraction - extract 1080p quality - correct ttml subtitle ext - catch api errors - reduce request size 2017-03-15 01:40:54 +01:00			`'category': 'personal_computer',`
			`'os_family': 'http',`
			`})`
			`if session.get('code') == 'error':`
			`raise ExtractorError('%s said: %s' % (`
			`self.IE_NAME, session['message']))`
[redbulltv] fix extraction(closes #15481) 2018-02-03 15:42:57 +01:00			`token = session['token']`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`# extract formats from m3u8`
			`# subtitle tracks are also listed in this m3u8, but yt-dl does not`
			`# currently implement an easy way to download m3u8 VTT subtitles`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`formats = self._extract_m3u8_formats(`
[redbulltv] Use rrn ID for all video downloads All tests from 8682f76 work. 2019-08-09 21:38:14 -07:00			`'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (rrn_id, token),`
[redbulltv] fix extraction(closes #15481) 2018-02-03 15:42:57 +01:00			`video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')`
[redbull] improve extraction - extract 1080p quality - correct ttml subtitle ext - catch api errors - reduce request size 2017-03-15 01:40:54 +01:00			`self._sort_formats(formats)`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`# download more metadata`
			`metadata2 = self._download_json(`
			`'https://api.redbull.tv/v3/products/%s' % rrn_id,`
			`video_id, note='Downloading video information',`
			`headers={'Authorization': token}`
			`)`

			`# extract metadata`
[redbulltv] Add redundancy for title, short_desc, and release_date 2019-08-10 13:44:14 -07:00			`title = metadata2.get('title').strip() or \`
			`metadata.get('analytics', {}).get('asset', {}).get(['title'])`

[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`subheading = metadata2.get('subheading')`
			`if subheading:`
			`title += ' - %s' % subheading`
[redbulltv] Change [key] to .get(key) to comply with coding conventions (For optional keys only) 2019-08-10 12:56:03 -07:00
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`long_description = metadata2.get('long_description')`
[redbulltv] Add redundancy for title, short_desc, and release_date 2019-08-10 13:44:14 -07:00			`short_description = metadata2.get('short_description') or \`
			`metadata['pageMeta']['og:description']`
[redbulltv] Change [key] to .get(key) to comply with coding conventions (For optional keys only) 2019-08-10 12:56:03 -07:00
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`duration = float_or_none(metadata2.get('duration'), scale=1000)`
[redbulltv] Change [key] to .get(key) to comply with coding conventions (For optional keys only) 2019-08-10 12:56:03 -07:00
[redbulltv] Add redundancy for title, short_desc, and release_date 2019-08-10 13:44:14 -07:00			`release_dates = [metadata.get('analytics', {}).get('asset', {}) \`
			`.get('publishDate')]`
			`release_dates.append(metadata.get('analytics', {}).get('asset', {}) \`
			`.get('trackingDimensions', {}).get('publishingDate'))`

			`if release_dates[0]:`
			`release_date = release_dates[0][:10].replace('-', '')`
			`elif release_dates[1]:`
			`release_date = ''.join(release_dates[1].split('-')[::-1])`
			`else:`
			`release_date = None`
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`return {`
			`'id': video_id,`
			`'title': title,`
[redbulltv] Use rrn ID for all video downloads All tests from 8682f76 work. 2019-08-09 21:38:14 -07:00			`'description': long_description or short_description,`
[redbulltv] Pull (most) metadata from products API instead of JSON cache Adds support for duration, release_data metadata. Re-implements concatenating subheading onto title if present. 2019-08-10 12:41:05 -07:00			`'duration': duration,`
			`'release_date': release_date,`
[redbulltv] Improve extraction (closes #11948, closes #3919) 2017-03-04 23:25:09 +07:00			`'formats': formats,`
			`}`