l1ving_youtube-dl/youtube_dl/extractor/sbs.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    smuggle_url,
    ExtractorError,
)


class SBSBaseIE(InfoExtractor):
    """Common extraction flow shared by the SBS extractors.

    Subclasses only provide ``_video_id()``; ``_real_extract()`` here turns
    that id into a ``url_transparent`` result handled by the ThePlatform
    extractor.
    """

    def _video_id(self, url):
        """Return the SBS video id for ``url``. Must be overridden."""
        raise NotImplementedError('SBS InfoExtractor classes must implement _video_id()')

    def _real_extract(self, url):
        """Look up the player parameters for the video and hand off to ThePlatform.

        Raises ExtractorError (expected=True) when the API response carries
        an ``error`` entry.
        """
        video_id = self._video_id(url)
        api_url = 'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id
        player_params = self._download_json(api_url, video_id)

        failure = player_params.get('error')
        if failure:
            details = failure.get('results') or {}
            code = failure.get('errorCode')
            # The title is only looked up for the codes whose message uses it.
            if code == 'Expired':
                reason = 'Sorry, %s is no longer available.' % details.get('title', '')
            elif code == 'ComingSoon':
                reason = '%s is not yet available.' % details.get('title', '')
            elif code in ('Forbidden', 'intranetAccessOnly'):
                reason = 'Sorry, This video cannot be accessed via this website'
            else:
                reason = 'Sorry, The video you are looking for does not exist.'
            raise ExtractorError('%s said: %s' % (self.IE_NAME, reason), expected=True)

        release_urls = player_params['releaseUrls']
        # Take the first non-empty release URL in order of preference; only
        # when none is present fall back to (and require) 'relatedItemsURL'.
        for flavour in ('progressive', 'html', 'standard'):
            theplatform_url = release_urls.get(flavour)
            if theplatform_url:
                break
        else:
            theplatform_url = player_params['relatedItemsURL']

        target_url = smuggle_url(
            self._proto_relative_url(theplatform_url),
            {'force_smil_url': True})

        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'id': video_id,
            'url': target_url,
        }


class SBSIE(SBSBaseIE):
    """Extractor for sbs.com.au ondemand/news video URLs carrying a numeric id."""

    IE_DESC = 'sbs.com.au'
    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)'

    _TESTS = [
        {
            # The original page is picked up by the generic IE, which
            # discovers the iframe pointing at this URL:
            # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
            'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
            'md5': '3150cf278965eeabb5b4cea1c963fe0a',
            'info_dict': {
                'id': '_rFBPRPO4pMR',
                'ext': 'mp4',
                'title': 'Dingo Conservation (The Feed)',
                'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
                'thumbnail': r're:http://.*\.jpg',
                'duration': 308,
                'timestamp': 1408613220,
                'upload_date': '20140821',
                'uploader': 'SBSC',
            },
        },
        {
            'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
            'only_matching': True,
        },
        {
            'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
            'only_matching': True,
        },
    ]

    def _video_id(self, url):
        """Return the numeric id captured from the URL by ``_VALID_URL``."""
        numeric_id = self._match_id(url)
        return numeric_id


class SBSNewsIE(SBSBaseIE):
    """Extractor for sbs.com.au/news article pages addressed by a slug.

    The numeric video id is not part of the URL, so it is read from the
    downloaded page instead.
    """

    IE_DESC = 'sbs.com.au:news'
    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/news/(?P<id>[0-9a-z-]+)'

    _TESTS = [
        {
            'url': 'https://www.sbs.com.au/news/rio-s-christ-the-redeemer-dons-doctor-s-coat-to-honour-coronavirus-medics',
            'only_matching': True,
        },
        {
            'url': 'https://www.sbs.com.au/news/catch-up-sbs-world-news-11-april-2020',
            'only_matching': True,
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that SBSIE accepts; otherwise use the default check."""
        # SBSIE covers /news/video/<id> URLs, which the broader /news/<slug>
        # pattern of this class can also match, so SBSIE gets priority.
        if SBSIE.suitable(url):
            return False
        return super(SBSNewsIE, cls).suitable(url)

    def _video_id(self, url):
        """Download the article page and return the id from ``id="video-<digits>"``."""
        article_slug = self._match_id(url)
        webpage = self._download_webpage(url, article_slug)
        return self._search_regex(r'id="video-(\d+)"', webpage, 'video id')
Unify coding cookie 2016-10-02 13:39:18 +02:00			`# coding: utf-8`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[sbs] improve extraction(fixes #3811) - extract error messages - force the platform smil url(previously the manifest param in the query is not respected which make theplatform return non working mp4 files for some videos) 2016-03-17 02:02:18 +01:00			`from ..utils import (`
			`smuggle_url,`
			`ExtractorError,`
			`)`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00

Extract SBSBaseIE with common _real_extract(), which follows established BaseIE pattern 2020-04-13 15:04:27 +10:00			`class SBSBaseIE(InfoExtractor):`
			`def _video_id(self, url):`
			`raise NotImplementedError('SBS InfoExtractor classes must implement _video_id()')`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00
			`def _real_extract(self, url):`
Extract SBSNewsIE from SBSIE 2019-05-12 20:42:56 +10:00			`video_id = self._video_id(url)`
[sbs] improve extraction(fixes #3811) - extract error messages - force the platform smil url(previously the manifest param in the query is not respected which make theplatform return non working mp4 files for some videos) 2016-03-17 02:02:18 +01:00			`player_params = self._download_json(`
			`'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)`

			`error = player_params.get('error')`
			`if error:`
			`error_message = 'Sorry, The video you are looking for does not exist.'`
			`video_data = error.get('results') or {}`
			`error_code = error.get('errorCode')`
			`if error_code == 'ComingSoon':`
			`error_message = '%s is not yet available.' % video_data.get('title', '')`
			`elif error_code in ('Forbidden', 'intranetAccessOnly'):`
			`error_message = 'Sorry, This video cannot be accessed via this website'`
			`elif error_code == 'Expired':`
			`error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')`
			`raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00
[sbs] Simplify 2015-07-18 02:43:18 +06:00			`urls = player_params['releaseUrls']`
Fix W504 and disable W503 (closes #20863) 2019-05-11 03:56:22 +07:00			`theplatform_url = (urls.get('progressive') or urls.get('html')`
			`or urls.get('standard') or player_params['relatedItemsURL'])`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00
			`return {`
			`'_type': 'url_transparent',`
[theplatform] extract timestamp and uploader 2016-04-01 18:06:11 +01:00			`'ie_key': 'ThePlatform',`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`'id': video_id,`
[theplatform] extract timestamp and uploader 2016-04-01 18:06:11 +01:00			`'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`}`
Extract SBSNewsIE from SBSIE 2019-05-12 20:42:56 +10:00
Extract SBSBaseIE with common _real_extract(), which follows established BaseIE pattern 2020-04-13 15:04:27 +10:00
			`class SBSIE(SBSBaseIE):`
			`IE_DESC = 'sbs.com.au'`
			`_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand\|news)/video/(?:single/)?(?P<id>[0-9]+)'`

			`_TESTS = [{`
			`# Original URL is handled by the generic IE which finds the iframe:`
			`# http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation`
			`'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',`
			`'md5': '3150cf278965eeabb5b4cea1c963fe0a',`
			`'info_dict': {`
			`'id': '_rFBPRPO4pMR',`
			`'ext': 'mp4',`
			`'title': 'Dingo Conservation (The Feed)',`
			`'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',`
			`'thumbnail': r're:http://.*\.jpg',`
			`'duration': 308,`
			`'timestamp': 1408613220,`
			`'upload_date': '20140821',`
			`'uploader': 'SBSC',`
			`},`
			`}, {`
			`'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',`
			`'only_matching': True,`
			`}, {`
			`'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',`
			`'only_matching': True,`
			`}]`

Extract SBSNewsIE from SBSIE 2019-05-12 20:42:56 +10:00			`def _video_id(self, url):`
			`return self._match_id(url)`


Extract SBSBaseIE with common _real_extract(), which follows established BaseIE pattern 2020-04-13 15:04:27 +10:00			`class SBSNewsIE(SBSBaseIE):`
			`IE_DESC = 'sbs.com.au:news'`
_VALID_URL regular expressions are not anchored 2020-04-13 13:23:08 +10:00			`_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/news/(?P<id>[0-9a-z-]+)'`
Extract SBSNewsIE from SBSIE 2019-05-12 20:42:56 +10:00
			`_TESTS = [{`
Extract SBSBaseIE with common _real_extract(), which follows established BaseIE pattern 2020-04-13 15:04:27 +10:00			`'url': 'https://www.sbs.com.au/news/rio-s-christ-the-redeemer-dons-doctor-s-coat-to-honour-coronavirus-medics',`
Extract SBSNewsIE from SBSIE 2019-05-12 20:42:56 +10:00			`'only_matching': True,`
			`}, {`
Extract SBSBaseIE with common _real_extract(), which follows established BaseIE pattern 2020-04-13 15:04:27 +10:00			`'url': 'https://www.sbs.com.au/news/catch-up-sbs-world-news-11-april-2020',`
Extract SBSNewsIE from SBSIE 2019-05-12 20:42:56 +10:00			`'only_matching': True,`
			`}]`

Use suitable() to determine which InfoExtractor class to use 2020-04-13 13:26:23 +10:00			`@classmethod`
			`def suitable(cls, url):`
			`return False if SBSIE.suitable(url) else super(SBSNewsIE, cls).suitable(url)`

Extract SBSNewsIE from SBSIE 2019-05-12 20:42:56 +10:00			`def _video_id(self, url):`
			`slug = self._match_id(url)`
			`page_contents = self._download_webpage(url, slug)`
			`return self._search_regex(r'id="video-(\d+)"', page_contents, 'video id')`