l1ving_youtube-dl/youtube_dl/extractor/sbs.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    smuggle_url,
    ExtractorError,
)


class SBSIE(InfoExtractor):
    """Extractor for sbs.com.au On Demand and News pages.

    URLs of the form ``/ondemand/video/<digits>`` or ``/news/video/<digits>``
    (optionally with ``single/``) carry the numeric video id in their path.
    Other matching URLs (e.g. ``/news/<slug>``) do not, so the id is scraped
    from the page markup instead.  Once the id is known, the pdkvars API is
    queried and the result is handed over to the ThePlatform extractor as a
    ``url_transparent`` result.
    """
    IE_DESC = 'sbs.com.au'
    # The trailing group captures either the numeric id or the page slug.
    # It is a single character class on purpose: an alternation such as
    # ``[0-9]+|[0-9a-z-]+`` accepts exactly the same URLs but truncates the
    # capture of slugs that start with digits ("10-things" -> "10").
    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/(?:video/)?(?:single/)?(?P<display_id>[0-9a-z-]+)'
    # Stricter pattern for URLs whose path contains the numeric video id.
    _ID_BEARING_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'https://www.sbs.com.au/news/are-the-campaigns-working-voters-speak-out',
        'md5': '2b73ddcbb597f24a87167826c47398f8',
        'info_dict': {
            'id': 'Vznr2YGb83mF',
            'ext': 'mp4',
            'title': 'Are the campaigns cutting through?',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:http://.*\.jpg',
            'duration': 146,
            'timestamp': 1557552900,
            'upload_date': '20190511',
            'uploader': 'SBSC',
        }
    }, {
        # Original URL is handled by the generic IE which finds the iframe:
        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
        'info_dict': {
            'id': '_rFBPRPO4pMR',
            'ext': 'mp4',
            'title': 'Dingo Conservation (The Feed)',
            'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
            'thumbnail': r're:http://.*\.jpg',
            'duration': 308,
            'timestamp': 1408613220,
            'upload_date': '20140821',
            'uploader': 'SBSC',
        },
    }, {
        'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
        'only_matching': True,
    }, {
        'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
        'only_matching': True,
    }]

    def video_id_from_page_contents(self, url):
        """Download the page at ``url`` and scrape the numeric video id.

        The id is read from the first ``id="video-<digits>"`` attribute in
        the page markup.  Errors raised by the download or by a missing
        marker come from the InfoExtractor helpers and are not caught here.
        """
        # No video id is known yet, so label the request with the slug taken
        # from the URL instead of passing None.  The second argument of
        # _download_webpage is the id InfoExtractor associates with the
        # request (presumably shown in progress/error output - see common.py).
        slug_match = re.match(self._VALID_URL, url)
        display_id = slug_match.group('display_id') if slug_match else None
        page_contents = self._download_webpage(url, display_id)
        return self._search_regex(
            r'id="video-(\d+)"', page_contents, 'video id')

    def video_id(self, url):
        """Return the numeric video id for ``url``.

        The id is taken straight from the URL when it matches
        ``_ID_BEARING_URL``; otherwise the page is downloaded and scraped
        (see ``video_id_from_page_contents``).
        """
        match = re.match(self._ID_BEARING_URL, url)
        if match:
            return match.group('id')
        return self.video_id_from_page_contents(url)

    def _real_extract(self, url):
        """Resolve ``url`` to a ThePlatform URL and delegate extraction.

        Returns a ``url_transparent`` result pointing at the ThePlatform
        extractor.  Raises ExtractorError (marked as expected) when the
        pdkvars API response contains an ``error`` entry; a response without
        ``releaseUrls`` raises KeyError.
        """
        video_id = self.video_id(url)
        player_params = self._download_json(
            'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)

        error = player_params.get('error')
        if error:
            # Generic message, replaced below for the error codes we know.
            error_message = 'Sorry, The video you are looking for does not exist.'
            video_data = error.get('results') or {}
            error_code = error.get('errorCode')
            if error_code == 'ComingSoon':
                error_message = '%s is not yet available.' % video_data.get('title', '')
            elif error_code in ('Forbidden', 'intranetAccessOnly'):
                error_message = 'Sorry, This video cannot be accessed via this website'
            elif error_code == 'Expired':
                error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)

        # Take the first release URL that is present and non-empty, falling
        # back to relatedItemsURL when none of the three is available.
        urls = player_params['releaseUrls']
        theplatform_url = (urls.get('progressive') or urls.get('html')
                           or urls.get('standard') or player_params['relatedItemsURL'])

        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'id': video_id,
            # The force_smil_url flag is smuggled along for the ThePlatform
            # extractor to read.
            'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),
        }
Unify coding cookie 2016-10-02 13:39:18 +02:00			`# coding: utf-8`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`from __future__ import unicode_literals`

Handle SBS News URLs without IDs 2019-05-12 13:21:42 +10:00			`import re`

[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`from .common import InfoExtractor`
[sbs] improve extraction(fixes #3811) - extract error messages - force the platform smil url(previously the manifest param in the query is not respected which make theplatform return non working mp4 files for some videos) 2016-03-17 02:02:18 +01:00			`from ..utils import (`
			`smuggle_url,`
			`ExtractorError,`
			`)`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00

			`class SBSIE(InfoExtractor):`
			`IE_DESC = 'sbs.com.au'`
Handle SBS News URLs without IDs 2019-05-12 13:21:42 +10:00			`_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand\|news)/(?:video/)?(?:single/)?([0-9]+\|[0-9a-z-]+)'`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00
			`_TESTS = [{`
Handle SBS News URLs without IDs 2019-05-12 13:21:42 +10:00			`'url': 'https://www.sbs.com.au/news/are-the-campaigns-working-voters-speak-out',`
			`'md5': '2b73ddcbb597f24a87167826c47398f8',`
			`'info_dict': {`
			`'id': 'Vznr2YGb83mF',`
			`'ext': 'mp4',`
			`'title': 'Are the campaigns cutting through?',`
			`'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',`
			`'thumbnail': r're:http://.*\.jpg',`
			`'duration': 146,`
			`'timestamp': 1557552900,`
			`'upload_date': '20190511',`
			`'uploader': 'SBSC',`
			`}`
			`}, {`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`# Original URL is handled by the generic IE which finds the iframe:`
			`# http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation`
			`'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',`
			`'md5': '3150cf278965eeabb5b4cea1c963fe0a',`
			`'info_dict': {`
Handle SBS News URLs without IDs 2019-05-12 13:21:42 +10:00			`'id': '_rFBPRPO4pMR',`
[theplatform] Correctly extract videos that don't use f4m or rtmp (reported in #3176) 2014-09-21 16:08:38 +02:00			`'ext': 'mp4',`
[sbs] Simplify 2015-07-18 02:43:18 +06:00			`'title': 'Dingo Conservation (The Feed)',`
			`'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 20:08:07 +08:00			`'thumbnail': r're:http://.*\.jpg',`
[sbs] Simplify 2015-07-18 02:43:18 +06:00			`'duration': 308,`
[theplatform] extract timestamp and uploader 2016-04-01 18:06:11 +01:00			`'timestamp': 1408613220,`
			`'upload_date': '20140821',`
			`'uploader': 'SBSC',`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`},`
PEP8: applied even more rules 2014-11-23 21:39:15 +01:00			`}, {`
[sbs] Recognize urls with format 'http://www.sbs.com.au/ondemand/video/<id>' (#3811) 2014-09-22 14:11:08 +02:00			`'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',`
			`'only_matching': True,`
[sbs] Simplify 2015-07-18 02:43:18 +06:00			`}, {`
			`'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',`
			`'only_matching': True,`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`}]`

Handle SBS News URLs without IDs 2019-05-12 13:21:42 +10:00			`def video_id_from_page_contents(self, url):`
			`page_contents = self._download_webpage(url, None)`
			`video_id = self._search_regex(r'id="video-(\d+)"', page_contents, 'video id')`
			`return video_id`

			`def video_id(self, url):`
			`ID_BEARING_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand\|news)/video/(?:single/)?(?P<id>[0-9]+)'`
			`match = re.match(ID_BEARING_URL, url)`
			`if match:`
			`return match.group('id')`
			`else:`
			`return self.video_id_from_page_contents(url)`

[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`def _real_extract(self, url):`
Handle SBS News URLs without IDs 2019-05-12 13:21:42 +10:00			`video_id = self.video_id(url)`
[sbs] improve extraction(fixes #3811) - extract error messages - force the platform smil url(previously the manifest param in the query is not respected which make theplatform return non working mp4 files for some videos) 2016-03-17 02:02:18 +01:00			`player_params = self._download_json(`
			`'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)`

			`error = player_params.get('error')`
			`if error:`
			`error_message = 'Sorry, The video you are looking for does not exist.'`
			`video_data = error.get('results') or {}`
			`error_code = error.get('errorCode')`
			`if error_code == 'ComingSoon':`
			`error_message = '%s is not yet available.' % video_data.get('title', '')`
			`elif error_code in ('Forbidden', 'intranetAccessOnly'):`
			`error_message = 'Sorry, This video cannot be accessed via this website'`
			`elif error_code == 'Expired':`
			`error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')`
			`raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00
[sbs] Simplify 2015-07-18 02:43:18 +06:00			`urls = player_params['releaseUrls']`
Fix W504 and disable W503 (closes #20863) 2019-05-11 03:56:22 +07:00			`theplatform_url = (urls.get('progressive') or urls.get('html')`
			`or urls.get('standard') or player_params['relatedItemsURL'])`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00
			`return {`
			`'_type': 'url_transparent',`
[theplatform] extract timestamp and uploader 2016-04-01 18:06:11 +01:00			`'ie_key': 'ThePlatform',`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`'id': video_id,`
[theplatform] extract timestamp and uploader 2016-04-01 18:06:11 +01:00			`'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),`
[sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:49 +02:00			`}`