l1ving_youtube-dl/youtube_dl/extractor/fusion.py

from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none


class FusionIE(InfoExtractor):
    """Extractor for video pages on fusion.net / fusion.tv.

    The page embeds a ``fusionData`` JavaScript object; its ``single`` entry
    carries the metadata and, optionally, direct media links under ``src``.
    When no usable direct link is present the extractor delegates to the
    YouTube extractor using the id found under ``video_ids``.
    """

    _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
        'info_dict': {
            'id': '0eaph8eeMwQ',
            'ext': 'mp4',
            'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
            'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
            'uploader': 'FUSION',
            'uploader_id': 'thisisfusion',
            'upload_date': '20150918'
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'http://fusion.tv/video/201781',
        'only_matching': True,
    }, {
        'url': 'https://fusion.tv/video/584520/dreaming-of-the-whitest-christmas/',
        'info_dict': {
            'id': '584520',
            'ext': 'm3u8',
            'title': 'Dreaming of the Whitest Christmas',
            'description': 'md5:350a32da86dc05a2179c9694d9d61feb',
            'release_date': '20171211',
            'thumbnail': r're:http.*.jpg[?]?',
        },
        'params': {
            'skip_download': True,
        }
    }]

    # Keys of the ``src`` mapping that are converted into formats; any other
    # key is ignored.
    _SUPPORTED_SRC_TYPES = ('m3u8-hp-v3', 'm3u8-variant', 'mp4')

    def _build_formats(self, srcs):
        """Turn the page's ``src`` mapping into a list of format dicts.

        ``srcs`` is expected to map a source type to a mapping of variants,
        each variant being a dict with ``url``, ``width``, ``height`` and
        ``type`` entries. Anything that does not have that shape, and any
        variant without a URL, is skipped instead of raising.

        Returns the formats sorted by ascending ``quality`` (pixel count).
        """
        formats = []
        if not isinstance(srcs, dict):
            return formats

        for src_type, variants in srcs.items():
            if src_type not in self._SUPPORTED_SRC_TYPES:
                continue
            if not isinstance(variants, dict):
                continue

            for variant in variants.values():
                if not isinstance(variant, dict):
                    continue
                video_url = variant.get('url')
                if not video_url:
                    # A format without a URL cannot be downloaded.
                    continue

                width = int_or_none(variant.get('width'))
                height = int_or_none(variant.get('height'))
                # The MIME subtype (e.g. "mp4" from "video/mp4") is used as
                # the note; tolerate a missing or slash-less type.
                mime_parts = (variant.get('type') or '').split('/')

                formats.append({
                    'url': video_url,
                    'width': width,
                    'height': height,
                    'format_note': mime_parts[1] if len(mime_parts) > 1 else None,
                    'protocol': 'm3u8' if src_type.startswith('m3u8') else None,
                    # int_or_none may return None; treat unknown dimensions
                    # as 0 so the product never raises TypeError.
                    'quality': (width or 0) * (height or 0),
                })

        formats.sort(key=lambda fmt: fmt['quality'])
        return formats

    def _real_extract(self, url):
        """Extract metadata and formats for a single Fusion video page.

        Returns an info dict carrying ``formats`` when direct links are
        available, otherwise a ``url``-type result handled by the Youtube
        extractor.

        Raises ExtractorError when the page has no ``single`` entry, or when
        neither a usable direct link nor a YouTube id is found.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        fusion_data = self._parse_json(
            self._search_regex(
                r'(?si)fusionData\s*=\s*({.*?});', webpage,
                'fusionData'),
            display_id)

        data = fusion_data.get('single')
        if not isinstance(data, dict):
            raise ExtractorError('Could not find video data in fusionData')

        info = {
            'id': display_id,
            'title': data.get('title'),
            'display_id': data.get('slug'),
            'description': data.get('excerpt'),
        }

        published = data.get('published')
        if published and 'T' in published:
            # Keep only the date part before "T" and drop the dashes.
            info['release_date'] = published.split('T')[0].replace('-', '')

        images = data.get('images')
        if isinstance(images, dict):
            # Distinct loop names: on Python 2 list-comprehension variables
            # leak into the enclosing scope and would overwrite ``url``.
            info['thumbnails'] = [
                {'id': image_id, 'url': image_url}
                for image_id, image_url in images.items()
                if image_url]

        formats = self._build_formats(data.get('src'))
        if formats:
            info['formats'] = formats
            return info

        # No usable direct link: hand over to the YouTube extractor.
        video_ids = data.get('video_ids')
        youtube_id = video_ids.get('youtube') if isinstance(video_ids, dict) else None
        if not youtube_id:
            raise ExtractorError('Could not find alternate youtube url')

        info['_type'] = 'url'
        info['url'] = youtube_id
        info['ie_key'] = 'Youtube'
        return info
[Fusion] Add new extractor 2016-06-30 14:57:42 +02:00			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[fusion] use direct links instead of Ooyala(closes #17775) The Ooyala ids don't seem to resolve correctly anymore. But the video pages contain direct links to the files. Use those links instead and fallback to YouTube if the links are not available. Populate other metadata that is present as well. 2018-10-22 19:41:17 -04:00			`from ..utils import ExtractorError, int_or_none`
[Fusion] Add new extractor 2016-06-30 14:57:42 +02:00

			`class FusionIE(InfoExtractor):`
[fusion] Add support for fusion.tv 2018-02-17 05:54:52 -08:00			`_VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P<id>\d+)'`
[fusion] Improve 2016-07-02 02:44:37 +07:00			`_TESTS = [{`
[fusion] Add support for fusion.tv 2018-02-17 05:54:52 -08:00			`'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',`
[Fusion] Add new extractor 2016-06-30 14:57:42 +02:00			`'info_dict': {`
[fusion] use direct links instead of Ooyala(closes #17775) The Ooyala ids don't seem to resolve correctly anymore. But the video pages contain direct links to the files. Use those links instead and fallback to YouTube if the links are not available. Populate other metadata that is present as well. 2018-10-22 19:41:17 -04:00			`'id': '0eaph8eeMwQ',`
[Fusion] Add new extractor 2016-06-30 14:57:42 +02:00			`'ext': 'mp4',`
			`'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',`
			`'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',`
[fusion] use direct links instead of Ooyala(closes #17775) The Ooyala ids don't seem to resolve correctly anymore. But the video pages contain direct links to the files. Use those links instead and fallback to YouTube if the links are not available. Populate other metadata that is present as well. 2018-10-22 19:41:17 -04:00			`'uploader': 'FUSION',`
			`'uploader_id': 'thisisfusion',`
			`'upload_date': '20150918'`
[Fusion] Add new extractor 2016-06-30 14:57:42 +02:00			`},`
[fusion] Improve 2016-07-02 02:44:37 +07:00			`'params': {`
			`'skip_download': True,`
			`},`
[fusion] use direct links instead of Ooyala(closes #17775) The Ooyala ids don't seem to resolve correctly anymore. But the video pages contain direct links to the files. Use those links instead and fallback to YouTube if the links are not available. Populate other metadata that is present as well. 2018-10-22 19:41:17 -04:00			`'add_ie': ['Youtube'],`
[fusion] Improve 2016-07-02 02:44:37 +07:00			`}, {`
[fusion] Add support for fusion.tv 2018-02-17 05:54:52 -08:00			`'url': 'http://fusion.tv/video/201781',`
[fusion] Improve 2016-07-02 02:44:37 +07:00			`'only_matching': True,`
[fusion] use direct links instead of Ooyala(closes #17775) The Ooyala ids don't seem to resolve correctly anymore. But the video pages contain direct links to the files. Use those links instead and fallback to YouTube if the links are not available. Populate other metadata that is present as well. 2018-10-22 19:41:17 -04:00			`}, {`
			`'url': 'https://fusion.tv/video/584520/dreaming-of-the-whitest-christmas/',`
			`'info_dict': {`
			`'id': '584520',`
			`'ext': 'm3u8',`
			`'title': 'Dreaming of the Whitest Christmas',`
			`'description': 'md5:350a32da86dc05a2179c9694d9d61feb',`
			`'release_date': '20171211',`
			`'thumbnail': r're:http.*.jpg[?]?',`
			`},`
			`'params': {`
			`'skip_download': True,`
			`}`
[fusion] Improve 2016-07-02 02:44:37 +07:00			`}]`
[Fusion] Add new extractor 2016-06-30 14:57:42 +02:00
			`def _real_extract(self, url):`
			`display_id = self._match_id(url)`
			`webpage = self._download_webpage(url, display_id)`

[fusion] use direct links instead of Ooyala(closes #17775) The Ooyala ids don't seem to resolve correctly anymore. But the video pages contain direct links to the files. Use those links instead and fallback to YouTube if the links are not available. Populate other metadata that is present as well. 2018-10-22 19:41:17 -04:00			`fusionData = self._parse_json(`
			`self._search_regex(`
			`r'(?si)fusionData\s*=\s*({.*?});', webpage,`
			`'fusionData'),`
			`display_id)`

			`data = fusionData.get('single')`

			`info = {`
			`'id': display_id,`
			`'title': data.get('title'),`
			`'display_id': data.get('slug'),`
			`'description': data.get('excerpt'),`
			`}`

			`published = data.get('published')`
			`if published and 'T' in published:`
			`info['release_date'] = published.split('T')[0].replace('-', '')`

			`if 'images' in data:`
			`info['thumbnails'] = [{'id': image, 'url': url} for image, url in data.get('images').items()]`

			`srcs = data.get('src')`

			`if not srcs:`
			`youtube_id = data.get('video_ids').get('youtube')`
			`if not youtube_id:`
			`raise ExtractorError('Could not find alternate youtube url')`

			`info['_type'] = 'url'`
			`info['url'] = youtube_id`
			`info['ie_key'] = 'Youtube'`
			`return info`

			`formats = []`
			`for format in srcs.keys():`
			`if format not in ['m3u8-hp-v3', 'm3u8-variant', 'mp4']:`
			`continue`

			`for vid in srcs.get(format).values():`
			`formats.append(`
			`{`
			`'url': vid.get('url'),`
			`'width': int_or_none(vid.get('width')),`
			`'height': int_or_none(vid.get('height')),`
			`'format_note': vid.get('type').split('/')[1],`
			`'protocol': 'm3u8' if format.startswith('m3u8') else None,`
			`'quality': int_or_none(vid.get('width', 0)) * int_or_none(vid.get('height', 0))`
			`}`
			`)`
[Fusion] Add new extractor 2016-06-30 14:57:42 +02:00
[fusion] use direct links instead of Ooyala(closes #17775) The Ooyala ids don't seem to resolve correctly anymore. But the video pages contain direct links to the files. Use those links instead and fallback to YouTube if the links are not available. Populate other metadata that is present as well. 2018-10-22 19:41:17 -04:00			`formats.sort(key=lambda format: format['quality'])`
			`info['formats'] = formats`
			`return info`