From 55fe572d0a824e2bd5367a70882b37f4c07b512a Mon Sep 17 00:00:00 2001 From: ispedals Date: Mon, 22 Oct 2018 19:41:17 -0400 Subject: [PATCH] [fusion] use direct links instead of Ooyala (closes #17775) The Ooyala ids don't seem to resolve correctly anymore. But the video pages contain direct links to the files. Use those links instead and fall back to YouTube if the links are not available. Populate other metadata that is present as well. --- youtube_dl/extractor/fusion.py | 80 ++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py index 25e284d46..ea78c3315 100644 --- a/youtube_dl/extractor/fusion.py +++ b/youtube_dl/extractor/fusion.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .ooyala import OoyalaIE +from ..utils import ExtractorError, int_or_none class FusionIE(InfoExtractor): @@ -9,27 +9,91 @@ class FusionIE(InfoExtractor): _TESTS = [{ 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', 'info_dict': { - 'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', + 'id': '0eaph8eeMwQ', 'ext': 'mp4', 'title': 'U.S. 
and Panamanian forces work together to stop a vessel smuggling drugs', 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', - 'duration': 140.0, + 'uploader': 'FUSION', + 'uploader_id': 'thisisfusion', + 'upload_date': '20150918' }, 'params': { 'skip_download': True, }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Youtube'], }, { 'url': 'http://fusion.tv/video/201781', 'only_matching': True, + }, { + 'url': 'https://fusion.tv/video/584520/dreaming-of-the-whitest-christmas/', + 'info_dict': { + 'id': '584520', + 'ext': 'm3u8', + 'title': 'Dreaming of the Whitest Christmas', + 'description': 'md5:350a32da86dc05a2179c9694d9d61feb', + 'release_date': '20171211', + 'thumbnail': r're:http.*.jpg[?]?', + }, + 'params': { + 'skip_download': True, + } }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - ooyala_code = self._search_regex( - r'data-ooyala-id=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala code', group='code') + fusionData = self._parse_json( + self._search_regex( + r'(?si)fusionData\s*=\s*({.*?});', webpage, + 'fusionData'), + display_id) - return OoyalaIE._build_url_result(ooyala_code) + data = fusionData.get('single') + + info = { + 'id': display_id, + 'title': data.get('title'), + 'display_id': data.get('slug'), + 'description': data.get('excerpt'), + } + + published = data.get('published') + if published and 'T' in published: + info['release_date'] = published.split('T')[0].replace('-', '') + + if 'images' in data: + info['thumbnails'] = [{'id': image, 'url': url} for image, url in data.get('images').items()] + + srcs = data.get('src') + + if not srcs: + youtube_id = data.get('video_ids').get('youtube') + if not youtube_id: + raise ExtractorError('Could not find alternate youtube url') + + info['_type'] = 'url' + info['url'] = youtube_id + info['ie_key'] = 'Youtube' + return info + + formats = [] + for format in srcs.keys(): + if format not in ['m3u8-hp-v3', 'm3u8-variant', 'mp4']: + continue + + 
for vid in srcs.get(format).values(): + formats.append( + { + 'url': vid.get('url'), + 'width': int_or_none(vid.get('width')), + 'height': int_or_none(vid.get('height')), + 'format_note': vid.get('type').split('/')[1], + 'protocol': 'm3u8' if format.startswith('m3u8') else None, + 'quality': int_or_none(vid.get('width', 0)) * int_or_none(vid.get('height', 0)) + } + ) + + formats.sort(key=lambda format: format['quality']) + info['formats'] = formats + return info