[Viddler] Add support for generic embeds

2020-05-27 22:03:34 +10:00 · 2020-05-27 22:03:34 +10:00 · e488cfaf0e
commit e488cfaf0e
parent 8ae114ea93
4 changed files with 91 additions and 96 deletions
--- a/youtube_dl/extractor/britishcouncil.py
+++ b/youtube_dl/extractor/britishcouncil.py
@ -1,30 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .viddler import ViddlerBaseIE
 class BritishCouncilIE(ViddlerBaseIE):
    """Extractor for Viddler videos embedded in britishcouncil.org pages.

    The page is downloaded, the Viddler video ID is read from its
    ``data-video-id`` attribute, and the actual extraction is delegated to
    ``_extract_viddler_info``.
    """
    # Every dot of the host name is escaped so that only the literal
    # britishcouncil.org domain (optionally prefixed by www. and/or
    # learnenglish.) is accepted.
    _VALID_URL = r'https?://(?:www\.)?(?:learnenglish\.)?britishcouncil\.org/(?P<id>.*)'
    _TEST = {
        'url': 'https://learnenglish.britishcouncil.org/episode-01-they-meet',
        'md5': '796e9c4fa07017e3da79d5e99ef36fe8',
        'info_dict': {
            'id': '34d5e84c',
            'ext': 'mp4',
            'title': 'StartingOut.s01e01',
            'upload_date': '20160927',
            'uploader': 'BCLearnenglish',
            'timestamp': 1474975664,
            'view_count': int,
            'comment_count': int,
        }
    }
    def _real_extract(self, url):
        """Return the info dict for the Viddler video embedded at ``url``.

        The path of ``url`` (the ``id`` group of ``_VALID_URL``) is only used
        as a display ID while downloading the page; the returned ``id`` is
        the Viddler video ID found in the page markup.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # No ``default`` is given on purpose: the lookup is then fatal, so a
        # page without an embed is reported as a missing video ID right here
        # instead of sending ``None`` to the Viddler API.
        video_id = self._html_search_regex(
            r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
            webpage, 'video ID', group='id')
        # The embedding page itself is passed as ``url``; no secret is used.
        return self._extract_viddler_info(url, video_id, None)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -132,7 +132,6 @@ from .brightcove import (
    BrightcoveLegacyIE,
    BrightcoveNewIE,
 )
 from .britishcouncil import BritishCouncilIE
 from .businessinsider import BusinessInsiderIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -119,6 +119,7 @@ from .expressen import ExpressenIE
 from .zype import ZypeIE
 from .odnoklassniki import OdnoklassnikiIE
 from .kinja import KinjaEmbedIE
 from .viddler import ViddlerIE
 class GenericIE(InfoExtractor):
@ -1098,6 +1099,20 @@ class GenericIE(InfoExtractor):
            },
            'add_ie': ['Viddler'],
        },
        {
            'url': 'https://learnenglish.britishcouncil.org/episode-01-they-meet',
            'md5': '796e9c4fa07017e3da79d5e99ef36fe8',
            'info_dict': {
                'id': '34d5e84c',
                'ext': 'mp4',
                'title': 'StartingOut.s01e01',
                'upload_date': '20160927',
                'uploader': 'BCLearnenglish',
                'timestamp': 1474975664,
                'view_count': int,
                'comment_count': int,
            },
        },
        # Libsyn embed
        {
            'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
@ -2580,6 +2595,12 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'))
        mobj = re.search(
            r'<div class="viddler-auto-embed" data-video-id=([\'"])(?P<id>[^\'"]+)\1',
            webpage)
        if mobj is not None:
            return ViddlerIE._build_url_result(mobj.group('id'))
        # Look for NYTimes player
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@ -9,70 +9,7 @@ from ..utils import (
 )
-class ViddlerBaseIE(InfoExtractor):
+class ViddlerIE(InfoExtractor):
    def _extract_viddler_info(self, url, video_id, secret):
        """Query the Viddler playback API and build the info dict for a video.

        ``url`` is sent as the ``Referer`` header of the API request,
        ``video_id`` identifies the video, and ``secret`` (when truthy) is
        forwarded as the ``secret`` query parameter.

        Returns a dict with the video ID, title, sorted formats and the
        optional metadata fields reported by the API.
        """
        api_params = {
            'video_id': video_id,
            'key': 'v0vhrt7bg2xq1vyxhkct',
        }
        if secret:
            api_params['secret'] = secret

        response = self._download_json(
            'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json',
            video_id, headers={'Referer': url}, query=api_params)
        data = response['video']

        formats = []
        for entry in data['files']:
            # A file without a status counts as ready; any other status is
            # skipped.
            if entry.get('status', 'ready') != 'ready':
                continue

            profile = entry.get('profile_id') or entry['profile_name']
            primary = {
                'format_id': profile,
                'format_note': entry['profile_name'],
                'url': self._proto_relative_url(entry['url']),
                'width': int_or_none(entry.get('width')),
                'height': int_or_none(entry.get('height')),
                'filesize': int_or_none(entry.get('size')),
                'ext': entry.get('ext'),
                'source_preference': -1,
            }
            formats.append(primary)

            # Alternative sources share every field with the primary format
            # except URL, format ID and source preference.
            cdn_url = entry.get('cdn_url')
            if cdn_url:
                cdn_variant = primary.copy()
                cdn_variant.update({
                    'url': self._proto_relative_url(cdn_url, 'http:'),
                    'format_id': profile + '-cdn',
                    'source_preference': 1,
                })
                formats.append(cdn_variant)

            html5_url = entry.get('html5_video_source')
            if html5_url:
                html5_variant = primary.copy()
                html5_variant.update({
                    'url': self._proto_relative_url(html5_url),
                    'format_id': profile + '-html5',
                    'source_preference': 0,
                })
                formats.append(html5_variant)

        self._sort_formats(formats)

        # Only tags that actually carry a 'text' entry become categories.
        categories = []
        for tag in data.get('tags', []):
            if 'text' in tag:
                categories.append(tag.get('text'))

        return {
            'id': video_id,
            'title': data['title'],
            'formats': formats,
            'description': data.get('description'),
            'timestamp': int_or_none(data.get('upload_time')),
            'thumbnail': self._proto_relative_url(data.get('thumbnail_url')),
            'uploader': data.get('author'),
            'duration': float_or_none(data.get('length')),
            'view_count': int_or_none(data.get('view_count')),
            'comment_count': int_or_none(data.get('comment_count')),
            'categories': categories,
        }
 class ViddlerIE(ViddlerBaseIE):
    _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?'
    _TESTS = [{
        'url': 'http://www.viddler.com/v/43903784',
@ -137,6 +74,74 @@ class ViddlerIE(ViddlerBaseIE):
        },
    }]
    @staticmethod
    def _url_for_id(id):
        return 'http://www.viddler.com/v/%s' % id
    @classmethod
    def _build_url_result(cls, id):
        """Build a URL result that hands the video ID ``id`` to this extractor.

        The video's URL is produced by ``_url_for_id`` and the result is
        tagged with this class's ``ie_key()``.
        """
        video_url = cls._url_for_id(id)
        extractor_key = cls.ie_key()
        return cls.url_result(video_url, ie=extractor_key)
    def _real_extract(self, url):
        """Extract formats and metadata for the Viddler video at ``url``.

        The video ID and the optional secret are the two groups captured by
        ``_VALID_URL``. The playback details are fetched from the Viddler
        API and turned into an info dict with the sorted formats and the
        metadata fields the API response provides.
        """
        video_id, secret = re.match(self._VALID_URL, url).groups()
-        return self._extract_viddler_info(url, video_id, secret)
+
        # Parameters of the API request; the secret is only sent when the
        # URL carried one.
        query = {
            'video_id': video_id,
            'key': 'v0vhrt7bg2xq1vyxhkct',
        }
        if secret:
            query['secret'] = secret
        # The original URL is passed along as the Referer header.
        data = self._download_json(
            'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json',
            video_id, headers={'Referer': url}, query=query)['video']
        formats = []
        for filed in data['files']:
            # A file without a status counts as ready; any other status is
            # skipped.
            if filed.get('status', 'ready') != 'ready':
                continue
            format_id = filed.get('profile_id') or filed['profile_name']
            # NOTE(review): source_preference is presumably used by
            # _sort_formats to rank the three sources (cdn 1, html5 0,
            # plain -1) - confirm against the base extractor.
            f = {
                'format_id': format_id,
                'format_note': filed['profile_name'],
                'url': self._proto_relative_url(filed['url']),
                'width': int_or_none(filed.get('width')),
                'height': int_or_none(filed.get('height')),
                'filesize': int_or_none(filed.get('size')),
                'ext': filed.get('ext'),
                'source_preference': -1,
            }
            formats.append(f)
            # The CDN and HTML5 variants are copies of the entry above with
            # only URL, format ID and source preference replaced.
            if filed.get('cdn_url'):
                f = f.copy()
                f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:')
                f['format_id'] = format_id + '-cdn'
                f['source_preference'] = 1
                formats.append(f)
            if filed.get('html5_video_source'):
                f = f.copy()
                f['url'] = self._proto_relative_url(filed['html5_video_source'])
                f['format_id'] = format_id + '-html5'
                f['source_preference'] = 0
                formats.append(f)
        self._sort_formats(formats)
        # Only tags that actually carry a 'text' entry become categories.
        categories = [
            t.get('text') for t in data.get('tags', []) if 'text' in t]
        return {
            'id': video_id,
            'title': data['title'],
            'formats': formats,
            'description': data.get('description'),
            'timestamp': int_or_none(data.get('upload_time')),
            'thumbnail': self._proto_relative_url(data.get('thumbnail_url')),
            'uploader': data.get('author'),
            'duration': float_or_none(data.get('length')),
            'view_count': int_or_none(data.get('view_count')),
            'comment_count': int_or_none(data.get('comment_count')),
            'categories': categories,
        }