[Viddler] Add support for generic embeds

This commit is contained in:
jgilf 2020-05-27 22:03:34 +10:00
parent 8ae114ea93
commit e488cfaf0e
4 changed files with 91 additions and 96 deletions

View File

@ -1,30 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .viddler import ViddlerBaseIE
class BritishCouncilIE(ViddlerBaseIE):
    """Extractor for britishcouncil.org pages that embed a Viddler video.

    The page is downloaded, the Viddler video id is read from its
    ``data-video-id`` attribute, and the actual metadata/format lookup is
    delegated to ``_extract_viddler_info``.
    """

    # The dot before "org" is escaped so that only the real domain matches
    # (an unescaped "." would accept any character in that position).
    _VALID_URL = r'https?://(?:www\.)?(?:learnenglish\.)?britishcouncil\.org/(?P<id>.*)'
    _TEST = {
        'url': 'https://learnenglish.britishcouncil.org/episode-01-they-meet',
        'md5': '796e9c4fa07017e3da79d5e99ef36fe8',
        'info_dict': {
            'id': '34d5e84c',
            'ext': 'mp4',
            'title': 'StartingOut.s01e01',
            'upload_date': '20160927',
            'uploader': 'BCLearnenglish',
            'timestamp': 1474975664,
            'view_count': int,
            'comment_count': int,
        }
    }

    def _real_extract(self, url):
        """Resolve the embedded Viddler video on the page at *url*.

        The part of the URL captured by ``_VALID_URL`` is used only as a
        display id for the page download.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # No ``default`` is passed: a page without an embed should be
        # reported by the search helper itself rather than handing
        # ``video_id=None`` on to the Viddler API request.
        video_id = self._html_search_regex(
            r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
            webpage, 'video ID', group='id')
        return self._extract_viddler_info(url, video_id, None)

View File

@ -132,7 +132,6 @@ from .brightcove import (
BrightcoveLegacyIE, BrightcoveLegacyIE,
BrightcoveNewIE, BrightcoveNewIE,
) )
from .britishcouncil import BritishCouncilIE
from .businessinsider import BusinessInsiderIE from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE from .byutv import BYUtvIE

View File

@ -119,6 +119,7 @@ from .expressen import ExpressenIE
from .zype import ZypeIE from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .viddler import ViddlerIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1098,6 +1099,20 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': ['Viddler'], 'add_ie': ['Viddler'],
}, },
{
'url': 'https://learnenglish.britishcouncil.org/episode-01-they-meet',
'md5': '796e9c4fa07017e3da79d5e99ef36fe8',
'info_dict': {
'id': '34d5e84c',
'ext': 'mp4',
'title': 'StartingOut.s01e01',
'upload_date': '20160927',
'uploader': 'BCLearnenglish',
'timestamp': 1474975664,
'view_count': int,
'comment_count': int,
},
},
# Libsyn embed # Libsyn embed
{ {
'url': 'http://thedailyshow.cc.com/podcast/episodetwelve', 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
@ -2580,6 +2595,12 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) return self.url_result(mobj.group('url'))
mobj = re.search(
r'<div class="viddler-auto-embed" data-video-id=([\'"])(?P<id>[^\'"]+)\1',
webpage)
if mobj is not None:
return ViddlerIE._build_url_result(mobj.group('id'))
# Look for NYTimes player # Look for NYTimes player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>', r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',

View File

@ -9,70 +9,7 @@ from ..utils import (
) )
class ViddlerBaseIE(InfoExtractor): class ViddlerIE(InfoExtractor):
def _extract_viddler_info(self, url, video_id, secret):
    """Query the Viddler playback-details API and build the info dict.

    *url* is sent as the ``Referer`` header of the API request, *video_id*
    identifies the video, and *secret* (when truthy) is added to the query.
    Each file entry whose status is ``ready`` yields one format, plus a
    ``-cdn`` and/or ``-html5`` variant when the entry carries those URLs.
    """
    api_query = {
        'video_id': video_id,
        'key': 'v0vhrt7bg2xq1vyxhkct',
    }
    if secret:
        api_query['secret'] = secret

    video = self._download_json(
        'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json',
        video_id, headers={'Referer': url}, query=api_query)['video']

    formats = []
    for entry in video['files']:
        # Entries without a status are treated as ready.
        if entry.get('status', 'ready') != 'ready':
            continue

        base_id = entry.get('profile_id') or entry['profile_name']
        primary = {
            'format_id': base_id,
            'format_note': entry['profile_name'],
            'url': self._proto_relative_url(entry['url']),
            'width': int_or_none(entry.get('width')),
            'height': int_or_none(entry.get('height')),
            'filesize': int_or_none(entry.get('size')),
            'ext': entry.get('ext'),
            'source_preference': -1,
        }
        formats.append(primary)

        # The variants share every field with the primary format except
        # the URL, the id suffix and the source preference.
        cdn_url = entry.get('cdn_url')
        if cdn_url:
            cdn_variant = dict(primary)
            cdn_variant.update({
                'url': self._proto_relative_url(cdn_url, 'http:'),
                'format_id': base_id + '-cdn',
                'source_preference': 1,
            })
            formats.append(cdn_variant)

        html5_url = entry.get('html5_video_source')
        if html5_url:
            html5_variant = dict(primary)
            html5_variant.update({
                'url': self._proto_relative_url(html5_url),
                'format_id': base_id + '-html5',
                'source_preference': 0,
            })
            formats.append(html5_variant)

    self._sort_formats(formats)

    # Only tags that actually carry a 'text' key become categories.
    categories = []
    for tag in video.get('tags', []):
        if 'text' in tag:
            categories.append(tag.get('text'))

    return {
        'id': video_id,
        'title': video['title'],
        'formats': formats,
        'description': video.get('description'),
        'timestamp': int_or_none(video.get('upload_time')),
        'thumbnail': self._proto_relative_url(video.get('thumbnail_url')),
        'uploader': video.get('author'),
        'duration': float_or_none(video.get('length')),
        'view_count': int_or_none(video.get('view_count')),
        'comment_count': int_or_none(video.get('comment_count')),
        'categories': categories,
    }
class ViddlerIE(ViddlerBaseIE):
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?' _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.viddler.com/v/43903784', 'url': 'http://www.viddler.com/v/43903784',
@ -137,6 +74,74 @@ class ViddlerIE(ViddlerBaseIE):
}, },
}] }]
@staticmethod
def _url_for_id(id):
return 'http://www.viddler.com/v/%s' % id
@classmethod
def _build_url_result(cls, id):
    """Return a ``url_result`` for the video id *id*, routed to this extractor.

    The URL comes from ``_url_for_id`` and the extractor key from
    ``ie_key()``, so the caller only needs the bare video id.
    """
    page_url = cls._url_for_id(id)
    return cls.url_result(page_url, ie=cls.ie_key())
def _real_extract(self, url):
    """Extract metadata and formats for the Viddler video at *url*.

    The video id and the optional ``secret`` are taken from the groups of
    ``_VALID_URL``.  The playback-details API is then queried (with *url*
    as the ``Referer`` header) and every file entry whose status is
    ``ready`` becomes one format, plus a ``-cdn`` and/or ``-html5``
    variant when the entry carries those URLs.

    Returns the info dict for the video.
    """
    video_id, secret = re.match(self._VALID_URL, url).groups()

    query = {
        'video_id': video_id,
        'key': 'v0vhrt7bg2xq1vyxhkct',
    }
    if secret:
        query['secret'] = secret

    data = self._download_json(
        'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json',
        video_id, headers={'Referer': url}, query=query)['video']

    formats = []
    for filed in data['files']:
        # Entries without a status are treated as ready.
        if filed.get('status', 'ready') != 'ready':
            continue
        format_id = filed.get('profile_id') or filed['profile_name']
        f = {
            'format_id': format_id,
            'format_note': filed['profile_name'],
            'url': self._proto_relative_url(filed['url']),
            'width': int_or_none(filed.get('width')),
            'height': int_or_none(filed.get('height')),
            'filesize': int_or_none(filed.get('size')),
            'ext': filed.get('ext'),
            'source_preference': -1,
        }
        formats.append(f)

        # Each variant is a copy that overrides only the URL, the id
        # suffix and the source preference.
        if filed.get('cdn_url'):
            f = f.copy()
            f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:')
            f['format_id'] = format_id + '-cdn'
            f['source_preference'] = 1
            formats.append(f)

        if filed.get('html5_video_source'):
            f = f.copy()
            f['url'] = self._proto_relative_url(filed['html5_video_source'])
            f['format_id'] = format_id + '-html5'
            f['source_preference'] = 0
            formats.append(f)
    self._sort_formats(formats)

    # ``or []`` also covers a 'tags' key that is present but null, which a
    # plain ``.get('tags', [])`` would try to iterate.
    categories = [
        t.get('text') for t in (data.get('tags') or []) if 'text' in t]

    return {
        'id': video_id,
        'title': data['title'],
        'formats': formats,
        'description': data.get('description'),
        'timestamp': int_or_none(data.get('upload_time')),
        'thumbnail': self._proto_relative_url(data.get('thumbnail_url')),
        'uploader': data.get('author'),
        'duration': float_or_none(data.get('length')),
        'view_count': int_or_none(data.get('view_count')),
        'comment_count': int_or_none(data.get('comment_count')),
        'categories': categories,
    }