diff --git a/youtube_dl/extractor/britishcouncil.py b/youtube_dl/extractor/britishcouncil.py deleted file mode 100644 index c6afe6b61..000000000 --- a/youtube_dl/extractor/britishcouncil.py +++ /dev/null @@ -1,30 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .viddler import ViddlerBaseIE - - -class BritishCouncilIE(ViddlerBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:learnenglish\.)?britishcouncil.org/(?P<id>.*)' - _TEST = { - 'url': 'https://learnenglish.britishcouncil.org/episode-01-they-meet', - 'md5': '796e9c4fa07017e3da79d5e99ef36fe8', - 'info_dict': { - 'id': '34d5e84c', - 'ext': 'mp4', - 'title': 'StartingOut.s01e01', - 'upload_date': '20160927', - 'uploader': 'BCLearnenglish', - 'timestamp': 1474975664, - 'view_count': int, - 'comment_count': int, - } - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._html_search_regex( - r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', - webpage, 'video ID', group='id', default=None) - return self._extract_viddler_info(url, video_id, None) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 58e2d0514..4b3092028 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -132,7 +132,6 @@ from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) -from .britishcouncil import BritishCouncilIE from .businessinsider import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ce8252f6a..e8e87b356 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -119,6 +119,7 @@ from .expressen import ExpressenIE from .zype import ZypeIE from .odnoklassniki import OdnoklassnikiIE from .kinja import KinjaEmbedIE +from .viddler import ViddlerIE class GenericIE(InfoExtractor): @@ -1098,6 +1099,20 @@ class 
GenericIE(InfoExtractor): }, 'add_ie': ['Viddler'], }, + { + 'url': 'https://learnenglish.britishcouncil.org/episode-01-they-meet', + 'md5': '796e9c4fa07017e3da79d5e99ef36fe8', + 'info_dict': { + 'id': '34d5e84c', + 'ext': 'mp4', + 'title': 'StartingOut.s01e01', + 'upload_date': '20160927', + 'uploader': 'BCLearnenglish', + 'timestamp': 1474975664, + 'view_count': int, + 'comment_count': int, + }, + }, # Libsyn embed { 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve', @@ -2580,6 +2595,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url')) + mobj = re.search( + r'
data-video-id=([\'"])(?P<id>[^\'"]+)\1', + webpage) + if mobj is not None: + return ViddlerIE._build_url_result(mobj.group('id')) + # Look for NYTimes player mobj = re.search( r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>', diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index a7f7ab063..421ec7d09 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -9,70 +9,7 @@ from ..utils import ( ) -class ViddlerBaseIE(InfoExtractor): - def _extract_viddler_info(self, url, video_id, secret): - query = { - 'video_id': video_id, - 'key': 'v0vhrt7bg2xq1vyxhkct', - } - if secret: - query['secret'] = secret - - data = self._download_json( - 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json', - video_id, headers={'Referer': url}, query=query)['video'] - - formats = [] - for filed in data['files']: - if filed.get('status', 'ready') != 'ready': - continue - format_id = filed.get('profile_id') or filed['profile_name'] - f = { - 'format_id': format_id, - 'format_note': filed['profile_name'], - 'url': self._proto_relative_url(filed['url']), - 'width': int_or_none(filed.get('width')), - 'height': int_or_none(filed.get('height')), - 'filesize': int_or_none(filed.get('size')), - 'ext': filed.get('ext'), - 'source_preference': -1, - } - formats.append(f) - - if filed.get('cdn_url'): - f = f.copy() - f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:') - f['format_id'] = format_id + '-cdn' - f['source_preference'] = 1 - formats.append(f) - - if filed.get('html5_video_source'): - f = f.copy() - f['url'] = self._proto_relative_url(filed['html5_video_source']) - f['format_id'] = format_id + '-html5' - f['source_preference'] = 0 - formats.append(f) - self._sort_formats(formats) - - categories = [ - t.get('text') for t in data.get('tags', []) if 'text' in t] - - return { - 'id': video_id, - 'title': data['title'], - 'formats': formats, - 'description': data.get('description'), - 
'timestamp': int_or_none(data.get('upload_time')), - 'thumbnail': self._proto_relative_url(data.get('thumbnail_url')), - 'uploader': data.get('author'), - 'duration': float_or_none(data.get('length')), - 'view_count': int_or_none(data.get('view_count')), - 'comment_count': int_or_none(data.get('comment_count')), - 'categories': categories, - } - - -class ViddlerIE(ViddlerBaseIE): +class ViddlerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?' _TESTS = [{ 'url': 'http://www.viddler.com/v/43903784', @@ -137,6 +74,74 @@ class ViddlerIE(ViddlerBaseIE): }, }] + @staticmethod + def _url_for_id(id): + return 'http://www.viddler.com/v/%s' % id + + @classmethod + def _build_url_result(cls, id): + return cls.url_result(cls._url_for_id(id), + ie=cls.ie_key()) + def _real_extract(self, url): video_id, secret = re.match(self._VALID_URL, url).groups() - return self._extract_viddler_info(url, video_id, secret) + + query = { + 'video_id': video_id, + 'key': 'v0vhrt7bg2xq1vyxhkct', + } + if secret: + query['secret'] = secret + + data = self._download_json( + 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json', + video_id, headers={'Referer': url}, query=query)['video'] + + formats = [] + for filed in data['files']: + if filed.get('status', 'ready') != 'ready': + continue + format_id = filed.get('profile_id') or filed['profile_name'] + f = { + 'format_id': format_id, + 'format_note': filed['profile_name'], + 'url': self._proto_relative_url(filed['url']), + 'width': int_or_none(filed.get('width')), + 'height': int_or_none(filed.get('height')), + 'filesize': int_or_none(filed.get('size')), + 'ext': filed.get('ext'), + 'source_preference': -1, + } + formats.append(f) + + if filed.get('cdn_url'): + f = f.copy() + f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:') + f['format_id'] = format_id + '-cdn' + f['source_preference'] = 1 + formats.append(f) + + if 
filed.get('html5_video_source'): + f = f.copy() + f['url'] = self._proto_relative_url(filed['html5_video_source']) + f['format_id'] = format_id + '-html5' + f['source_preference'] = 0 + formats.append(f) + self._sort_formats(formats) + + categories = [ + t.get('text') for t in data.get('tags', []) if 'text' in t] + + return { + 'id': video_id, + 'title': data['title'], + 'formats': formats, + 'description': data.get('description'), + 'timestamp': int_or_none(data.get('upload_time')), + 'thumbnail': self._proto_relative_url(data.get('thumbnail_url')), + 'uploader': data.get('author'), + 'duration': float_or_none(data.get('length')), + 'view_count': int_or_none(data.get('view_count')), + 'comment_count': int_or_none(data.get('comment_count')), + 'categories': categories, + }