diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c0020dd7d..0c68e30c3 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -359,6 +359,7 @@ from .francetv import (
 from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
 from .freshlive import FreshLiveIE
+from .full30 import Full30IE
 from .funimation import FunimationIE
 from .funnyordie import FunnyOrDieIE
 from .fusion import FusionIE
diff --git a/youtube_dl/extractor/full30.py b/youtube_dl/extractor/full30.py
new file mode 100644
index 000000000..c06ced2fb
--- /dev/null
+++ b/youtube_dl/extractor/full30.py
@@ -0,0 +1,116 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
+
+
+class Full30IE(InfoExtractor):
+    """Extract video formats and basic metadata from full30.com video pages."""
+
+    _VALID_URL = r'https?://(?:www\.)?full30\.com/video/(?P<id>[a-f0-9]+)'
+    _TEST = {
+        'url': 'http://www.full30.com/video/b2a28b99494164ddd55e91a6c4648cbc',
+        'md5': '88f6812042afaf60f74dbcd84d4491c2',
+        'info_dict': {
+            'id': 'b2a28b99494164ddd55e91a6c4648cbc',
+            'ext': 'webm',
+            'title': 'Flamethrower Q&A with Charlie Hobson',
+            'thumbnail': r're:^https?://.*52130\.jpg$',
+            'uploader': 'Forgotten Weapons',
+        },
+    }
+
+    # NOTE(review): the patch this file came from had lost the opening of
+    # these three patterns (everything from the tag's "<" up to its first
+    # "[^>"). The tails are original; the openings are reconstructions and
+    # must be checked against the live page markup. In particular the
+    # "channel" class used to locate the uploader is only a guess.
+    _TITLE_RE = r'<[^>]*class=.video-title[^>]*>([^<]+?)<'
+    _UPLOADER_RE = r'<[^>]*class=.[^>]*channel[^>]*>([^<]+)<'
+    # The captured value is fetched as a URL below, so only URL-looking
+    # values are accepted.
+    _VIDEO_PATH_RE = r'<input[^>]*value=["\']((?:https?:)?//[^"\']+)["\'][^>]*>'
+
+    def _parse_listing(self, listing, video_id):
+        """Decode the file listing served under the video path.
+
+        The listing appears to be a run of JSON objects with no separating
+        commas, so it is decoded one value at a time instead of being
+        patched into an array with string replacements (which breaks on
+        nested objects or a "}" inside a string).
+
+        Returns a list of the decoded values; raises ExtractorError when the
+        text cannot be decoded.
+        """
+        decoder = json.JSONDecoder()
+        entries = []
+        pos, end = 0, len(listing)
+        while pos < end:
+            # raw_decode() does not skip leading whitespace on its own.
+            if listing[pos].isspace():
+                pos += 1
+                continue
+            try:
+                entry, pos = decoder.raw_decode(listing, pos)
+            except ValueError as err:
+                raise ExtractorError(
+                    '%s: Failed to parse file listing: %s' % (video_id, err))
+            entries.append(entry)
+        return entries
+
+    def _real_extract(self, url):
+        """Return the info dict (id, title, uploader, thumbnail, formats)."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(self._TITLE_RE, webpage, 'title')
+        uploader = self._html_search_regex(
+            self._UPLOADER_RE, webpage, 'uploader', fatal=False)
+        # Prefer an og:image whose URL contains "thumbnails"; otherwise fall
+        # back to whatever og:image the page declares.
+        thumbnail = self._html_search_regex(
+            r'<[^>]*property=.og:image. ?content="([^>]*thumbnails[^">]*)"\/>',
+            webpage, 'thumbnail', default=None) or self._og_search_thumbnail(webpage)
+
+        # Mandatory: without the base path there is nothing to download, so
+        # the search is fatal rather than passing None on as a URL.
+        video_path = self._html_search_regex(
+            self._VIDEO_PATH_RE, webpage, 'video path')
+        listing = self._download_webpage(
+            video_path, video_id, note='Downloading file listing')
+
+        formats = []
+        for entry in self._parse_listing(listing, video_id):
+            # Only entries typed "object" are treated as downloadable files.
+            if not isinstance(entry, dict) or entry.get('type') != 'object':
+                continue
+            name = entry.get('name')
+            if not name:
+                continue
+            # Presumably names look like "<label>.<ext>"; a name without a
+            # dot is used whole as the label.
+            label, dot, suffix = name.rpartition('.')
+            fmt = {
+                'url': video_path + name,
+                'format_id': label if dot else name,
+                'resolution': label if dot else name,
+                'filesize': int_or_none(entry.get('size')),
+            }
+            if dot:
+                fmt['ext'] = suffix
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'uploader': uploader,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }