l1ving_youtube-dl/youtube_dl/extractor/full30.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import int_or_none


class Full30IE(InfoExtractor):
    """Extractor for videos hosted on full30.com.

    The video page exposes (in a hidden ``<input>``) the URL of a directory
    that holds every rendition of the video.  Fetching that directory URL
    returns a sequence of JSON objects, one per directory entry; entries
    whose ``type`` is ``object`` are turned into formats.
    """
    _VALID_URL = r'https?://(?:www\.)?full30\.com/video/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'http://www.full30.com/video/b2a28b99494164ddd55e91a6c4648cbc',
        'md5': 'f5aa3862cbe35c2083ce050ac1a5eb06',
        'info_dict': {
            'id': 'b2a28b99494164ddd55e91a6c4648cbc',
            'title': 'Flamethrower Q&A with Charlie Hobson',
            'uploader': 'Forgotten Weapons',
            'thumbnail': r're:^https?://.*52130\.jpg$',
            'ext': 'ogv',
        }
    }

    def _real_extract(self, url):
        """Build the info dict (id, title, uploader, thumbnail, formats) for *url*."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Title: page heading first, then og:title, then the bare video id.
        # Both lookups pass default=None so that a miss yields None instead
        # of raising; otherwise the final video_id fallback is unreachable.
        title = (
            self._html_search_regex(
                r'<h1 [^>]*class=.video-title[^>]*>([^<]+?)</h1>',
                webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None)
            or video_id)

        # Uploader is optional; normalise an empty match to None.
        uploader = self._html_search_regex(
            r'<h1 class=.channel-title[^>]*>([^<]+)<',
            webpage, 'uploader', default=None) or None

        # Prefer the og:image that points into the thumbnails directory and
        # fall back to the generic og:image lookup.
        thumbnail = (
            self._html_search_regex(
                r'<[^>]*property=.og:image. ?content="([^>]*thumbnails[^">]*)"\/>',
                webpage, 'thumbnail', default=None)
            or self._og_search_thumbnail(webpage))

        # looking for a line like the following
        # <input id="video-path" type="hidden" name="video_path" value="https://videos.full30.com/bitmotive/public/full30/v1.0/videos/forgottenweapons/b2a28b99494164ddd55e91a6c4648cbc/" />
        # There is also a full30.com/cdn tree which appears to have the same
        # structure.  Either may go away, so as a backup the cdn link is
        # built from the channel slug.
        vid_path = self._html_search_regex(
            r'<input id=.video-path[^>]*value=["\']([^"\']*)["\'][^>]*>',
            webpage, 'video_path', default=None)
        if not vid_path:
            # Without either the video path or the channel slug there is no
            # way to locate the media, so this lookup is fatal.
            channel_slug = self._html_search_regex(
                r'<input id=.channel-slug[^>]*value=["\']([^"\']*)["\'][^>]*>',
                webpage, 'channel_slug', fatal=True)
            vid_path = "https://www.full30.com/cdn/videos/" + channel_slug + "/" + video_id + "/"

        vid_json = self._download_webpage(vid_path, video_id)
        # The listing is a bare sequence of JSON objects, not a JSON array.
        # Wrap it in brackets and insert a comma after every closing brace,
        # then drop the trailing comma.
        # NOTE: this textual rewrite only works for flat objects whose string
        # values contain no "}" character.
        vid_json = vid_json.rstrip()
        vid_json = "[" + vid_json + "]"
        vid_json = vid_json.replace("}", "},").replace(",]", "]")
        parsed = self._parse_json(vid_json, video_id)

        formats = []
        for entry in parsed:
            # Skip anything that is not a file-like entry, and entries without
            # a usable name, instead of failing the whole extraction on them.
            if not isinstance(entry, dict) or entry.get("type") != "object":
                continue
            name = entry.get("name")
            if not name:
                continue
            formats.append({
                "url": vid_path + name,
                # File name minus its extension; a name without any dot is
                # used unchanged rather than losing its last character.
                "resolution": name.rpartition(".")[0] or name,
                "filesize": int_or_none(entry.get("size")),
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'thumbnail': thumbnail,
            'formats': formats,
        }
inital attempt at adding full30 support 2017-05-04 22:41:01 -04:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
flake8 compliant, allow non-essential uploader field to be None, fallback to video_id as title (current behavior with generic exractor), make filesize int_or_none 2017-05-06 02:17:17 -04:00			`from ..utils import int_or_none`
inital attempt at adding full30 support 2017-05-04 22:41:01 -04:00

			`class Full30IE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?full30\.com/video/(?P<id>[a-f0-9]+)'`
			`_TEST = {`
			`'url': 'http://www.full30.com/video/b2a28b99494164ddd55e91a6c4648cbc',`
formats sorted 2017-05-04 22:59:11 -04:00			`'md5': 'f5aa3862cbe35c2083ce050ac1a5eb06',`
inital attempt at adding full30 support 2017-05-04 22:41:01 -04:00			`'info_dict': {`
			`'id': 'b2a28b99494164ddd55e91a6c4648cbc',`
			`'title': 'Flamethrower Q&A with Charlie Hobson',`
flake8 compliant, allow non-essential uploader field to be None, fallback to video_id as title (current behavior with generic exractor), make filesize int_or_none 2017-05-06 02:17:17 -04:00			`'uploader': 'Forgotten Weapons',`
cleaned up, added fallbacks 2017-05-05 00:42:22 -04:00			`'thumbnail': r're:^https?://.*52130\.jpg$',`
			`'ext': 'ogv',`
inital attempt at adding full30 support 2017-05-04 22:41:01 -04:00			`}`
			`}`

			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
			`webpage = self._download_webpage(url, video_id)`

flake8 compliant, allow non-essential uploader field to be None, fallback to video_id as title (current behavior with generic exractor), make filesize int_or_none 2017-05-06 02:17:17 -04:00			`title = self._html_search_regex(r'<h1 [^>]*class=.video-title[^>]*>([^<]+?)</h1>', webpage, 'title', fatal=False, default=None) or self._og_search_title(webpage) or video_id`
			`uploader = self._html_search_regex(r'<h1 class=.channel-title[^>]*>([^<]+)<', webpage, 'uploader', fatal=False, default=None) or None`
cleaned up, added fallbacks 2017-05-05 00:42:22 -04:00			`thumbnail = self._html_search_regex(r'<[^>]*property=.og:image. ?content="([^>]*thumbnails[^">]*)"\/>', webpage, 'thumbnail', fatal=False, default=None) or self._og_search_thumbnail(webpage)`

			`# looking for a line like the following`
			`# <input id="video-path" type="hidden" name="video_path" value="https://videos.full30.com/bitmotive/public/full30/v1.0/videos/forgottenweapons/b2a28b99494164ddd55e91a6c4648cbc/" />`
			`# there's also a full30.com/cdn which appears to have the same sort of structure. it's possible that either of these may go away so as a backup I'll build the cdn link out from channel slug`
			`vid_path = self._html_search_regex(r'<input id=.video-path[^>]*value=["\']([^"\']*)["\'][^>]*>', webpage, 'video_path', fatal=False, default=None)`
			`if not vid_path:`
			`channel_slug = self._html_search_regex(r'<input id=.channel-slug[^>]*value=["\']([^"\']*)["\'][^>]*>', webpage, 'channel_slug', fatal=True)`
			`vid_path = "https://www.full30.com/cdn/videos/" + channel_slug + "/" + video_id + "/"`

			`vid_json = self._download_webpage(vid_path, video_id)`
			`# turn sequence of json entries into an actual list`
			`vid_json = vid_json.rstrip()`
			`vid_json = "[" + vid_json + "]"`
flake8 compliant, allow non-essential uploader field to be None, fallback to video_id as title (current behavior with generic exractor), make filesize int_or_none 2017-05-06 02:17:17 -04:00			`vid_json = vid_json.replace("}", "},").replace(",]", "]")`
cleaned up, added fallbacks 2017-05-05 00:42:22 -04:00			`parsed = self._parse_json(vid_json, video_id)`
inital attempt at adding full30 support 2017-05-04 22:41:01 -04:00
flake8 compliant, allow non-essential uploader field to be None, fallback to video_id as title (current behavior with generic exractor), make filesize int_or_none 2017-05-06 02:17:17 -04:00			`formats = [{`
			`"url": vid_path + entry["name"],`
			`"resolution": entry["name"][:entry["name"].rfind(".")],`
			`"filesize": int_or_none(entry["size"]),`
			`} for entry in parsed if entry.get("type") == "object"]`
inital attempt at adding full30 support 2017-05-04 22:41:01 -04:00
formats sorted 2017-05-04 22:59:11 -04:00			`self._sort_formats(formats)`

inital attempt at adding full30 support 2017-05-04 22:41:01 -04:00			`return {`
			`'id': video_id,`
			`'title': title,`
			`'uploader': uploader,`
flake8 compliant, allow non-essential uploader field to be None, fallback to video_id as title (current behavior with generic exractor), make filesize int_or_none 2017-05-06 02:17:17 -04:00			`'thumbnail': thumbnail,`
			`'formats': formats,`
inital attempt at adding full30 support 2017-05-04 22:41:01 -04:00			`}`