2017-05-04 22:41:01 -04:00
# coding: utf-8
from __future__ import unicode_literals
from . common import InfoExtractor
2017-05-06 02:17:17 -04:00
from . . utils import int_or_none
2017-05-04 22:41:01 -04:00
class Full30IE(InfoExtractor):
    """Information extractor for videos hosted on full30.com.

    The video page carries a hidden ``video-path`` input pointing at a
    directory whose listing is a sequence of JSON objects, one per file.
    Every entry of type ``object`` in that listing becomes one format.
    """

    _VALID_URL = r'https?://(?:www\.)?full30\.com/video/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'http://www.full30.com/video/b2a28b99494164ddd55e91a6c4648cbc',
        'md5': 'f5aa3862cbe35c2083ce050ac1a5eb06',
        'info_dict': {
            'id': 'b2a28b99494164ddd55e91a6c4648cbc',
            'title': 'Flamethrower Q&A with Charlie Hobson',
            'uploader': 'Forgotten Weapons',
            'thumbnail': r're:^https?://.*52130\.jpg$',
            'ext': 'ogv',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the full30.com video page at *url*.

        Returns a dict with ``id``, ``title``, ``uploader``, ``thumbnail``
        and ``formats``.  ``uploader`` and ``thumbnail`` may be None when the
        page does not expose them; ``title`` falls back to the video id.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Every lookup in this chain passes default=None so that a miss
        # yields None instead of aborting; otherwise the trailing video_id
        # fallback could never be reached.
        title = (
            self._html_search_regex(
                r'<h1 [^>]*class=.video-title[^>]*>([^<]+?)</h1>',
                webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None)
            or video_id)

        uploader = self._html_search_regex(
            r'<h1 class=.channel-title[^>]*>([^<]+)<',
            webpage, 'uploader', default=None)

        # Prefer the og:image whose URL contains "thumbnails"; otherwise take
        # whatever the generic OpenGraph thumbnail lookup finds.
        thumbnail = self._html_search_regex(
            r'<[^>]*property=.og:image. ?content="([^>]*thumbnails[^">]*)"\/>',
            webpage, 'thumbnail', default=None) or self._og_search_thumbnail(webpage)

        # Looking for a line like the following:
        # <input id="video-path" type="hidden" name="video_path" value="https://videos.full30.com/bitmotive/public/full30/v1.0/videos/forgottenweapons/b2a28b99494164ddd55e91a6c4648cbc/" />
        # There is also a full30.com/cdn tree which appears to have the same
        # structure.  Either of these may go away, so as a backup the cdn
        # link is built from the channel slug.
        vid_path = self._html_search_regex(
            r'<input id=.video-path[^>]*value=["\']([^"\']*)["\'][^>]*>',
            webpage, 'video_path', default=None)
        if not vid_path:
            # No usable path on the page: the channel slug is now mandatory,
            # so this lookup is left fatal.
            channel_slug = self._html_search_regex(
                r'<input id=.channel-slug[^>]*value=["\']([^"\']*)["\'][^>]*>',
                webpage, 'channel_slug')
            vid_path = 'https://www.full30.com/cdn/videos/%s/%s/' % (
                channel_slug, video_id)

        listing = self._download_webpage(vid_path, video_id)
        # Turn the sequence of JSON objects into an actual JSON list by
        # adding a comma after every closing brace and dropping the one left
        # dangling before the closing bracket.
        # NOTE(review): this assumes flat objects with no "}" inside string
        # values; nested objects would produce invalid JSON here.
        listing = '[' + listing.rstrip() + ']'
        listing = listing.replace('}', '},').replace(',]', ']')
        parsed = self._parse_json(listing, video_id)

        formats = []
        for entry in parsed:
            if not isinstance(entry, dict) or entry.get('type') != 'object':
                continue
            name = entry.get('name')
            if not name:
                # Without a file name there is no URL to build.
                continue
            formats.append({
                'url': vid_path + name,
                # File name minus its extension; rpartition leaves a name
                # without any dot intact instead of chopping its last char.
                'resolution': name.rpartition('.')[0] or name,
                'filesize': int_or_none(entry.get('size')),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'thumbnail': thumbnail,
            'formats': formats,
        }