2017-05-04 22:41:01 -04:00
# coding: utf-8
from __future__ import unicode_literals
from . common import InfoExtractor
2017-05-06 02:17:17 -04:00
from . . utils import int_or_none
2017-05-04 22:41:01 -04:00
class Full30IE(InfoExtractor):
    """Information extractor for video pages on full30.com.

    The video page carries the title, channel name and thumbnail, plus a
    hidden input pointing at a directory-like URL.  Fetching that URL yields
    JSON entries describing the available media files; one format is built
    per entry.
    """

    _VALID_URL = r'https?://(?:www\.)?full30\.com/video/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'http://www.full30.com/video/b2a28b99494164ddd55e91a6c4648cbc',
        'md5': 'f5aa3862cbe35c2083ce050ac1a5eb06',
        'info_dict': {
            'id': 'b2a28b99494164ddd55e91a6c4648cbc',
            'title': 'Flamethrower Q&A with Charlie Hobson',
            'uploader': 'Forgotten Weapons',
            'thumbnail': r're:^https?://.*52130\.jpg$',
            'ext': 'ogv',
        }
    }

    def _parse_listing(self, listing, video_id):
        """Decode the file listing into a list of entries.

        The listing is treated as a sequence of JSON values written one after
        another (optionally separated by whitespace or commas).  A value that
        is itself a JSON array is flattened into the result, so a well-formed
        array response is accepted as well.

        Raises whatever ``self._parse_json`` raises for malformed input.
        """
        # Function-scope import so this block does not depend on the
        # module's import section.
        import json

        decoder = json.JSONDecoder()
        entries = []
        remaining = listing
        while True:
            # raw_decode does not skip leading whitespace/separators itself.
            remaining = remaining.lstrip(' \t\r\n,')
            if not remaining:
                break
            try:
                value, consumed = decoder.raw_decode(remaining)
            except ValueError:
                # Feed the undecodable remainder to the regular helper so the
                # failure is reported the same way as any other JSON error.
                self._parse_json(remaining, video_id)
                raise
            remaining = remaining[consumed:]
            if isinstance(value, list):
                entries.extend(value)
            else:
                entries.append(value)
        return entries

    def _real_extract(self, url):
        """Return the info dict (id, title, uploader, thumbnail, formats)."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Every lookup passes default=None so that a miss falls through to
        # the next alternative instead of raising; the video id is the last
        # resort for the title.
        title = (
            self._html_search_regex(
                r'<h1 [^>]*class=.video-title[^>]*>([^<]+?)</h1>',
                webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None)
            or video_id)

        uploader = self._html_search_regex(
            r'<h1 class=.channel-title[^>]*>([^<]+)<',
            webpage, 'uploader', default=None)

        # Prefer an og:image whose URL contains "thumbnails"; otherwise take
        # whatever og:image the page offers.  Whitespace before the closing
        # "/>" is optional.
        thumbnail = self._html_search_regex(
            r'<[^>]*property=.og:image. ?content="([^>]*thumbnails[^">]*)"\s*/>',
            webpage, 'thumbnail', default=None) or self._og_search_thumbnail(webpage)

        # Looking for a line like the following:
        # <input id="video-path" type="hidden" name="video_path" value="https://videos.full30.com/bitmotive/public/full30/v1.0/videos/forgottenweapons/b2a28b99494164ddd55e91a6c4648cbc/" />
        # There is also a full30.com/cdn tree which appears to have the same
        # sort of structure.  Either of these may go away, so as a backup the
        # cdn link is built from the channel slug.
        vid_path = self._html_search_regex(
            r'<input id=.video-path[^>]*value=["\']([^"\']*)["\'][^>]*>',
            webpage, 'video_path', default=None)
        if not vid_path:
            # Without a slug there is nothing left to try, so this one is fatal.
            channel_slug = self._html_search_regex(
                r'<input id=.channel-slug[^>]*value=["\']([^"\']*)["\'][^>]*>',
                webpage, 'channel_slug', fatal=True)
            vid_path = 'https://www.full30.com/cdn/videos/' + channel_slug + '/' + video_id + '/'

        listing = self._download_webpage(
            vid_path, video_id, note='Downloading video file listing')

        formats = []
        for entry in self._parse_listing(listing, video_id):
            # Only entries typed "object" describe files; anything else, or
            # an entry without a file name, cannot become a format.
            if not isinstance(entry, dict) or entry.get('type') != 'object':
                continue
            name = entry.get('name')
            if not name:
                continue
            formats.append({
                'url': vid_path + name,
                # File name without its extension; a name with no dot is
                # kept whole.
                'resolution': name.rsplit('.', 1)[0],
                'filesize': int_or_none(entry.get('size')),
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'thumbnail': thumbnail,
            'formats': formats,
        }