2015-11-04 18:14:45 +09:00
# coding: utf-8
from __future__ import unicode_literals
from . common import InfoExtractor
2015-11-08 11:03:01 +09:00
import re
2015-11-04 18:14:45 +09:00
class tvpleIE(InfoExtractor):
    """Extractor for single videos hosted on tvple.com.

    Besides the video formats it converts the two JSON subtitle flavours
    referenced by the player metadata ("cloud" comments and regular
    subtitles) into ASS and SRT documents respectively.
    """

    _VALID_URL = r'https?://(?P<url>(?:www\.)?tvple\.com/(?P<id>[0-9]+))'
    _TEST = {
        'url': 'http://tvple.com/311090',
        'md5': '46329fca94a29b5517a30d7e88f48dbf',
        'info_dict': {
            'id': '311090',
            'ext': 'mp4',
            'uploader': '[디지털 드럭] 나비붙이',
            'uploader_id': 'jack1609',
            'title': '팜플렛으로 yee를 연주하는 김병만',
            'description': '자작입니다. 첫 조교..인가..? 조교라긴 애매하지만, 어쨋든 노래로 만드는 건 이번이 처음입니다.\n원본 영상 출처: https://www.youtube.com/watch?v=E4BPHBL35dE\nyee는 유튜브에 치면 원본 영상이 나오는데 다들 아시죠??? 저작권 문제가 될 경우는 지우겠습니다...\n\n병만로이드라고 불러야 하나??',
        }
    }

    # Static part of the generated ASS document; the placeholders are the
    # script title and the playback resolution (PlayResX / PlayResY).
    _ASS_HEADER = (
        '[Script Info]\n'
        'Title: %s\n'
        'ScriptType: v4.00+\n'
        'WrapStyle: 0\n'
        'PlayResX: %d\n'
        'PlayResY: %d\n'
        'ScaledBorderAndShadow: yes\n'
        '\n'
        '[V4+ Styles]\n'
        'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n'
        'Style: Default,Arial,14,&H23FFFFFF,&H000000FF,&HC8000000,&HC8000000,-1,0,0,0,100,100,0,0,1,2,2,5,10,10,10,1\n'
        '\n'
    )
    _ASS_EVENTS_HEADER = (
        '[Events]\n'
        'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n'
    )
    # How long (in seconds) each cloud comment stays on screen.
    _CLOUD_DISPLAY_SECONDS = 2

    @staticmethod
    def _srt_timestamp(total_ms):
        """Format a millisecond offset as an SRT ``HH:MM:SS,mmm`` timestamp."""
        total_seconds, millis = divmod(int(total_ms), 1000)
        minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(minutes, 60)
        return '%02d:%02d:%02d,%03d' % (hours, minutes, seconds, millis)

    @staticmethod
    def _ass_timestamp(total_seconds):
        """Format a whole-second offset as an ``HH:MM:SS.00`` ASS timestamp."""
        minutes, seconds = divmod(int(total_seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return '%02d:%02d:%02d.00' % (hours, minutes, seconds)

    def _convert_srt_subtitle(self, json, duration):
        """Convert a tvple subtitle mapping into an SRT document.

        ``json`` maps a second offset (as a string key) to a mapping of
        millisecond offsets (string keys) to the cue text.  Every cue lasts
        until the next cue starts; the last one lasts until ``duration``
        (the video length in seconds).

        Returns the SRT document as a string, empty when there are no cues.
        """
        cues = []  # (start offset in milliseconds, text)
        # Sort numerically but keep the original keys for the lookups, so no
        # key has to be rebuilt from an int (the former ``unicode(...)`` call
        # does not exist on Python 3).
        for second_key in sorted(json, key=int):
            per_second = json[second_key]
            for ms_key in sorted(per_second, key=int):
                text = per_second[ms_key].replace('<BR>', '\n')
                # NOTE(review): the second replacement target was garbled in
                # the reviewed source; both the entity and the literal
                # no-break space are normalised to a plain space.
                text = text.replace('&nbsp;', ' ').replace('\xa0', ' ')
                cues.append((int(second_key) * 1000 + int(ms_key), text))

        # Derive the closing timestamp from a single rounded value so the
        # seconds and milliseconds parts can never disagree.
        video_end_ms = int(round(duration * 1000))
        starts = [start for start, _ in cues]
        ends = starts[1:] + [video_end_ms]

        blocks = []
        for index, ((start, text), end) in enumerate(zip(cues, ends), 1):
            blocks.append('%d\n%s --> %s\n%s\n\n' % (
                index, self._srt_timestamp(start), self._srt_timestamp(end), text))
        return ''.join(blocks)

    def _convert_ass_cloud(self, json, videoid, title, width, height):
        """Convert tvple "cloud" comments into an ASS document.

        ``json`` maps a second offset (string key) to a list of comments,
        each carrying ``x``, ``y`` and ``text``; the ``_warning`` key is
        ignored.  ``x``/``y`` are multiplied by ``width``/``height`` to get
        the on-screen position, and every comment is shown for
        ``_CLOUD_DISPLAY_SECONDS`` seconds.

        Returns the ASS document as a string.
        """
        parts = [
            self._ASS_HEADER % ('%s - %s' % (title, videoid), width, height),
            self._ASS_EVENTS_HEADER,
        ]
        second_keys = sorted((key for key in json if key != '_warning'), key=int)
        for second_key in second_keys:
            second = int(second_key)
            start = self._ass_timestamp(second)
            end = self._ass_timestamp(second + self._CLOUD_DISPLAY_SECONDS)
            for comment in json[second_key]:
                # Every backslash is doubled: the original "\pos" relied on an
                # invalid escape sequence that merely happened to survive.
                parts.append(
                    'Dialogue: 0,%s,%s,Default,,0,0,0,,{\\an4\\pos(%d,%d)\\fad(0,50)}%s\n' % (
                        start, end,
                        comment['x'] * width, comment['y'] * height,
                        comment['text']))
        return ''.join(parts)

    def _get_subtitles(self, json, title, videoid, duration, width, height):
        """Download and convert the subtitle tracks listed in the metadata.

        ``json`` is the player metadata.  The first entry of
        ``json['cloud']['read_url']`` (if non-empty) becomes an ASS track and
        ``json['subtitle']`` (if non-empty) becomes an SRT track.

        Returns ``{'tvple': [...]}`` with one ``{'ext', 'data'}`` dict per
        available track; the list is empty when the video has none.
        """
        tracks = []

        # Tolerate a missing/empty "cloud" section instead of raising.
        cloud_urls = (json.get('cloud') or {}).get('read_url') or []
        cloud_url = cloud_urls[0] if cloud_urls else None
        if cloud_url:
            cloud = self._download_json(
                cloud_url, videoid, note='Downloading cloud comments')
            tracks.append({
                'ext': 'ass',
                'data': self._convert_ass_cloud(cloud, videoid, title, width, height),
            })

        subtitle_url = json.get('subtitle')
        if subtitle_url:
            subtitle = self._download_json(
                subtitle_url, videoid, note='Downloading subtitles')
            tracks.append({
                'ext': 'srt',
                'data': self._convert_srt_subtitle(subtitle, duration),
            })

        return {'tvple': tracks}

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for one tvple video.

        The watch page supplies the textual metadata and the URL of a JSON
        document (``data-meta`` attribute) describing the stream.  Only that
        URL and the title are mandatory; every other field falls back to
        ``None`` when its markup is not found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        meta_url = self._search_regex(
            r'data-meta="(.*)"', webpage, 'player metadata URL')
        playpage = self._download_json(
            meta_url, video_id, note='Downloading player metadata')
        stream = playpage['stream']

        title = self._search_regex(r'<h2.*title="(.*)"', webpage, 'title')
        uploader = self._search_regex(
            r'personacon-sm".*/>\s*(.*)\s*</a>', webpage, 'uploader',
            default=None)
        uploader_id = self._search_regex(
            r'"/ch/(.*)/videos"', webpage, 'uploader id', default=None)

        description = self._search_regex(
            r'collapse-content linkify mg-top-base break-word">\s*(.*)\s*<button type="button" class="collapse-button',
            webpage, 'description', default=None, flags=re.DOTALL)
        if description is not None:
            # NOTE(review): the exact replacement strings were garbled in the
            # reviewed source; this drops the <br /> tags and the template
            # indentation while keeping the line breaks - confirm against the
            # expected description in _TEST.
            description = re.sub(r'[ \t]*<br\s*/?>', '', description)
            description = re.sub(r'\n[ \t]+', '\n', description).strip()

        view_count = None
        view_count_text = self._search_regex(
            r'fa-play"></i></span>\s*(.*)\s*</li>', webpage, 'view count',
            default=None)
        if view_count_text is not None:
            digits = view_count_text.replace(',', '').strip()
            if digits.isdigit():
                view_count = int(digits)

        # TODO: upload date/time and point count extraction are disabled; the
        # date pattern used previously did not match on every page.
        category = self._search_regex(
            r'badge-info">(.*)</span>', webpage, 'category', default=None)
        tags = re.findall(r'"/tag/(.*)" class="tag user-added">', webpage)

        duration = stream['duration']
        width = stream['width']
        height = stream['height']

        formats = []
        for format_id, source in stream['sources'].items():
            video_url = (source.get('urls') or {}).get('mp4_avc')
            if not video_url:
                # Only the MP4/AVC rendition is handled so far.
                continue
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': format_id,
                'width': width,
                'height': height,
                'no_resume': True,
            })
        # Dict iteration order is arbitrary; give the formats a stable order.
        self._sort_formats(formats)

        subtitles = self.extract_subtitles(
            playpage, title, video_id, duration, width, height)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': int(duration),
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'thumbnail': playpage.get('poster'),
            'formats': formats,
            'subtitles': subtitles,
            # The info dict expects a list of categories, not a bare string.
            'categories': [category] if category else None,
            'tags': tags,
        }