fixed tvple downloader, added cloud-to-subtitle converter, cleaned code

now clouds can be converted to ASS subtitles!
This commit is contained in:
kjy00302 2015-11-08 11:03:01 +09:00
parent 6734e2d30d
commit 13dcab07a2
2 changed files with 68 additions and 56 deletions

View File

@ -582,6 +582,7 @@
- **tvp.pl** - **tvp.pl**
- **tvp.pl:Series** - **tvp.pl:Series**
- **TVPlay**: TV3Play and related services - **TVPlay**: TV3Play and related services
- **tvple**
- **Tweakers** - **Tweakers**
- **twitch:bookmarks** - **twitch:bookmarks**
- **twitch:chapter** - **twitch:chapter**

View File

@ -2,13 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unescapeHTML
from hashlib import sha1 from hashlib import sha1
import re,zlib import re
from zlib import decompress
from ..compat import (
compat_urllib_request
)
class tvpleIE(InfoExtractor): class tvpleIE(InfoExtractor):
_VALID_URL = r'https?://(?P<url>(?:www\.)?tvple\.com/(?P<id>[0-9]+))' _VALID_URL = r'https?://(?P<url>(?:www\.)?tvple\.com/(?P<id>[0-9]+))'
@ -30,69 +28,82 @@ class tvpleIE(InfoExtractor):
} }
} }
def decodetvple(self,key): def _decode_tvple(self, key):
"""based on decompiled tvple player v2.50401""" """based on decompiled tvple player v2.50401"""
#1st key checker # 1st key checker
#if((key[:4] != "feff") | (key[20:21] != "_")): if((key[:4] != "feff") | (key[20:21] != "_")):
# print("error:wrong key") self.report_warning("error:wrong key")
#descramble key # descramble key
deckey = list(key[69:85]) deckey = list(key[69:85])
code = key[125:][::-1] code = key[125:][::-1]
#descrambling # descrambling
hexed = code.replace(deckey[5], "g").replace(deckey[4], "h").replace(deckey[3], "i").replace(deckey[2], "j").replace(deckey[1], "k").replace(deckey[6], deckey[5]).replace(deckey[7], deckey[4]).replace(deckey[8], deckey[3]).replace(deckey[9], deckey[2]).replace(deckey[10], deckey[1]).replace("g", deckey[6]).replace("h", deckey[7]).replace("i", deckey[8]).replace("j", deckey[9]).replace("k", deckey[10]) hexed = code.replace(deckey[5], "g").replace(deckey[4], "h").replace(deckey[3], "i").replace(deckey[2], "j").replace(deckey[1], "k").replace(deckey[6], deckey[5]).replace(deckey[7], deckey[4]).replace(deckey[8], deckey[3]).replace(deckey[9], deckey[2]).replace(deckey[10], deckey[1]).replace("g", deckey[6]).replace("h", deckey[7]).replace("i", deckey[8]).replace("j", deckey[9]).replace("k", deckey[10])
decoded = hexed.decode("hex") decoded = hexed.decode("hex")
#2nd key checker # 2nd key checker
#if( sha1(decoded).hexdigest() != key[85:125]): if(sha1(decoded).hexdigest() != key[85:125]):
# print("error:key checksum failed") self.report_warning("error:key checksum failed")
return decoded return decoded
#def downloadgurum(misc): def _convert_sub(self, misc, title, width, height):
clouds = unescapeHTML(self._decode_tvple(misc).decode('utf-8'))
lines = re.findall(r'<item .*?</item>', clouds)
asstemp1 = "[Script Info]\nTitle: %s\nScriptType: v4.00+\nWrapStyle: 0\nPlayResX: %d\nPlayResY: %d\nScaledBorderAndShadow: yes\n\n[V4+ Styles]\nFormat: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\nStyle:Default,Arial,14,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1\n\n" % (title, width, height)
asstemp2 = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"
for line in lines:
reg = re.search(r'id="(.*)" x="(.*)" y="(.*)" pos="(.*)">(.*)<', line)
sec = int(reg.group(4))
starttime = "%02d:%02d:%02d.00" % (divmod(divmod(sec, 60)[0], 60)[0], divmod(divmod(sec, 60)[0], 60)[1], divmod(sec, 60)[1])
endtime = "%02d:%02d:%02d.00" % (divmod(divmod(sec + 2, 60)[0], 60)[0], divmod(divmod(sec + 2, 60)[0], 60)[1], divmod(sec + 2, 60)[1])
asstemp2 += "Dialogue: 0,%s,%s,Default,,0,0,0,,{\\an4\pos(%d,%d)}%s\n" % (starttime, endtime, int(reg.group(2)), int(reg.group(3)), reg.group(5))
return(asstemp1 + asstemp2)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
playurl = self._html_search_regex(r'http://tvple.com/crossdomain.xml\n(.*)\n1', self.decodetvple(re.search(r'data-key="(.*)"', webpage).group(1)), "playurl") playurl = self._html_search_regex(r'http://api.tvple.com/crossdomain.xml\n(.*)\n1', self._decode_tvple(re.search(r'data-key="(.*)"', webpage).group(1)), "playurl")
playpage = self._download_webpage(playurl, "playurl_%d" % int(video_id)) playpage = self._download_webpage(playurl, "playurl_%d" % int(video_id))
videourl = re.search("<video><page_url>http://tvple.com/[0-9]+</page_url><url>(.*)</url>.*<preview>.*<url>(.*)</url>", self.decodetvple(playpage), re.DOTALL) urls = re.findall(r'<url>(.*)</url>', self._decode_tvple(playpage))
# TODO more code goes here, for example ...
title = re.search("<h2.*title=\"(.*)\"", webpage).group(1) #title title = re.search("<h2.*title=\"(.*)\"", webpage).group(1) # title
uploader = re.search(r'personacon-sm".*/>\s*(.*)\s*</a>', webpage).group(1) #username uploader = re.search(r'personacon-sm".*/>\s*(.*)\s*</a>', webpage).group(1) # username
uploader_id = re.search(r'"/ch/(.*)/videos"', webpage).group(1) #userid uploader_id = re.search(r'"/ch/(.*)/videos"', webpage).group(1) # userid
description = re.search(r'break-word">\s*(.*)\s*<button', webpage, re.DOTALL).group(1).replace(" <br />", "").replace("<br />", "").replace("\n ", "") #description description = re.search(r'break-word">\s*(.*)\s*<button', webpage, re.DOTALL).group(1).replace(" <br />", "").replace("<br />", "").replace("\n ", "") # description
resolution = re.search(r'fa-television"></i></span>\s*([0-9]*)x([0-9]*)\s*</li>', webpage) #resolution resolution = re.search(r'fa-television"></i></span>\s*([0-9]*)x([0-9]*)\s*</li>', webpage) # resolution
point = re.search(r'fa-bar-chart"></i></span>\s*(.*)p\s*</li>', webpage).group(1).replace(",", "") #point? # point = re.search(r'fa-bar-chart"></i></span>\s*(.*)p\s*</li>', webpage).group(1).replace(",", "") # point?
view_count = int(re.search(r'fa-play"></i></span>\s*(.*)\s*</li>', webpage).group(1).replace(",", "")) #played view_count = int(re.search(r'fa-play"></i></span>\s*(.*)\s*</li>', webpage).group(1).replace(",", "")) # played
duration = int(re.search(r'fa-video-camera"></i></span>\s*(\d*):(\d*)\s*</li>', webpage).group(1))*60+int(re.search(r'fa-video-camera"></i></span>\s*(\d*):(\d*)\s*</li>', webpage).group(2)) #duration duration = int(re.search(r'fa-video-camera"></i></span>\s*(\d*):(\d*)\s*</li>', webpage).group(1)) * 60 + int(re.search(r'fa-video-camera"></i></span>\s*(\d*):(\d*)\s*</li>', webpage).group(2)) # duration
#date = re.search(r'<small>\s*(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).*\s*</small>', webpage).group(1).replace("-", "") #date FIXME-sometimes not w # date = re.search(r'<small>\s*(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).*\s*</small>', webpage).group(1).replace("-", "") # date FIXME-sometimes not working
#time = re.search(r'<small>\s*(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).*\s*</small>', webpage).group(2) #time FIXME-sometimes not working # time = re.search(r'<small>\s*(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).*\s*</small>', webpage).group(2) # time FIXME-sometimes not working
group = re.search(r'badge-info">(.*)</span>', webpage).group(1) #group categories = re.search(r'badge-info">(.*)</span>', webpage).group(1) # categories
tags = re.findall(r'"/tag/(.*)" class="tag user-added">', webpage) #tags tags = re.findall(r'"/tag/(.*)" class="tag user-added">', webpage) # tags
formats = [{ formats = [{
'url' : videourl.group(1), 'url': urls[0],
'ext' : 'mp4', 'ext': 'mp4',
'format_id' : 'mp4_h264_aac', 'format_id': 'mp4_h264_aac',
'width' : int(resolution.group(1)), 'width': int(resolution.group(1)),
'height' : int(resolution.group(2)), 'height': int(resolution.group(2)),
'no_resume' : True 'no_resume': True
}] }]
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': videourl.group(2),
'duration': duration, 'duration': duration,
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'view_count': view_count, 'view_count': view_count,
#'comment_count': comment_count, # 'comment_count': comment_count,
'formats': formats, 'formats': formats,
#'subtitles': subtitles, 'subtitles': {'ass': [{'ext': 'ass', 'data': self._convert_sub(decompress(self._request_webpage(urls[1], "clouds_xml").read()), "%s-%s" % (title, video_id), int(resolution.group(1)), int(resolution.group(2)))}]},
'tags' : tags 'categories': categories,
'tags': tags
# TODO more properties (see youtube_dl/extractor/common.py) # TODO more properties (see youtube_dl/extractor/common.py)
} }