Fix TODOs, replace 'unicode' with 'str', add more properties

This commit is contained in:
kjy00302 2016-02-01 01:40:06 +09:00
parent f4a7e38649
commit b683cae13f
2 changed files with 49 additions and 27 deletions

View File

@ -678,7 +678,7 @@ from .tvc import (
from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE
from .tvple import tvpleIE
from .tvple import TvpleIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentytwotracks import (

View File

@ -3,11 +3,13 @@ from __future__ import unicode_literals
from .common import InfoExtractor
import re
import time
import datetime
class tvpleIE(InfoExtractor):
class TvpleIE(InfoExtractor):
_VALID_URL = r'https?://(?P<url>(?:www\.)?tvple\.com/(?P<id>[0-9]+))'
_TEST = {
_TESTS = [{
'url': 'http://tvple.com/311090',
'md5': '46329fca94a29b5517a30d7e88f48dbf',
'info_dict': {
@ -16,32 +18,42 @@ class tvpleIE(InfoExtractor):
'uploader': '[디지털 드럭] 나비붙이',
'uploader_id': 'jack1609',
'title': '팜플렛으로 yee를 연주하는 김병만',
'description': '자작입니다. 첫 조교..인가..? 조교라긴 애매하지만, 어쨋든 노래로 만드는 건 이번이 처음입니다.\n원본 영상 출처: https://www.youtube.com/watch?v=E4BPHBL35dE\nyee는 유튜브에 치면 원본 영상이 나오는데 다들 아시죠??? 저작권 문제가 될 경우는 지우겠습니다...\n\n병만로이드라고 불러야 하나??'
# TODO more properties, either as:
# * A value
# * MD5 checksum; start the string with md5:
# * A regular expression; start the string with re:
# * Any Python type (for example int or float)
'description': '자작입니다. 첫 조교..인가..? 조교라긴 애매하지만, 어쨋든 노래로 만드는 건 이번이 처음입니다.\n원본 영상 출처: https://www.youtube.com/watch?v=E4BPHBL35dE\nyee는 유튜브에 치면 원본 영상이 나오는데 다들 아시죠??? 저작권 문제가 될 경우는 지우겠습니다...\n\n병만로이드라고 불러야 하나??',
'duration': 9
}
}
}, {
'url': 'http://tvple.com/208230',
'md5': '98e4f705fbb77b0ad9afe6e86751d89a',
'info_dict': {
'id': '208230',
'ext': 'mp4',
'uploader': 'mesenghe',
'uploader_id': 'mesenghe',
'title': '소환사 협곡의 개새끼',
'description': 'http://youtu.be/LGABUervp48\n재밌게 봐라\n유튜브나 네이버 동영상으로 퍼가지 말고\n이젠 롤 관련된 건 안 만든다',
'duration': 71
}
}]
def _convert_srt_subtitle(self, json, duration):
"""convert tvple subtitle to srt subtitle"""
sec = []
sub = ""
timecode = []
text = []
for i in json:
sec.append(int(i))
if(i != 'status'):
sec.append(int(i))
sec.sort()
for second in sec:
msec = []
for i in json[unicode(second)]:
for i in json[str(second)]:
msec.append(int(i))
msec.sort()
for millisecond in msec:
timecode.append("%02d:%02d:%02d,%03d" % (second // 60 // 60, second // 60 % 60, second % 60, millisecond))
text.append(json[unicode(second)][unicode(millisecond)].replace('<BR>', '\n').replace('&nbsp;', ''))
text.append(json[str(second)][str(millisecond)].replace('<BR>', '\n').replace('&nbsp;', ''))
timecode.append("%02d:%02d:%02d,%03d" % (duration // 60 // 60, duration // 60 % 60, duration % 60, int(("%0.3f" % duration)[-3:])))
@ -50,6 +62,7 @@ class tvpleIE(InfoExtractor):
return sub
def _convert_ass_cloud(self, json, videoid, title, width, height):
"""convert tvple cloud to ass subtitle"""
sec = []
asstemp1 = "[Script Info]\nTitle: %s\nScriptType: v4.00+\nWrapStyle: 0\nPlayResX: %d\nPlayResY: %d\nScaledBorderAndShadow: yes\n\n[V4+ Styles]\nFormat: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\nStyle: Default,Arial,14,&H23FFFFFF,&H000000FF,&HC8000000,&HC8000000,-1,0,0,0,100,100,0,0,1,2,2,5,10,10,10,1\n\n" % (title + '-' + videoid, width, height)
@ -76,7 +89,7 @@ class tvpleIE(InfoExtractor):
if json['cloud']['read_url'][0] != '':
subs['tvple'].append({
'ext': 'ass',
'data': self._convert_ass_cloud(self._download_json(json['cloud']['read_url'][0], 'cloud_%d' % int(videoid)), videoid, title, width, height)
'data': self._convert_ass_cloud(self._download_json(json['cloud']['read_url'], 'cloud_%d' % int(videoid)), videoid, title, width, height)
})
if json['subtitle'] != '':
@ -96,22 +109,30 @@ class tvpleIE(InfoExtractor):
uploader = re.search(r'personacon-sm".*/>\s*(.*)\s*</a>', webpage).group(1) # username
uploader_id = re.search(r'"/ch/(.*)/videos"', webpage).group(1) # userid
description = re.search(r'collapse-content linkify mg-top-base break-word">\s*(.*)\s*<button type="button" class="collapse-button', webpage, re.DOTALL).group(1).replace(" <br />", "").replace("<br />", "").replace("\n ", "") # description
# point = re.search(r'fa-bar-chart"></i></span>\s*(.*)p\s*</li>', webpage).group(1).replace(",", "") # point?
view_count = int(re.search(r'fa-play"></i></span>\s*(.*)\s*</li>', webpage).group(1).replace(",", "")) # played
duration = playpage['stream']['duration'] # duration
# date = re.search(r'<small>\s*(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).*\s*</small>', webpage).group(1).replace("-", "") # date FIXME-sometimes not working
# time = re.search(r'<small>\s*(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).*\s*</small>', webpage).group(2) # time FIXME-sometimes not working
try:
comment_count = int(re.search(r'fa-cloud"></i></span>\s*(\d*)개의 구름', webpage).group(1).replace(",", "")) # comment count
except AttributeError: # if comment count is zero, tvple print '아직 구름이 없습니다. 첫 구름을 띄워보세요!'
comment_count = 0
duration = int(playpage['stream']['duration']) # duration
average_rating = int(re.search(r'fa-bar-chart"></i></span>\s*(.*)p\s*</li>', webpage).group(1).replace(",", "")) # rating
like_count = int(re.search(r'찜 하기\n<span class="badge">(\d*)</span>', webpage).group(1)) # liked
uploadeddatetime = re.search(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})에 업로드됨', webpage)
timestamp = time.mktime(datetime.datetime.strptime(uploadeddatetime.group(1), '%Y-%m-%d %H:%M:%S').timetuple()) + (60 * 60 * 9) # timestamp + KST(+9)
categories = re.search(r'badge-info">(.*)</span>', webpage).group(1) # categories
tags = re.findall(r'"/tag/(.*)" class="tag user-added">', webpage) # tags
tags = re.findall(r'class="tag "\n.*}">(.*)</a>', webpage) # tags
formats = []
for formatid in playpage['stream']['sources']:
formats.append({
'url': playpage['stream']['sources'][formatid]['urls']['mp4_avc'],
'ext': 'mp4', # TODO-if file isn't a mp4?
'ext': 'mp4', # tvple using mp4 for main format
'format_id': formatid,
'width': playpage['stream']['width'],
'height': playpage['stream']['height'],
'no_resume': True
'height': playpage['stream']['height']
})
subtitles = self.extract_subtitles(playpage, title, video_id, duration, playpage['stream']['width'], playpage['stream']['height'])
@ -120,16 +141,17 @@ class tvpleIE(InfoExtractor):
'id': video_id,
'title': title,
'description': description,
'duration': int(duration),
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'view_count': view_count,
# 'comment_count': comment_count,
'comment_count': comment_count,
'thumbnail': playpage['poster'],
'formats': formats,
'subtitles': subtitles,
'categories': categories,
'tags': tags
# TODO more properties (see youtube_dl/extractor/common.py)
'tags': tags,
'timestamp': timestamp,
'avrage_rating': average_rating,
'like_count': like_count
}