Fix TODOs, remove 'unicode', add more properties

This commit is contained in:
kjy00302 2016-02-01 01:40:06 +09:00
parent f4a7e38649
commit b683cae13f
2 changed files with 49 additions and 27 deletions

View File

@ -678,7 +678,7 @@ from .tvc import (
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE from .tvplay import TVPlayIE
from .tvple import tvpleIE from .tvple import TvpleIE
from .tweakers import TweakersIE from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE from .twentyfourvideo import TwentyFourVideoIE
from .twentytwotracks import ( from .twentytwotracks import (

View File

@ -3,11 +3,13 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
import re import re
import time
import datetime
class tvpleIE(InfoExtractor): class TvpleIE(InfoExtractor):
_VALID_URL = r'https?://(?P<url>(?:www\.)?tvple\.com/(?P<id>[0-9]+))' _VALID_URL = r'https?://(?P<url>(?:www\.)?tvple\.com/(?P<id>[0-9]+))'
_TEST = { _TESTS = [{
'url': 'http://tvple.com/311090', 'url': 'http://tvple.com/311090',
'md5': '46329fca94a29b5517a30d7e88f48dbf', 'md5': '46329fca94a29b5517a30d7e88f48dbf',
'info_dict': { 'info_dict': {
@ -16,32 +18,42 @@ class tvpleIE(InfoExtractor):
'uploader': '[디지털 드럭] 나비붙이', 'uploader': '[디지털 드럭] 나비붙이',
'uploader_id': 'jack1609', 'uploader_id': 'jack1609',
'title': '팜플렛으로 yee를 연주하는 김병만', 'title': '팜플렛으로 yee를 연주하는 김병만',
'description': '자작입니다. 첫 조교..인가..? 조교라긴 애매하지만, 어쨋든 노래로 만드는 건 이번이 처음입니다.\n원본 영상 출처: https://www.youtube.com/watch?v=E4BPHBL35dE\nyee는 유튜브에 치면 원본 영상이 나오는데 다들 아시죠??? 저작권 문제가 될 경우는 지우겠습니다...\n\n병만로이드라고 불러야 하나??' 'description': '자작입니다. 첫 조교..인가..? 조교라긴 애매하지만, 어쨋든 노래로 만드는 건 이번이 처음입니다.\n원본 영상 출처: https://www.youtube.com/watch?v=E4BPHBL35dE\nyee는 유튜브에 치면 원본 영상이 나오는데 다들 아시죠??? 저작권 문제가 될 경우는 지우겠습니다...\n\n병만로이드라고 불러야 하나??',
# TODO more properties, either as: 'duration': 9
# * A value
# * MD5 checksum; start the string with md5:
# * A regular expression; start the string with re:
# * Any Python type (for example int or float)
} }
}, {
'url': 'http://tvple.com/208230',
'md5': '98e4f705fbb77b0ad9afe6e86751d89a',
'info_dict': {
'id': '208230',
'ext': 'mp4',
'uploader': 'mesenghe',
'uploader_id': 'mesenghe',
'title': '소환사 협곡의 개새끼',
'description': 'http://youtu.be/LGABUervp48\n재밌게 봐라\n유튜브나 네이버 동영상으로 퍼가지 말고\n이젠 롤 관련된 건 안 만든다',
'duration': 71
} }
}]
def _convert_srt_subtitle(self, json, duration): def _convert_srt_subtitle(self, json, duration):
"""convert tvple subtitle to srt subtitle"""
sec = [] sec = []
sub = "" sub = ""
timecode = [] timecode = []
text = [] text = []
for i in json: for i in json:
if(i != 'status'):
sec.append(int(i)) sec.append(int(i))
sec.sort() sec.sort()
for second in sec: for second in sec:
msec = [] msec = []
for i in json[unicode(second)]: for i in json[str(second)]:
msec.append(int(i)) msec.append(int(i))
msec.sort() msec.sort()
for millisecond in msec: for millisecond in msec:
timecode.append("%02d:%02d:%02d,%03d" % (second // 60 // 60, second // 60 % 60, second % 60, millisecond)) timecode.append("%02d:%02d:%02d,%03d" % (second // 60 // 60, second // 60 % 60, second % 60, millisecond))
text.append(json[unicode(second)][unicode(millisecond)].replace('<BR>', '\n').replace('&nbsp;', '')) text.append(json[str(second)][str(millisecond)].replace('<BR>', '\n').replace('&nbsp;', ''))
timecode.append("%02d:%02d:%02d,%03d" % (duration // 60 // 60, duration // 60 % 60, duration % 60, int(("%0.3f" % duration)[-3:]))) timecode.append("%02d:%02d:%02d,%03d" % (duration // 60 // 60, duration // 60 % 60, duration % 60, int(("%0.3f" % duration)[-3:])))
@ -50,6 +62,7 @@ class tvpleIE(InfoExtractor):
return sub return sub
def _convert_ass_cloud(self, json, videoid, title, width, height): def _convert_ass_cloud(self, json, videoid, title, width, height):
"""convert tvple cloud to ass subtitle"""
sec = [] sec = []
asstemp1 = "[Script Info]\nTitle: %s\nScriptType: v4.00+\nWrapStyle: 0\nPlayResX: %d\nPlayResY: %d\nScaledBorderAndShadow: yes\n\n[V4+ Styles]\nFormat: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\nStyle: Default,Arial,14,&H23FFFFFF,&H000000FF,&HC8000000,&HC8000000,-1,0,0,0,100,100,0,0,1,2,2,5,10,10,10,1\n\n" % (title + '-' + videoid, width, height) asstemp1 = "[Script Info]\nTitle: %s\nScriptType: v4.00+\nWrapStyle: 0\nPlayResX: %d\nPlayResY: %d\nScaledBorderAndShadow: yes\n\n[V4+ Styles]\nFormat: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\nStyle: Default,Arial,14,&H23FFFFFF,&H000000FF,&HC8000000,&HC8000000,-1,0,0,0,100,100,0,0,1,2,2,5,10,10,10,1\n\n" % (title + '-' + videoid, width, height)
@ -76,7 +89,7 @@ class tvpleIE(InfoExtractor):
if json['cloud']['read_url'][0] != '': if json['cloud']['read_url'][0] != '':
subs['tvple'].append({ subs['tvple'].append({
'ext': 'ass', 'ext': 'ass',
'data': self._convert_ass_cloud(self._download_json(json['cloud']['read_url'][0], 'cloud_%d' % int(videoid)), videoid, title, width, height) 'data': self._convert_ass_cloud(self._download_json(json['cloud']['read_url'], 'cloud_%d' % int(videoid)), videoid, title, width, height)
}) })
if json['subtitle'] != '': if json['subtitle'] != '':
@ -96,22 +109,30 @@ class tvpleIE(InfoExtractor):
uploader = re.search(r'personacon-sm".*/>\s*(.*)\s*</a>', webpage).group(1) # username uploader = re.search(r'personacon-sm".*/>\s*(.*)\s*</a>', webpage).group(1) # username
uploader_id = re.search(r'"/ch/(.*)/videos"', webpage).group(1) # userid uploader_id = re.search(r'"/ch/(.*)/videos"', webpage).group(1) # userid
description = re.search(r'collapse-content linkify mg-top-base break-word">\s*(.*)\s*<button type="button" class="collapse-button', webpage, re.DOTALL).group(1).replace(" <br />", "").replace("<br />", "").replace("\n ", "") # description description = re.search(r'collapse-content linkify mg-top-base break-word">\s*(.*)\s*<button type="button" class="collapse-button', webpage, re.DOTALL).group(1).replace(" <br />", "").replace("<br />", "").replace("\n ", "") # description
# point = re.search(r'fa-bar-chart"></i></span>\s*(.*)p\s*</li>', webpage).group(1).replace(",", "") # point?
view_count = int(re.search(r'fa-play"></i></span>\s*(.*)\s*</li>', webpage).group(1).replace(",", "")) # played view_count = int(re.search(r'fa-play"></i></span>\s*(.*)\s*</li>', webpage).group(1).replace(",", "")) # played
duration = playpage['stream']['duration'] # duration try:
# date = re.search(r'<small>\s*(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).*\s*</small>', webpage).group(1).replace("-", "") # date FIXME-sometimes not working comment_count = int(re.search(r'fa-cloud"></i></span>\s*(\d*)개의 구름', webpage).group(1).replace(",", "")) # comment count
# time = re.search(r'<small>\s*(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).*\s*</small>', webpage).group(2) # time FIXME-sometimes not working except AttributeError: # if comment count is zero, tvple print '아직 구름이 없습니다. 첫 구름을 띄워보세요!'
comment_count = 0
duration = int(playpage['stream']['duration']) # duration
average_rating = int(re.search(r'fa-bar-chart"></i></span>\s*(.*)p\s*</li>', webpage).group(1).replace(",", "")) # rating
like_count = int(re.search(r'찜 하기\n<span class="badge">(\d*)</span>', webpage).group(1)) # liked
uploadeddatetime = re.search(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})에 업로드됨', webpage)
timestamp = time.mktime(datetime.datetime.strptime(uploadeddatetime.group(1), '%Y-%m-%d %H:%M:%S').timetuple()) + (60 * 60 * 9) # timestamp + KST(+9)
categories = re.search(r'badge-info">(.*)</span>', webpage).group(1) # categories categories = re.search(r'badge-info">(.*)</span>', webpage).group(1) # categories
tags = re.findall(r'"/tag/(.*)" class="tag user-added">', webpage) # tags tags = re.findall(r'class="tag "\n.*}">(.*)</a>', webpage) # tags
formats = [] formats = []
for formatid in playpage['stream']['sources']: for formatid in playpage['stream']['sources']:
formats.append({ formats.append({
'url': playpage['stream']['sources'][formatid]['urls']['mp4_avc'], 'url': playpage['stream']['sources'][formatid]['urls']['mp4_avc'],
'ext': 'mp4', # TODO-if file isn't a mp4? 'ext': 'mp4', # tvple using mp4 for main format
'format_id': formatid, 'format_id': formatid,
'width': playpage['stream']['width'], 'width': playpage['stream']['width'],
'height': playpage['stream']['height'], 'height': playpage['stream']['height']
'no_resume': True
}) })
subtitles = self.extract_subtitles(playpage, title, video_id, duration, playpage['stream']['width'], playpage['stream']['height']) subtitles = self.extract_subtitles(playpage, title, video_id, duration, playpage['stream']['width'], playpage['stream']['height'])
@ -120,16 +141,17 @@ class tvpleIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'duration': int(duration), 'duration': duration,
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'view_count': view_count, 'view_count': view_count,
# 'comment_count': comment_count, 'comment_count': comment_count,
'thumbnail': playpage['poster'], 'thumbnail': playpage['poster'],
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'categories': categories, 'categories': categories,
'tags': tags 'tags': tags,
'timestamp': timestamp,
# TODO more properties (see youtube_dl/extractor/common.py) 'avrage_rating': average_rating,
'like_count': like_count
} }