l1ving_youtube-dl/youtube_dl/extractor/tvple.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
import re


class tvpleIE(InfoExtractor):
    """Information extractor for video pages of the form ``tvple.com/<id>``.

    The video page links (through a ``data-meta`` attribute) to a JSON
    document describing the stream; the remaining metadata is scraped from
    the HTML.  Two kinds of subtitles are produced: the regular subtitle
    track (converted to SRT) and the positioned viewer comments ("cloud",
    converted to ASS).
    """

    _VALID_URL = r'https?://(?P<url>(?:www\.)?tvple\.com/(?P<id>[0-9]+))'
    _TEST = {
        'url': 'http://tvple.com/311090',
        'md5': '46329fca94a29b5517a30d7e88f48dbf',
        'info_dict': {
            'id': '311090',
            'ext': 'mp4',
            'uploader': '[디지털 드럭] 나비붙이',
            'uploader_id': 'jack1609',
            'title': '팜플렛으로 yee를 연주하는 김병만',
            'description': '자작입니다. 첫 조교..인가..? 조교라긴 애매하지만, 어쨋든 노래로 만드는 건 이번이 처음입니다.\n원본 영상 출처: https://www.youtube.com/watch?v=E4BPHBL35dE\nyee는 유튜브에 치면 원본 영상이 나오는데 다들 아시죠??? 저작권 문제가 될 경우는 지우겠습니다...\n\n병만로이드라고 불러야 하나??',
            # TODO more properties, either as:
            # * A value
            # * MD5 checksum; start the string with md5:
            # * A regular expression; start the string with re:
            # * Any Python type (for example int or float)
        }
    }

    # Number of seconds a cloud comment stays on screen in the ASS output.
    _CLOUD_DISPLAY_SECONDS = 2

    # ASS script header; filled with (script title, PlayResX, PlayResY).
    _ASS_HEADER = (
        '[Script Info]\n'
        'Title: %s\n'
        'ScriptType: v4.00+\n'
        'WrapStyle: 0\n'
        'PlayResX: %d\n'
        'PlayResY: %d\n'
        'ScaledBorderAndShadow: yes\n'
        '\n'
        '[V4+ Styles]\n'
        'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n'
        'Style: Default,Arial,14,&H23FFFFFF,&H000000FF,&HC8000000,&HC8000000,-1,0,0,0,100,100,0,0,1,2,2,5,10,10,10,1\n'
        '\n'
    )
    _ASS_EVENTS_HEADER = (
        '[Events]\n'
        'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n'
    )
    # Filled with (start, end, x, y, text).  Every backslash is escaped
    # explicitly so that no invalid "\p" escape sequence is left in the source.
    _ASS_DIALOGUE = 'Dialogue: 0,%s,%s,Default,,0,0,0,,{\\an4\\pos(%d,%d)\\fad(0,50)}%s\n'

    @staticmethod
    def _srt_timecode(total_ms):
        """Format a millisecond count as an SRT ``HH:MM:SS,mmm`` timecode."""
        seconds, milliseconds = divmod(int(total_ms), 1000)
        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        return '%02d:%02d:%02d,%03d' % (hours, minutes, seconds, milliseconds)

    @staticmethod
    def _ass_timecode(total_seconds):
        """Format a whole number of seconds as ``HH:MM:SS.00``."""
        minutes, seconds = divmod(int(total_seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return '%02d:%02d:%02d.00' % (hours, minutes, seconds)

    def _convert_srt_subtitle(self, json, duration):
        """Convert a subtitle mapping to SRT text.

        json maps a second (as a string) to a mapping of millisecond (as a
        string) to the cue text.  Each cue is shown until the next cue
        starts; the last cue is shown until duration (in seconds).

        Returns the SRT document as a string; it is empty when json holds
        no cues.
        """
        cues = []  # (start timecode, text), in chronological order
        # Sort the original keys numerically instead of converting them to
        # int and back: this works on both Python 2 and 3 (no unicode())
        # and never looks up a key that differs from the one stored.
        for second_key in sorted(json, key=int):
            entries = json[second_key]
            for msec_key in sorted(entries, key=int):
                start_ms = int(second_key) * 1000 + int(msec_key)
                text = entries[msec_key].replace('<BR>', '\n').replace('&nbsp;', '')
                cues.append((self._srt_timecode(start_ms), text))

        # Round once to milliseconds so that the seconds and the millisecond
        # part of the final timecode always agree with each other.
        end_of_video = self._srt_timecode(int(round(duration * 1000)))
        ends = [start for start, _ in cues[1:]] + [end_of_video]

        return ''.join(
            '%d\n%s --> %s\n%s\n\n' % (number, start, end, text)
            for number, ((start, text), end) in enumerate(zip(cues, ends), 1))

    def _convert_ass_cloud(self, json, videoid, title, width, height):
        """Convert the "cloud" (positioned viewer comments) to an ASS script.

        json maps a second (as a string) to a list of comments, each a
        mapping with 'x', 'y' and 'text'; the '_warning' key is ignored.
        'x' and 'y' are multiplied by width and height to obtain the
        position in the script's coordinate system.  Every comment is shown
        for _CLOUD_DISPLAY_SECONDS seconds.

        Returns the ASS document as a string.
        """
        parts = [
            self._ASS_HEADER % (title + '-' + videoid, width, height),
            self._ASS_EVENTS_HEADER,
        ]

        second_keys = sorted((key for key in json if key != '_warning'), key=int)
        for second_key in second_keys:
            second = int(second_key)
            start = self._ass_timecode(second)
            end = self._ass_timecode(second + self._CLOUD_DISPLAY_SECONDS)
            for cloud in json[second_key]:
                parts.append(self._ASS_DIALOGUE % (
                    start, end,
                    cloud['x'] * width, cloud['y'] * height,
                    cloud['text']))

        return ''.join(parts)

    def _get_subtitles(self, json, title, videoid, duration, width, height):
        """Download and convert the subtitle tracks listed in the stream JSON.

        json is the stream metadata document.  When it names a cloud URL
        (first item of json['cloud']['read_url']) an ASS track is produced;
        when it names a subtitle URL (json['subtitle']) an SRT track is
        produced.  Missing or empty entries are skipped.

        Returns a dict with the single key 'tvple' mapping to the (possibly
        empty) list of tracks.
        """
        tracks = []

        cloud_urls = (json.get('cloud') or {}).get('read_url') or []
        cloud_url = cloud_urls[0] if cloud_urls else None
        if cloud_url:
            cloud = self._download_json(cloud_url, 'cloud_%d' % int(videoid))
            tracks.append({
                'ext': 'ass',
                'data': self._convert_ass_cloud(cloud, videoid, title, width, height),
            })

        subtitle_url = json.get('subtitle')
        if subtitle_url:
            subtitle = self._download_json(subtitle_url, 'subtitle_%d' % int(videoid))
            tracks.append({
                'ext': 'srt',
                'data': self._convert_srt_subtitle(subtitle, duration),
            })

        return {'tvple': tracks}

    def _real_extract(self, url):
        """Extract the info dict for the video page at url.

        The metadata URL and the title are mandatory: extraction fails with
        an extractor error naming the missing field.  All other scraped
        fields are optional and are reported as None when the page does not
        contain them.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        meta_url = self._search_regex(
            r'data-meta="(.*)"', webpage, 'metadata URL')
        playpage = self._download_json(meta_url, "playurl_%d" % int(video_id))
        stream = playpage['stream']
        duration = stream['duration']
        width = stream['width']
        height = stream['height']

        title = self._search_regex(r'<h2.*title="(.*)"', webpage, 'title')
        uploader = self._search_regex(
            r'personacon-sm".*/>\s*(.*)\s*</a>', webpage, 'uploader',
            default=None)
        uploader_id = self._search_regex(
            r'"/ch/(.*)/videos"', webpage, 'uploader id', default=None)

        description = self._search_regex(
            r'collapse-content linkify mg-top-base break-word">\s*(.*)\s*<button type="button" class="collapse-button',
            webpage, 'description', default=None, flags=re.DOTALL)
        if description is not None:
            # Drop the HTML line breaks and the template's indentation.
            description = description.replace(" <br />", "").replace("<br />", "").replace("\n            ", "")

        view_count = None
        view_count_text = self._search_regex(
            r'fa-play"></i></span>\s*(.*)\s*</li>', webpage, 'view count',
            default=None)
        if view_count_text is not None:
            try:
                view_count = int(view_count_text.replace(",", ""))
            except ValueError:
                # Unexpected counter text; an optional field must not
                # abort the extraction.
                view_count = None

        # TODO: extract the points and the upload date/time; an earlier
        # <small>-based regex attempt did not match on every page.

        category = self._search_regex(
            r'badge-info">(.*)</span>', webpage, 'category', default=None)
        # The info dict expects a list of categories, not a bare string.
        categories = [category] if category else None
        tags = re.findall(r'"/tag/(.*)" class="tag user-added">', webpage)

        formats = []
        for format_id, source in stream['sources'].items():
            video_url = (source.get('urls') or {}).get('mp4_avc')
            if not video_url:
                # Only the MP4/AVC rendition is supported for now.
                continue
            formats.append({
                'url': video_url,
                'ext': 'mp4',  # TODO-if file isn't a mp4?
                'format_id': format_id,
                'width': width,
                'height': height,
                'no_resume': True,
            })

        subtitles = self.extract_subtitles(
            playpage, title, video_id, duration, width, height)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': int(duration),
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            # 'comment_count': comment_count,
            'thumbnail': playpage.get('poster'),
            'formats': formats,
            'subtitles': subtitles,
            'categories': categories,
            'tags': tags,

            # TODO more properties (see youtube_dl/extractor/common.py)
        }
added tvple infoextractor it just working, but it needs code cleaning and user comment(구름) to subtitle converter 2015-11-04 18:14:45 +09:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00			`import re`
added tvple infoextractor it just working, but it needs code cleaning and user comment(구름) to subtitle converter 2015-11-04 18:14:45 +09:00

			`class tvpleIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?P<url>(?:www\.)?tvple\.com/(?P<id>[0-9]+))'`
			`_TEST = {`
			`'url': 'http://tvple.com/311090',`
fixed extractor 2015-12-13 11:12:14 +09:00			`'md5': '46329fca94a29b5517a30d7e88f48dbf',`
added tvple infoextractor it just working, but it needs code cleaning and user comment(구름) to subtitle converter 2015-11-04 18:14:45 +09:00			`'info_dict': {`
			`'id': '311090',`
			`'ext': 'mp4',`
			`'uploader': '[디지털 드럭] 나비붙이',`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00			`'uploader_id': 'jack1609',`
added tvple infoextractor it just working, but it needs code cleaning and user comment(구름) to subtitle converter 2015-11-04 18:14:45 +09:00			`'title': '팜플렛으로 yee를 연주하는 김병만',`
			`'description': '자작입니다. 첫 조교..인가..? 조교라긴 애매하지만, 어쨋든 노래로 만드는 건 이번이 처음입니다.\n원본 영상 출처: https://www.youtube.com/watch?v=E4BPHBL35dE\nyee는 유튜브에 치면 원본 영상이 나오는데 다들 아시죠??? 저작권 문제가 될 경우는 지우겠습니다...\n\n병만로이드라고 불러야 하나??'`
			`# TODO more properties, either as:`
			`# * A value`
			`# * MD5 checksum; start the string with md5:`
			`# * A regular expression; start the string with re:`
			`# * Any Python type (for example int or float)`
			`}`
			`}`

fixed extractor 2015-12-13 11:12:14 +09:00			`def _convert_srt_subtitle(self, json, duration):`
			`sec = []`
			`sub = ""`
			`timecode = []`
			`text = []`
			`for i in json:`
			`sec.append(int(i))`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00
fixed extractor 2015-12-13 11:12:14 +09:00			`sec.sort()`
			`for second in sec:`
			`msec = []`
			`for i in json[unicode(second)]:`
			`msec.append(int(i))`
			`msec.sort()`
			`for millisecond in msec:`
			`timecode.append("%02d:%02d:%02d,%03d" % (second // 60 // 60, second // 60 % 60, second % 60, millisecond))`
			`text.append(json[unicode(second)][unicode(millisecond)].replace('<BR>', '\n').replace(' ', ''))`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00
fixed extractor 2015-12-13 11:12:14 +09:00			`timecode.append("%02d:%02d:%02d,%03d" % (duration // 60 // 60, duration // 60 % 60, duration % 60, int(("%0.3f" % duration)[-3:])))`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00
fixed extractor 2015-12-13 11:12:14 +09:00			`for i in range(1, len(timecode)):`
			`sub += str(i) + '\n' + timecode[i - 1] + ' --> ' + timecode[i] + '\n' + text[i - 1] + '\n\n'`
			`return sub`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00
fixed extractor 2015-12-13 11:12:14 +09:00			`def _convert_ass_cloud(self, json, videoid, title, width, height):`
			`sec = []`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00
fixed extractor 2015-12-13 11:12:14 +09:00			asstemp1 = "[Script Info]\nTitle: %s\nScriptType: v4.00+\nWrapStyle: 0\nPlayResX: %d\nPlayResY: %d\nScaledBorderAndShadow: yes\n\n[V4+ Styles]\nFormat: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\nStyle: Default,Arial,14,&H23FFFFFF,&H000000FF,&HC8000000,&HC8000000,-1,0,0,0,100,100,0,0,1,2,2,5,10,10,10,1\n\n" % (title + '-' + videoid, width, height)

			`for i in json:`
			`if(i != '_warning'):`
			`sec.append(int(i))`

			`sec.sort()`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00
			`asstemp2 = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"`

fixed extractor 2015-12-13 11:12:14 +09:00			`for second in sec:`
			`for subs in json[str(second)]:`
			`timecodea = "%02d:%02d:%02d.00" % (second // 60 // 60, second // 60 % 60, second % 60)`
			`timecodeb = "%02d:%02d:%02d.00" % ((second + 2) // 60 // 60, (second + 2) // 60 % 60, (second + 2) % 60)`
			`asstemp2 += "Dialogue: 0,%s,%s,Default,,0,0,0,,{\\an4\pos(%d,%d)\\fad(0,50)}%s\n" % (timecodea, timecodeb, subs['x'] * width, subs['y'] * height, subs['text'])`

			`return (asstemp1 + asstemp2)`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00
fixed extractor 2015-12-13 11:12:14 +09:00			`def _get_subtitles(self, json, title, videoid, duration, width, height):`
			`subs = {}`
			`subs['tvple'] = []`
			`if json['cloud']['read_url'][0] != '':`
			`subs['tvple'].append({`
			`'ext': 'ass',`
			`'data': self._convert_ass_cloud(self._download_json(json['cloud']['read_url'][0], 'cloud_%d' % int(videoid)), videoid, title, width, height)`
			`})`

			`if json['subtitle'] != '':`
			`subs['tvple'].append({`
			`'ext': 'srt',`
			`'data': self._convert_srt_subtitle(self._download_json(json['subtitle'], 'subtitle_%d' % int(videoid)), duration)`
			`})`

			`return subs`
add subtitle fading like tvple, and little edit for subtitle extractor 2015-12-01 00:02:22 +09:00
added tvple infoextractor it just working, but it needs code cleaning and user comment(구름) to subtitle converter 2015-11-04 18:14:45 +09:00			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
			`webpage = self._download_webpage(url, video_id)`
fixed extractor 2015-12-13 11:12:14 +09:00			`playpage = self._download_json(re.search(r'data-meta="(.*)"', webpage).group(1), "playurl_%d" % int(video_id))`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00
			`title = re.search("<h2.title=\"(.)\"", webpage).group(1) # title`
			`uploader = re.search(r'personacon-sm"./>\s(.)\s</a>', webpage).group(1) # username`
			`uploader_id = re.search(r'"/ch/(.*)/videos"', webpage).group(1) # userid`
fixed extractor 2015-12-13 11:12:14 +09:00			`description = re.search(r'collapse-content linkify mg-top-base break-word">\s(.)\s*<button type="button" class="collapse-button', webpage, re.DOTALL).group(1).replace(" <br />", "").replace("<br />", "").replace("\n ", "") # description`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00			`# point = re.search(r'fa-bar-chart"></i></span>\s(.)p\s*</li>', webpage).group(1).replace(",", "") # point?`
			`view_count = int(re.search(r'fa-play"></i></span>\s(.)\s*</li>', webpage).group(1).replace(",", "")) # played`
fixed extractor 2015-12-13 11:12:14 +09:00			`duration = playpage['stream']['duration'] # duration`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00			`# date = re.search(r'<small>\s(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).\s*</small>', webpage).group(1).replace("-", "") # date FIXME-sometimes not working`
			`# time = re.search(r'<small>\s(\d{4}-\d{2}-\d{2}) (\d{1,2}:\d{1,2}:\d{1,2}).\s*</small>', webpage).group(2) # time FIXME-sometimes not working`
			`categories = re.search(r'badge-info">(.*)</span>', webpage).group(1) # categories`
			`tags = re.findall(r'"/tag/(.*)" class="tag user-added">', webpage) # tags`
fixed extractor 2015-12-13 11:12:14 +09:00			`formats = []`
			`for formatid in playpage['stream']['sources']:`
			`formats.append({`
			`'url': playpage['stream']['sources'][formatid]['urls']['mp4_avc'],`
			`'ext': 'mp4', # TODO-if file isn't a mp4?`
			`'format_id': formatid,`
			`'width': playpage['stream']['width'],`
			`'height': playpage['stream']['height'],`
			`'no_resume': True`
			`})`

			`subtitles = self.extract_subtitles(playpage, title, video_id, duration, playpage['stream']['width'], playpage['stream']['height'])`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00
added tvple infoextractor it just working, but it needs code cleaning and user comment(구름) to subtitle converter 2015-11-04 18:14:45 +09:00			`return {`
			`'id': video_id,`
			`'title': title,`
			`'description': description,`
fixed extractor 2015-12-13 11:12:14 +09:00			`'duration': int(duration),`
added tvple infoextractor it just working, but it needs code cleaning and user comment(구름) to subtitle converter 2015-11-04 18:14:45 +09:00			`'uploader': uploader,`
			`'uploader_id': uploader_id,`
			`'view_count': view_count,`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00			`# 'comment_count': comment_count,`
fixed extractor 2015-12-13 11:12:14 +09:00			`'thumbnail': playpage['poster'],`
added tvple infoextractor it just working, but it needs code cleaning and user comment(구름) to subtitle converter 2015-11-04 18:14:45 +09:00			`'formats': formats,`
add subtitle fading like tvple, and little edit for subtitle extractor 2015-12-01 00:02:22 +09:00			`'subtitles': subtitles,`
fixed tvple downloader, add cloud to subtitle converter, cleaned code now cloud can converted to ass subtitle! 2015-11-08 11:03:01 +09:00			`'categories': categories,`
			`'tags': tags`

added tvple infoextractor it just working, but it needs code cleaning and user comment(구름) to subtitle converter 2015-11-04 18:14:45 +09:00			`# TODO more properties (see youtube_dl/extractor/common.py)`
			`}`