[utils] Add support for DCSubtitle

This commit is contained in:
Déstin Reed 2016-08-20 15:40:02 +02:00
parent 39e1c4f08c
commit 4a27e3da81
3 changed files with 84 additions and 0 deletions

View File

@ -79,6 +79,7 @@ from youtube_dl.utils import (
match_str,
parse_dfxp_time_expr,
dfxp2srt,
dc2srt,
cli_option,
cli_valueless_option,
cli_bool_option,
@ -970,6 +971,38 @@ The first line
'''
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
def test_dc2srt(self):
    # Verify that dc2srt() converts a DCSubtitle XML document into the
    # expected SRT text.
    #
    # Fixture: a single <Font> element holding two <Subtitle> cues; the
    # first cue carries two <Text> lines, the second carries one.
    dc_data = '''<?xml version="1.0" encoding="UTF-8"?>
<DCSubtitle Version="1.0">
<SubtitleID>id</SubtitleID>
<MovieTitle>title</MovieTitle>
<ReelNumber>1</ReelNumber>
<Language>English</Language>
<Font Italic="no">
<Subtitle SpotNumber="1" TimeIn="00:00:05:000" TimeOut="00:00:08:357" FadeUpTime="20" FadeDownTime="20">
<Text Direction="horizontal" HAlign="center" HPosition="0.0" VAlign="bottom" VPosition="14.0">^_^</Text>
<Text Direction="horizontal" HAlign="center" HPosition="0.0" VAlign="bottom" VPosition="6.0">second line</Text>
</Subtitle>
<Subtitle SpotNumber="2" TimeIn="00:00:08:357" TimeOut="00:00:09:000" FadeUpTime="20" FadeDownTime="20">
<Text Direction="horizontal" HAlign="center" HPosition="0.0" VAlign="bottom" VPosition="6.0">single line</Text>
</Subtitle>
</Font>
</DCSubtitle>'''
    # Expected SRT: cues numbered from 1, each <Text> on its own line.
    # NOTE(review): the input time 00:00:08:357 is expected as 00:00:08,356
    # here -- presumably a float rounding artifact of the time conversion;
    # confirm this is intended rather than pinning a bug.
    srt_data = '''1
00:00:05,000 --> 00:00:08,356
^_^
second line
2
00:00:08,356 --> 00:00:09,000
single line
'''
    self.assertEqual(dc2srt(dc_data), srt_data)
def test_cli_option(self):
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])

View File

@ -21,6 +21,7 @@ from ..utils import (
shell_quote,
subtitles_filename,
dfxp2srt,
dc2srt,
ISO639Utils,
)
@ -568,6 +569,30 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
continue
else:
sub_filenames.append(srt_file)
# TODO: Distinguish between different XML-based subtitle formats; for now every 'xml' extension is assumed to be DCSubtitle
elif ext == 'xml':
self._downloader.report_warning(
'You have requested to convert DC (XML) subtitles into another format, '
'which results in style information loss')
dc_file = old_file
srt_file = subtitles_filename(filename, lang, 'srt')
with io.open(dc_file, 'rt', encoding='utf-8') as f:
srt_data = dc2srt(f.read())
with io.open(srt_file, 'wt', encoding='utf-8') as f:
f.write(srt_data)
old_file = srt_file
subs[lang] = {
'ext': 'srt',
'data': srt_data,
}
if new_ext == 'srt':
continue
sub_filenames.append(srt_file)
self.run_ffmpeg(old_file, new_file, ['-f', new_format])

View File

@ -2432,6 +2432,32 @@ def dfxp2srt(dfxp_data):
return ''.join(out)
# See https://web.archive.org/web/20140924175755/http://www.dlp.com/downloads/pdf_dlp_cinema_CineCanvas_Rev_C.pdf
def dc2srt(xml_data):
    """Convert a DCSubtitle (CineCanvas XML) document into SRT text.

    Only the ``Subtitle`` elements that are direct children of the first
    top-level ``Font`` element are converted. Styling and positioning
    attributes are dropped; each ``Text`` child becomes one line of the cue.

    Args:
        xml_data: The DCSubtitle document as a text string.

    Returns:
        The SRT document as a single string. Cues are numbered by their
        position in the source, so a skipped cue leaves a gap in the
        numbering.

    Raises:
        ValueError: If the document has no top-level ``Font`` element or
            that element contains no ``Subtitle`` children.
    """
    xml = compat_etree_fromstring(xml_data.encode('utf-8'))

    # find() returns None when the element is absent; treat that like an
    # empty subtitle list so the caller gets the documented ValueError
    # instead of an AttributeError. ("is not None" is required because an
    # Element without children is falsy.)
    font = xml.find('Font')
    subtitles = font.findall('Subtitle') if font is not None else []
    if not subtitles:
        raise ValueError('Invalid DC/XML subtitle')

    out = []
    for index, subtitle in enumerate(subtitles, 1):
        begin_time = parse_dfxp_time_expr(subtitle.attrib.get('TimeIn'))
        end_time = parse_dfxp_time_expr(subtitle.attrib.get('TimeOut'))
        # A cue may legitimately start at 0 seconds, so only a missing start
        # time is rejected; an end time that is missing or 0 cannot describe
        # a displayable cue.
        # NOTE(review): assumes parse_dfxp_time_expr returns None for an
        # absent or unparseable value -- confirm against its definition.
        if begin_time is None or not end_time:
            continue
        # Element.text is None for an empty <Text/>; emit an empty line then.
        text = ''.join(
            (line.text or '') + '\n' for line in subtitle.findall('Text'))
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            text))
    return ''.join(out)
def cli_option(params, command_option, param):
param = params.get(param)