From 7db0a3d1bc3e33972603bad07cb08ec9c273c9d9 Mon Sep 17 00:00:00 2001
From: fnord
Date: Wed, 15 Jul 2015 04:29:10 -0500
Subject: [PATCH] dfxp2srt: Fix disappearing words after <span>s (
 '<span>...</span> this-goes-missing' ). Ensure trailing whitespace/newlines
 are not added.

---
Review notes (this section is not part of the commit message):
- text_or_empty() no longer rebinds the builtin name `str`.
- The re.DOTALL flag was dropped: the pattern contains no '.', so the flag
  had no effect. r'\S' is equivalent to r'[^\s]'.
- Spacing and blank lines were tidied; behaviour is unchanged.
- The post-image blob id on the "index" line predates this revision.

 youtube_dl/utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 68d2f4984..485408baa 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1850,6 +1850,12 @@ def dfxp2srt(dfxp_data):
         'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
     })
 
+    def text_or_empty(v):
+        # Text without any non-whitespace character becomes '' so that no
+        # trailing whitespace/newlines are appended after a span.
+        text = str_or_none(v, '')
+        return text if re.search(r'\S', text) else ''
+
     def parse_node(node):
         str_or_empty = functools.partial(str_or_none, default='')
 
@@ -1859,7 +1865,7 @@ def dfxp2srt(dfxp_data):
             if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
                 out += '\n' + str_or_empty(child.tail)
             elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
-                out += str_or_empty(parse_node(child))
+                out += str_or_empty(parse_node(child)) + text_or_empty(child.tail)
             else:
                 out += str_or_empty(xml.etree.ElementTree.tostring(child))
 