diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 81c22a627..2ef177275 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -85,6 +85,13 @@ class ZDFIE(InfoExtractor):
         uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
         uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
         upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
+        subtitles = {}
+        captions_url = doc.find('.//caption/url')
+        if captions_url is not None:
+            subtitles['de'] = [{
+                'url': captions_url.text,
+                'ext': 'ttml',
+            }]
 
         def xml_to_thumbnails(fnode):
             thumbnails = []
@@ -190,6 +197,7 @@ class ZDFIE(InfoExtractor):
             'uploader_id': uploader_id,
             'upload_date': upload_date,
             'formats': formats,
+            'subtitles': subtitles,
         }
 
     def _real_extract(self, url):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 117229e53..7dc18f245 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2133,6 +2133,7 @@ def dfxp2srt(dfxp_data):
     _x = functools.partial(xpath_with_ns, ns_map={
         'ttml': 'http://www.w3.org/ns/ttml',
         'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
+        'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
     })
 
     class TTMLPElementParser(object):
@@ -2159,7 +2160,7 @@ def dfxp2srt(dfxp_data):
 
     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
     out = []
-    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
+    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p')
 
     if not paras:
         raise ValueError('Invalid dfxp/TTML subtitle')