From 8c7d6e8e2279beccf638cd0fae9d91876e0486b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 4 Apr 2016 20:44:06 +0200 Subject: [PATCH 1/2] [zdf] Extract subtitles (closes #9081) --- youtube_dl/extractor/zdf.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 81c22a627..2ef177275 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -85,6 +85,13 @@ class ZDFIE(InfoExtractor): uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader') uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id') upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date')) + subtitles = {} + captions_url = doc.find('.//caption/url') + if captions_url is not None: + subtitles['de'] = [{ + 'url': captions_url.text, + 'ext': 'ttml', + }] def xml_to_thumbnails(fnode): thumbnails = [] @@ -190,6 +197,7 @@ class ZDFIE(InfoExtractor): 'uploader_id': uploader_id, 'upload_date': upload_date, 'formats': formats, + 'subtitles': subtitles, } def _real_extract(self, url): From 5bf28d7864d83be98233b6d1e478d7911f99e2cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 4 Apr 2016 20:46:35 +0200 Subject: [PATCH 2/2] [utils] dfxp2srt: add additional namespace Used by the ZDF subtitles (#9081). --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5c4ab2748..8e53962c9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2131,6 +2131,7 @@ def dfxp2srt(dfxp_data): _x = functools.partial(xpath_with_ns, ns_map={ 'ttml': 'http://www.w3.org/ns/ttml', 'ttaf1': 'http://www.w3.org/2006/10/ttaf1', + 'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1', }) class TTMLPElementParser(object): @@ -2157,7 +2158,7 @@ def dfxp2srt(dfxp_data): dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) out = [] - paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p') + paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p') if not paras: raise ValueError('Invalid dfxp/TTML subtitle')