From 8fbefa5cf7c418f4aaccb4e4b08b7660f0549954 Mon Sep 17 00:00:00 2001 From: Guillem Vela Date: Thu, 27 Feb 2020 22:18:47 +0100 Subject: [PATCH 1/4] [CCMA] Fix wrong timestamp issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason, the provided UTC timestamp does not comply with ISO 8601: its format is YYYY-DD-MM instead of the expected YYYY-MM-DD. This can be verified against the "text" field that accompanies it in the emission date object. Example: "data_emissio": { "text": "14/05/2002 21:39", "utc": "2002-14-05T21:39:28+0200" } This commit swaps the day and month fields of the "utc" value before parsing it. --- youtube_dl/extractor/ccma.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 544647f92..6a955da84 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -24,8 +24,8 @@ class CCMAIE(InfoExtractor): 'ext': 'mp4', 'title': 'L\'espot de La Marató de TV3', 'description': 'md5:f12987f320e2f6e988e9908e4fe97765', - 'timestamp': 1470918540, - 'upload_date': '20160811', + 'timestamp': 1478608140, + 'upload_date': '20161108', } }, { 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', @@ -35,8 +35,8 @@ class CCMAIE(InfoExtractor): 'ext': 'mp3', 'title': 'El Consell de Savis analitza el derbi', 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', - 'upload_date': '20171205', - 'timestamp': 1512507300, + 'upload_date': '20170512', + 'timestamp': 1494622500, } }] @@ -74,7 +74,11 @@ class CCMAIE(InfoExtractor): title = informacio['titol'] durada = informacio.get('durada', {}) duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) - timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc')) + + # utc date is in format YYYY-DD-MM + data_utc = informacio.get('data_emissio', {}).get('utc') + data_iso8601 = data_utc[:5] + data_utc[8:10] + '-' + data_utc[5:7] + 
data_utc[10:] + timestamp = parse_iso8601(data_iso8601) subtitles = {} subtitols = media.get('subtitols', {}) From 69c4e35907db440de005ba356436f98ff60b1672 Mon Sep 17 00:00:00 2001 From: Guillem Vela Date: Thu, 27 Feb 2020 22:22:16 +0100 Subject: [PATCH 2/4] [CCMA] Add test with multiple subtitles The added test covers one of the cases that broke the extractor: a video whose subtitles field features multiple languages. --- youtube_dl/extractor/ccma.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 6a955da84..f7f6de5c8 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -27,6 +27,17 @@ class CCMAIE(InfoExtractor): 'timestamp': 1478608140, 'upload_date': '20161108', } + }, { + 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/', + 'md5': 'b43c3d3486f430f3032b5b160d80cbc3', + 'info_dict': { + 'id': '6031387', + 'ext': 'mp4', + 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)', + 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60', + 'timestamp': 1582577700, + 'upload_date': '20200224', + } }, { 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', 'md5': 'fa3e38f269329a278271276330261425', From 8c60c29d341f9291b81f850e0c4b7b20fb15f96a Mon Sep 17 00:00:00 2001 From: Guillem Vela Date: Thu, 27 Feb 2020 22:27:21 +0100 Subject: [PATCH 3/4] [CCMA] Fix multiple subtitles incompatibility The CCMA extractor used to raise an exception when attempting to download a URL featuring subtitles in multiple languages. When a single language is available, the "subtitols" field is the expected dict; when multiple languages are available, a list of dicts is provided instead. This commit wraps the single-dict case in a list and iterates over the entries, so both shapes are handled. 
--- youtube_dl/extractor/ccma.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index f7f6de5c8..d4873860a 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -92,12 +92,15 @@ class CCMAIE(InfoExtractor): timestamp = parse_iso8601(data_iso8601) subtitles = {} - subtitols = media.get('subtitols', {}) - if subtitols: - sub_url = subtitols.get('url') + subtitols = media.get('subtitols', []) + # Single language -> dict; multiple languages -> List[dict] + if isinstance(subtitols, dict): + subtitols = [subtitols] + for st in subtitols: + sub_url = st.get('url') if sub_url: subtitles.setdefault( - subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({ + st.get('iso') or 'ca', []).append({ 'url': sub_url, }) From b4a70306209e836d635dc284f13ccda6d9229089 Mon Sep 17 00:00:00 2001 From: Guillem Vela Date: Fri, 28 Feb 2020 00:27:01 +0100 Subject: [PATCH 4/4] [CCMA] Avoid exception when 'utc' is not found --- youtube_dl/extractor/ccma.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index d4873860a..21de10b23 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -88,8 +88,11 @@ class CCMAIE(InfoExtractor): # utc date is in format YYYY-DD-MM data_utc = informacio.get('data_emissio', {}).get('utc') - data_iso8601 = data_utc[:5] + data_utc[8:10] + '-' + data_utc[5:7] + data_utc[10:] - timestamp = parse_iso8601(data_iso8601) + try: + data_iso8601 = data_utc[:5] + data_utc[8:10] + '-' + data_utc[5:7] + data_utc[10:] + timestamp = parse_iso8601(data_iso8601) + except TypeError: + timestamp = None subtitles = {} subtitols = media.get('subtitols', [])