From 839d9a85c05137b09d266e2db707fb7d52911c27 Mon Sep 17 00:00:00 2001 From: Leonardo Taccari Date: Thu, 30 Aug 2018 20:12:32 +0200 Subject: [PATCH 1/2] [internazionale] Fix extraction of non-available-abroad videos Some videos can have a `data-video-available_abroad` attribute that can be "0" or "1" depending on whether the video is available abroad (if the attribute is not present, the video is treated as available abroad). For videos that are not available abroad (those with data-video-available_abroad="0"), video-ita.internazionale.it is used instead of video.internazionale.it. Adjust the Internazionale extractor to reflect that and add a test for a video that is not available abroad. --- youtube_dl/extractor/internazionale.py | 29 +++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/internazionale.py b/youtube_dl/extractor/internazionale.py index 10ba1f6cf..ea1dfc960 100644 --- a/youtube_dl/extractor/internazionale.py +++ b/youtube_dl/extractor/internazionale.py @@ -7,7 +7,7 @@ from ..utils import unified_timestamp class InternazionaleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood', 'md5': '3e39d32b66882c1218e305acbf8348ca', 'info_dict': { @@ -23,7 +23,23 @@ class InternazionaleIE(InfoExtractor): 'params': { 'format': 'bestvideo', }, - } + }, { + 'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi', + 'md5': '9db8663704cab73eb972d1cee0082c79', + 'info_dict': { + 'id': '761344', + 'display_id': 'telefono-stare-con-noi-stessi', + 'ext': 'mp4', + 'title': 'Usiamo il telefono per evitare di stare con noi stessi', + 'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44', + 'timestamp': 1535528954, + 'upload_date': '20180829', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'format': 'bestvideo', + }, + }] def _real_extract(self, url): 
display_id = self._match_id(url) @@ -40,8 +56,15 @@ class InternazionaleIE(InfoExtractor): DATA_RE % 'job-id', webpage, 'video id', group='value') video_path = self._search_regex( DATA_RE % 'video-path', webpage, 'video path', group='value') + video_available_abroad = bool(int(self._search_regex( + DATA_RE % 'video-available_abroad', webpage, + 'video available aboard', default='1', group='value'))) - video_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id) + if video_available_abroad: + video_base = 'https://video.internazionale.it/' + else: + video_base = 'https://video-ita.internazionale.it/' + video_base = video_base + '%s/%s.' % (video_path, video_id) formats = self._extract_m3u8_formats( video_base + 'm3u8', display_id, 'mp4', From b3b271a0b126c973886fa1d8677d2f247add82b3 Mon Sep 17 00:00:00 2001 From: Leonardo Taccari Date: Thu, 30 Aug 2018 21:08:30 +0200 Subject: [PATCH 2/2] [internazionale] Avoid bool(int(...)) casting and code duplication (Hopefully) based on what @dstftw suggested. It is better to avoid bool(int(...)) because it is fragile. Avoid code duplication in video_base initialization. 
--- youtube_dl/extractor/internazionale.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/internazionale.py b/youtube_dl/extractor/internazionale.py index ea1dfc960..676e8e269 100644 --- a/youtube_dl/extractor/internazionale.py +++ b/youtube_dl/extractor/internazionale.py @@ -56,15 +56,13 @@ class InternazionaleIE(InfoExtractor): DATA_RE % 'job-id', webpage, 'video id', group='value') video_path = self._search_regex( DATA_RE % 'video-path', webpage, 'video path', group='value') - video_available_abroad = bool(int(self._search_regex( + video_available_abroad = self._search_regex( DATA_RE % 'video-available_abroad', webpage, - 'video available aboard', default='1', group='value'))) + 'video available aboard', default='1', group='value') + video_available_abroad = video_available_abroad == '1' - if video_available_abroad: - video_base = 'https://video.internazionale.it/' - else: - video_base = 'https://video-ita.internazionale.it/' - video_base = video_base + '%s/%s.' % (video_path, video_id) + video_base = 'https://video%s.internazionale.it/%s/%s.' % \ + ('' if video_available_abroad else '-ita', video_path, video_id) formats = self._extract_m3u8_formats( video_base + 'm3u8', display_id, 'mp4',