From 839d9a85c05137b09d266e2db707fb7d52911c27 Mon Sep 17 00:00:00 2001 From: Leonardo Taccari Date: Thu, 30 Aug 2018 20:12:32 +0200 Subject: [PATCH] [internazionale] Fix extraction of non-available-abroad videos Some videos can have a `data-video-available_abroad` attribute that can be "0" or "1" depending on whether the video is available abroad (if the attribute is not present, the video is available abroad). For videos that are not available abroad (all the ones with data-video-available_abroad="0"), video-ita.internazionale.it is used instead of video.internazionale.it. Adjust Internazionale extractor to reflect that and add a test for a video that is not available abroad. --- youtube_dl/extractor/internazionale.py | 29 +++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/internazionale.py b/youtube_dl/extractor/internazionale.py index 10ba1f6cf..ea1dfc960 100644 --- a/youtube_dl/extractor/internazionale.py +++ b/youtube_dl/extractor/internazionale.py @@ -7,7 +7,7 @@ from ..utils import unified_timestamp class InternazionaleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood', 'md5': '3e39d32b66882c1218e305acbf8348ca', 'info_dict': { @@ -23,7 +23,23 @@ class InternazionaleIE(InfoExtractor): 'params': { 'format': 'bestvideo', }, - } + }, { + 'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi', + 'md5': '9db8663704cab73eb972d1cee0082c79', + 'info_dict': { + 'id': '761344', + 'display_id': 'telefono-stare-con-noi-stessi', + 'ext': 'mp4', + 'title': 'Usiamo il telefono per evitare di stare con noi stessi', + 'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44', + 'timestamp': 1535528954, + 'upload_date': '20180829', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'format': 'bestvideo', + }, + }] def _real_extract(self, url): 
display_id = self._match_id(url) @@ -40,8 +56,15 @@ class InternazionaleIE(InfoExtractor): DATA_RE % 'job-id', webpage, 'video id', group='value') video_path = self._search_regex( DATA_RE % 'video-path', webpage, 'video path', group='value') + video_available_abroad = bool(int(self._search_regex( + DATA_RE % 'video-available_abroad', webpage, + 'video available aboard', default='1', group='value'))) - video_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id) + if video_available_abroad: + video_base = 'https://video.internazionale.it/' + else: + video_base = 'https://video-ita.internazionale.it/' + video_base = video_base + '%s/%s.' % (video_path, video_id) formats = self._extract_m3u8_formats( video_base + 'm3u8', display_id, 'mp4',