From 2cd20b353aa43ad0b9808fe2989a0a3fbe4f1407 Mon Sep 17 00:00:00 2001 From: kenavera Date: Fri, 6 Apr 2018 11:10:24 +0200 Subject: [PATCH 1/3] [medialaan] Fix clips downloading --- youtube_dl/extractor/medialaan.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dl/extractor/medialaan.py index 50d5db802..c5da97db3 100644 --- a/youtube_dl/extractor/medialaan.py +++ b/youtube_dl/extractor/medialaan.py @@ -64,8 +64,6 @@ class MedialaanIE(GigyaBaseIE): 'ext': 'mp4', 'title': '"Veronique liegt!"', 'description': 'md5:1385e2b743923afe54ba4adc38476155', - 'timestamp': 1489002029, - 'upload_date': '20170308', 'duration': 96, }, }, { @@ -147,21 +145,14 @@ class MedialaanIE(GigyaBaseIE): # clip, no authentication required if not vod_id: - player = self._parse_json( - self._search_regex( - r'vmmaplayer\(({.+?})\);', webpage, 'vmma player', - default=''), - video_id, transform_source=lambda s: '[%s]' % s, fatal=False) - if player: - video = player[-1] - if video['videoUrl'] in ('http', 'https'): - return self.url_result(video['url'], MedialaanIE.ie_key()) + video = self._parse_json(self._search_regex(r'"video":({.+?}}]})', webpage, 'video', default='{}'), video_id) + metadata = self._parse_json(self._search_regex(r'{"metadata":({.+?})', webpage, 'metadata', default='{}'), video_id) + if video: info = { 'id': video_id, - 'url': video['videoUrl'], - 'title': video['title'], - 'thumbnail': video.get('imageUrl'), - 'timestamp': int_or_none(video.get('createdDate')), + 'url': video.get('formats')[0].get('url'), + 'title': metadata.get('videoTitle'), + 'thumbnail': video.get('poster'), 'duration': int_or_none(video.get('duration')), } else: From c965be955abd104ad7955fc3dcca0e7b8a48febc Mon Sep 17 00:00:00 2001 From: kenavera Date: Mon, 23 Apr 2018 10:34:10 +0200 Subject: [PATCH 2/3] Use drupal settings --- youtube_dl/extractor/medialaan.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dl/extractor/medialaan.py index c5da97db3..15137f0d8 100644 --- a/youtube_dl/extractor/medialaan.py +++ b/youtube_dl/extractor/medialaan.py @@ -10,6 +10,7 @@ from ..utils import ( parse_duration, try_get, unified_timestamp, + unified_strdate ) @@ -60,11 +61,13 @@ class MedialaanIE(GigyaBaseIE): # clip 'url': 'http://vtm.be/video?aid=168332', 'info_dict': { - 'id': '168332', + 'id': 'vtm_168332', 'ext': 'mp4', 'title': '"Veronique liegt!"', 'description': 'md5:1385e2b743923afe54ba4adc38476155', 'duration': 96, + 'timestamp': 1489002029, + 'upload_date': '20170308', }, }, { # vod @@ -145,15 +148,31 @@ class MedialaanIE(GigyaBaseIE): # clip, no authentication required if not vod_id: - video = self._parse_json(self._search_regex(r'"video":({.+?}}]})', webpage, 'video', default='{}'), video_id) - metadata = self._parse_json(self._search_regex(r'{"metadata":({.+?})', webpage, 'metadata', default='{}'), video_id) + settings = self._parse_json( + self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings', default='{}'), + video_id) + + if not re.match(video_id, 'vtm_[0-9]*'): + video_id = settings.get('medialaan_player').keys()[0] + config = try_get(settings, lambda x: x['medialaan_player'][video_id]['videoConfig'], None) + + video = config.get('video') + metadata = config.get('tracking').get('metadata') + pubdate = metadata.get('pubDate') or self._search_regex( + r'"%s"\s*:\s*"([^"]+)' % 'pubDate', webpage, 'pubDate', + default=None) + if video: info = { 'id': video_id, 'url': video.get('formats')[0].get('url'), 'title': metadata.get('videoTitle'), 'thumbnail': video.get('poster'), + 'timestamp': unified_timestamp(pubdate), 'duration': int_or_none(video.get('duration')), + 'upload_date': unified_strdate(pubdate) } else: info = self._parse_html5_media_entries( From 22547719965a69da34c072b17ff662e2a0320122 Mon Sep 17 00:00:00 2001 From: kenavera Date: Fri, 18 May 2018 10:50:45 +0200 Subject: [PATCH 3/3] Fix mandatory feature extraction --- youtube_dl/extractor/medialaan.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dl/extractor/medialaan.py index 15137f0d8..10ae53420 100644 --- a/youtube_dl/extractor/medialaan.py +++ b/youtube_dl/extractor/medialaan.py @@ -167,8 +167,8 @@ class MedialaanIE(GigyaBaseIE): if video: info = { 'id': video_id, - 'url': video.get('formats')[0].get('url'), - 'title': metadata.get('videoTitle'), + 'formats': video.get('formats'), + 'title': metadata.get('videoTitle') or self._og_search_title(webpage), 'thumbnail': video.get('poster'), 'timestamp': unified_timestamp(pubdate), 'duration': int_or_none(video.get('duration')),