From 866fffce403d9b858e6d56e15e8da1d9b90a072e Mon Sep 17 00:00:00 2001 From: james Date: Sat, 17 Jun 2017 17:15:41 +0200 Subject: [PATCH 1/4] Support for rai livestreams --- youtube_dl/extractor/rai.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 81eb9db85..d9a0d3c30 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -118,7 +118,7 @@ class RaiBaseIE(InfoExtractor): class RaiPlayIE(RaiBaseIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?raiplay\.it/.+?-(?P%s)\.html)' % RaiBaseIE._UUID_RE + _VALID_URL = r'(?Phttps?://(?:www\.)?raiplay\.it/(?:dirette/.*)?(?:.+?-(?=[\da-f]{8})(?P%s)\.html)?)' % RaiBaseIE._UUID_RE _TESTS = [{ 'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', 'md5': '340aa3b7afb54bfd14a8c11786450d76', @@ -136,7 +136,7 @@ class RaiPlayIE(RaiBaseIE): 'upload_date': '20161029', 'series': 'La Casa Bianca', 'season': '2016', - }, + } }, { 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', @@ -156,16 +156,26 @@ class RaiPlayIE(RaiBaseIE): }, 'params': { 'skip_download': True, - }, + } }, { 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 'only_matching': True, + }, { + 'url': 'http://www.raiplay.it/dirette/rai3', + 'only_matching': True, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) url, video_id = mobj.group('url', 'id') + # Support for livestreams: downloads the page and retrieves ContentItem id + if video_id is None and 'dirette' in url: + webpage = self._download_webpage(url, video_id) + re_id = r']*)data-uniquename=(["\'])[\w-]*(?P%s)(\2)([^>]*?)>' % RaiBaseIE._UUID_RE + video_id = self._html_search_regex(re_id, webpage, 'livestream-id', group='id') + url = 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id + media = self._download_json( '%s?json' % url, video_id, 'Downloading video JSON') From 7d85663a85abbcc7f1704987937ebdb82bda3cc4 Mon Sep 17 00:00:00 2001 From: james Date: Sun, 18 Jun 2017 18:28:57 +0200 Subject: [PATCH 2/4] Add support in separate extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rai.py | 65 +++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7e45232dd..2d753dedb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -820,6 +820,7 @@ from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import ( RaiPlayIE, + RaiPlayLiveIE, RaiIE, ) from .rbmaradio import RBMARadioIE diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index d9a0d3c30..3bc61a814 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -118,7 +118,7 @@ class RaiBaseIE(InfoExtractor): class RaiPlayIE(RaiBaseIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?raiplay\.it/(?:dirette/.*)?(?:.+?-(?=[\da-f]{8})(?P%s)\.html)?)' % RaiBaseIE._UUID_RE + _VALID_URL = r'(?Phttps?://(?:www\.)?raiplay\.it/.+?-(?P%s)\.html)' % RaiBaseIE._UUID_RE _TESTS = [{ 'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', 'md5': '340aa3b7afb54bfd14a8c11786450d76', @@ -160,22 +160,12 @@ class RaiPlayIE(RaiBaseIE): }, { 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 'only_matching': True, - }, { - 'url': 'http://www.raiplay.it/dirette/rai3', - 'only_matching': True, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) url, video_id = mobj.group('url', 'id') - # Support for livestreams: downloads the page and retrieves ContentItem id - if video_id is None and 'dirette' in url: - webpage = self._download_webpage(url, video_id) - re_id = r']*)data-uniquename=(["\'])[\w-]*(?P%s)(\2)([^>]*?)>' % RaiBaseIE._UUID_RE - video_id = self._html_search_regex(re_id, webpage, 'livestream-id', group='id') - url = 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id - media = self._download_json( '%s?json' % url, video_id, 'Downloading video JSON') @@ -218,7 +208,60 @@ class RaiPlayIE(RaiBaseIE): } info.update(relinker_info) + return info + +class RaiPlayLiveIE(RaiBaseIE): + _VALID_URL = r'(?Phttps?://(?:www\.)?raiplay\.it/dirette/(?P\w*))' + _TEST = { + 'url': 'http://www.raiplay.it/dirette/rai3', + 'only_matching': True, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + url, channel = mobj.group('url', 'id') + + webpage = self._download_webpage(url, channel) + re_id = r']*)data-uniquename=(["\'])[\w-]*(?P%s)(\2)([^>]*?)>' % RaiBaseIE._UUID_RE + video_id = self._html_search_regex(re_id, webpage, 'livestream-id', group='id') + url = 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id + + media = self._download_json( + '%s?json' % url, video_id, 'Downloading video JSON') + + title = media['name'] + video = media['video'] + + relinker_info = self._extract_relinker_info(video['contentUrl'], video_id) + self._sort_formats(relinker_info['formats']) + + thumbnails = [] + if 'images' in media: + for _, value in media.get('images').items(): + if value: + thumbnails.append({ + 'url': value.replace('[RESOLUTION]', '600x400') + }) + + timestamp = unified_timestamp(try_get( + media, lambda x: x['availabilities'][0]['start'], compat_str)) + + subtitles = self._extract_subtitles(url, video.get('subtitles')) + + info = { + 'id': video_id, + 'title': title, + 'alt_title': media.get('subtitle'), + 'description': media.get('description'), + 'uploader': media.get('channel'), + 'creator': media.get('editor'), + 'timestamp': timestamp, + 'thumbnails': thumbnails, + 'subtitles': subtitles, + } + + info.update(relinker_info) return info From 0a3a0abb4b143f025634b4f41566b9008fbea910 Mon Sep 17 00:00:00 2001 From: james Date: Tue, 20 Jun 2017 17:40:12 +0200 Subject: [PATCH 3/4] Delegate to RaiPlayIE --- youtube_dl/extractor/rai.py | 40 ++++--------------------------------- 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 3bc61a814..7a8c16ef9 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -225,45 +225,13 @@ class RaiPlayLiveIE(RaiBaseIE): webpage = self._download_webpage(url, channel) re_id = r']*)data-uniquename=(["\'])[\w-]*(?P%s)(\2)([^>]*?)>' % RaiBaseIE._UUID_RE video_id = self._html_search_regex(re_id, webpage, 'livestream-id', group='id') - url = 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id - media = self._download_json( - '%s?json' % url, video_id, 'Downloading video JSON') - - title = media['name'] - video = media['video'] - - relinker_info = self._extract_relinker_info(video['contentUrl'], video_id) - self._sort_formats(relinker_info['formats']) - - thumbnails = [] - if 'images' in media: - for _, value in media.get('images').items(): - if value: - thumbnails.append({ - 'url': value.replace('[RESOLUTION]', '600x400') - }) - - timestamp = unified_timestamp(try_get( - media, lambda x: x['availabilities'][0]['start'], compat_str)) - - subtitles = self._extract_subtitles(url, video.get('subtitles')) - - info = { - 'id': video_id, - 'title': title, - 'alt_title': media.get('subtitle'), - 'description': media.get('description'), - 'uploader': media.get('channel'), - 'creator': media.get('editor'), - 'timestamp': timestamp, - 'thumbnails': thumbnails, - 'subtitles': subtitles, + return { + '_type': 'url_transparent', + 'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, + 'ie_key': RaiPlayIE.ie_key() } - info.update(relinker_info) - return info - class RaiIE(RaiBaseIE): _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE From 9fbbbe3ed6a51d20a02f9721ca7b6c5a829f677a Mon Sep 17 00:00:00 2001 From: james Date: Fri, 23 Jun 2017 21:02:16 +0200 Subject: [PATCH 4/4] Add changes as requested --- youtube_dl/extractor/rai.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 7a8c16ef9..ed15a5f10 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -136,7 +136,7 @@ class RaiPlayIE(RaiBaseIE): 'upload_date': '20161029', 'series': 'La Casa Bianca', 'season': '2016', - } + }, }, { 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', @@ -156,7 +156,7 @@ class RaiPlayIE(RaiBaseIE): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 'only_matching': True, @@ -212,25 +212,21 @@ class RaiPlayIE(RaiBaseIE): class RaiPlayLiveIE(RaiBaseIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?raiplay\.it/dirette/(?P\w*))' + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P\w*)' _TEST = { 'url': 'http://www.raiplay.it/dirette/rai3', 'only_matching': True, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - url, channel = mobj.group('url', 'id') + channel = self._match_id(url) webpage = self._download_webpage(url, channel) re_id = r']*)data-uniquename=(["\'])[\w-]*(?P%s)(\2)([^>]*?)>' % RaiBaseIE._UUID_RE video_id = self._html_search_regex(re_id, webpage, 'livestream-id', group='id') - return { - '_type': 'url_transparent', - 'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, - 'ie_key': RaiPlayIE.ie_key() - } + return self.url_result('http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, + RaiPlayIE.ie_key(), video_id) class RaiIE(RaiBaseIE):