From ad544260716d733de04b41db7a0e5cef7573df43 Mon Sep 17 00:00:00 2001
From: Vincent Olivier
Date: Thu, 17 Nov 2016 18:23:20 -0500
Subject: [PATCH] [radiocanada] Extractor Enhancements

---
 youtube_dl/extractor/extractors.py  |  1 +
 youtube_dl/extractor/radiocanada.py | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9107f0b96..a55db0928 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -738,6 +738,7 @@ from .r7 import (
 from .radiocanada import (
     RadioCanadaIE,
     RadioCanadaAudioVideoIE,
+    RadioCanadaArticleIE,
 )
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py
index 321917ad0..2133c332e 100644
--- a/youtube_dl/extractor/radiocanada.py
+++ b/youtube_dl/extractor/radiocanada.py
@@ -14,6 +14,7 @@ from ..utils import (
     ExtractorError,
     determine_protocol,
     unsmuggle_url,
+    unescapeHTML,
 )
 
 
@@ -168,3 +169,35 @@ class RadioCanadaAudioVideoIE(InfoExtractor):
 
     def _real_extract(self, url):
         return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
+
+
+class RadioCanadaArticleIE(InfoExtractor):
+    """Extract every media item embedded in an ici.radio-canada.ca page.
+
+    Each "idMedia" value found in the page is handed off to RadioCanadaIE
+    through a radiocanada:medianet:<id> URL; the results form a playlist.
+    """
+    IE_NAME = 'radiocanada:article'
+    _VALID_URL = r'https?://ici\.radio-canada\.ca/(?P<id>[^?#&]+)'
+
+    @classmethod
+    def suitable(cls, url):
+        # Defer to RadioCanadaAudioVideoIE for the URLs it already handles.
+        if RadioCanadaAudioVideoIE.suitable(url):
+            return False
+        return super(RadioCanadaArticleIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        # str.replace returns a new string, so the result must be kept.
+        display_id = self._match_id(url).replace('/', '-')
+
+        webpage = unescapeHTML(self._download_webpage(url, display_id))
+
+        entries = [
+            self.url_result(
+                'radiocanada:medianet:%s' % mobj.group('id'),
+                ie=RadioCanadaIE.ie_key(), video_id=mobj.group('id'))
+            for mobj in re.finditer(
+                r'\"idMedia\"\s*:\s*\"(?P<id>\d+)\"', webpage)]
+
+        return self.playlist_result(entries, display_id)