From c4ad62274d29c30b5e59b5d27c05e8905a4a3770 Mon Sep 17 00:00:00 2001
From: Ondrej Zara
Date: Tue, 11 Apr 2017 20:42:37 +0200
Subject: [PATCH] added support for decko.ceskatelevize.cz to CeskaTelevize IE

---
 youtube_dl/extractor/ceskatelevize.py | 97 +++++++++++++++++++++++----
 youtube_dl/extractor/extractors.py    |  1 +
 2 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index e250de18c..2041a44f9 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
     ExtractorError,
@@ -19,7 +20,16 @@ from ..utils import (
 
 
 class CeskaTelevizeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
+    # Matches the iVysilani pages as well as single video pages on the
+    # decko.ceskatelevize.cz host (decko.ceskatelevize.cz/video/IDEC).
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*|
+                            decko\.ceskatelevize\.cz/video/
+                        )
+                        (?P<id>[^/#?]+)
+                    '''
     _TESTS = [{
         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
         'info_dict': {
@@ -65,6 +75,14 @@ class CeskaTelevizeIE(InfoExtractor):
     }, {
         'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
         'only_matching': True,
+    }, {
+        'url': 'http://decko.ceskatelevize.cz/video/213543116070004',
+        'info_dict': {
+            'id': '61924494877085121',
+            'ext': 'mp4',
+            'description': 'Internetové hřiště České televize pro malé i velké děti.',
+            'title': 'Déčko',
+        },
     }]
 
     def _real_extract(self, url):
@@ -78,23 +96,33 @@ class CeskaTelevizeIE(InfoExtractor):
 
         type_ = None
         episode_id = None
+        # Decide by hostname rather than by substring, so an iVysilani URL
+        # that merely mentions the Decko host in its query is not misrouted.
+        is_decko = compat_urllib_parse_urlparse(url).hostname == 'decko.ceskatelevize.cz'
 
-        playlist = self._parse_json(
-            self._search_regex(
-                r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
-                default='{}'), playlist_id)
-        if playlist:
-            type_ = playlist.get('type')
-            episode_id = playlist.get('id')
+        if is_decko:
+            # For Decko the episode id comes from the URL itself (spaces
+            # and underscores stripped), not from the page's playlist data.
+            type_ = 'episode'
+            episode_id = compat_urllib_parse_unquote(playlist_id)
+            episode_id = episode_id.replace(' ', '').replace('_', '')
+        else:
+            playlist = self._parse_json(
+                self._search_regex(
+                    r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
+                    default='{}'), playlist_id)
+            if playlist:
+                type_ = playlist.get('type')
+                episode_id = playlist.get('id')
 
-        if not type_:
-            type_ = self._html_search_regex(
-                r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
-                webpage, 'type')
-        if not episode_id:
-            episode_id = self._html_search_regex(
-                r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
-                webpage, 'episode_id')
+            if not type_:
+                type_ = self._html_search_regex(
+                    r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
+                    webpage, 'type')
+            if not episode_id:
+                episode_id = self._html_search_regex(
+                    r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
+                    webpage, 'episode_id')
 
         data = {
             'playlist[0][type]': type_,
@@ -277,3 +305,40 @@ class CeskaTelevizePoradyIE(InfoExtractor):
             webpage, 'iframe player url', group='url'))
 
         return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
+
+
+class CeskaTelevizeDeckoIE(InfoExtractor):
+    # Programme pages on decko.ceskatelevize.cz; each one is turned into a
+    # playlist of /video/IDEC URLs that are handed over to CeskaTelevizeIE.
+    _VALID_URL = r'https?://decko\.ceskatelevize\.cz/(?P<id>[a-z-]+)$'
+    _TEST = {
+        'url': 'http://decko.ceskatelevize.cz/nejmensi-slon-na-svete',
+        'playlist_count': 13,
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # The page declares an IDEC in a JS variable; the REST endpoint
+        # below is queried with it to list the related episodes.
+        idec = self._html_search_regex(
+            r'var\s+IDEC\s+=\s+\'(.+?)\'', webpage, 'IDEC')
+
+        args = compat_urllib_parse_urlencode({'IDEC': idec})
+        related = self._download_json(
+            'http://decko.ceskatelevize.cz/rest/Programme/relatedVideosForEpisode?' + args,
+            video_id)
+
+        entries = []
+        for episode in related.get('episodes') or []:
+            episode_idec = (episode.get('episode') or {}).get('IDEC')
+            if not episode_idec:
+                # Skip items without an IDEC instead of crashing on None.
+                continue
+            episode_idec = episode_idec.replace(' ', '').replace('/', '')
+            entries.append(self.url_result(
+                'http://decko.ceskatelevize.cz/video/' + episode_idec,
+                ie=CeskaTelevizeIE.ie_key()))
+
+        return self.playlist_result(entries, video_id)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 68e1a5cfc..4f14f4a15 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -168,6 +168,7 @@ from .cda import CDAIE
 from .ceskatelevize import (
     CeskaTelevizeIE,
     CeskaTelevizePoradyIE,
+    CeskaTelevizeDeckoIE,
 )
 from .channel9 import Channel9IE
 from .charlierose import CharlieRoseIE