added support for decko.ceskatelevize.cz to CeskaTelevize IE

This commit is contained in:
Ondrej Zara 2017-04-11 20:42:37 +02:00
parent 1730878167
commit c4ad62274d
2 changed files with 70 additions and 16 deletions

View File

@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_parse_urlencode
)
from ..utils import (
ExtractorError,
@ -19,7 +20,14 @@ from ..utils import (
class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
# Matches iVysilani pages on (www.)ceskatelevize.cz and /video/ pages on
# decko.ceskatelevize.cz; the last path segment is captured as `id`.
# Host-name dots are escaped so that they only match a literal '.'.
_VALID_URL = r'''(?x)
    https?://
        (?:
            (?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*|
            decko\.ceskatelevize\.cz/video/
        )
        (?P<id>[^/#?]+)
    '''
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
@ -65,6 +73,14 @@ class CeskaTelevizeIE(InfoExtractor):
}, {
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
'only_matching': True,
}, {
'url': 'http://decko.ceskatelevize.cz/video/213543116070004',
'info_dict': {
'id': '61924494877085121',
'ext': 'mp4',
'description': 'Internetové hřiště České televize pro malé i velké děti.',
'title': 'Déčko'
}
}]
def _real_extract(self, url):
@ -78,23 +94,29 @@ class CeskaTelevizeIE(InfoExtractor):
type_ = None
episode_id = None
is_decko = "decko.ceskatelevize.cz" in url
playlist = self._parse_json(
self._search_regex(
r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
default='{}'), playlist_id)
if playlist:
type_ = playlist.get('type')
episode_id = playlist.get('id')
if is_decko:
type_ = "episode"
episode_id = compat_urllib_parse_unquote(playlist_id)
episode_id = episode_id.replace(" ", "").replace("_", "")
else:
playlist = self._parse_json(
self._search_regex(
r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
default='{}'), playlist_id)
if playlist:
type_ = playlist.get('type')
episode_id = playlist.get('id')
if not type_:
type_ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
webpage, 'type')
if not episode_id:
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
webpage, 'episode_id')
if not type_:
type_ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
webpage, 'type')
if not episode_id:
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
webpage, 'episode_id')
data = {
'playlist[0][type]': type_,
@ -277,3 +299,34 @@ class CeskaTelevizePoradyIE(InfoExtractor):
webpage, 'iframe player url', group='url'))
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
class CeskaTelevizeDeckoIE(InfoExtractor):
    """Playlist extractor for programme pages on decko.ceskatelevize.cz.

    A programme page embeds an ``IDEC`` identifier in its JavaScript. That
    identifier is sent to the site's ``relatedVideosForEpisode`` REST endpoint
    and every episode returned is turned into a
    ``decko.ceskatelevize.cz/video/<IDEC>`` URL that is delegated to
    ``CeskaTelevizeIE``.
    """

    # Host-name dots are escaped so that they only match a literal '.'.
    _VALID_URL = r'https?://decko\.ceskatelevize\.cz/(?P<id>[a-z-]+)$'
    _TEST = {
        'url': 'http://decko.ceskatelevize.cz/nejmensi-slon-na-svete',
        'info_dict': {
            'id': 'nejmensi-slon-na-svete',
        },
        'playlist_count': 13,
    }

    def _real_extract(self, url):
        """Return a playlist with one URL entry per related episode.

        The playlist id is the programme slug captured by ``_VALID_URL``.
        Episodes whose ``IDEC`` is missing or empty are skipped instead of
        aborting the whole extraction.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        page_idec = self._html_search_regex(
            r'var\s+IDEC\s+=\s+\'(.+?)\'', webpage, 'IDEC')
        query = compat_urllib_parse_urlencode({'IDEC': page_idec})
        related = self._download_json(
            'http://decko.ceskatelevize.cz/rest/Programme/relatedVideosForEpisode?' + query,
            playlist_id)

        entries = []
        for episode in related.get('episodes') or []:
            # Tolerate a missing or null "episode" object or "IDEC" value.
            episode_idec = (episode.get('episode') or {}).get('IDEC')
            if not episode_idec:
                continue
            # The /video/ URL takes the IDEC without spaces and slashes.
            compact_idec = episode_idec.replace(' ', '').replace('/', '')
            entries.append(self.url_result(
                'http://decko.ceskatelevize.cz/video/' + compact_idec,
                ie=CeskaTelevizeIE.ie_key()))

        return self.playlist_result(entries, playlist_id)

View File

@ -168,6 +168,7 @@ from .cda import CDAIE
from .ceskatelevize import (
CeskaTelevizeIE,
CeskaTelevizePoradyIE,
CeskaTelevizeDeckoIE
)
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE