diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py index 6c6ec8d1f..b5ddf8f39 100644 --- a/youtube_dl/extractor/tvnoe.py +++ b/youtube_dl/extractor/tvnoe.py @@ -4,15 +4,21 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( clean_html, + url_or_none, + js_to_json, + try_get +) +from ..compat import ( + compat_str ) class TVNoeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/porad/(?P[-0-9a-z]+)' + _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/porad/(?P[0-9]+).*' _TEST = { 'url': 'https://www.tvnoe.cz/porad/26011-terra-santa-news-13-11-2019', 'info_dict': { - 'id': '26011-terra-santa-news-13-11-2019', + 'id': '26011', 'ext': 'mp4', 'series': 'Terra Santa News', 'title': '13. 11. 2019', @@ -24,14 +30,12 @@ class TVNoeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) formats = [] - hls_url = self._search_regex( - r"\s*src:\s*\'(?Phttps?://[^\']+playlist.m3u8)\',", webpage, 'm3u8', fatal=False) - if hls_url: - dash_url = self._search_regex( - r"\s*src:\s*\'(?Phttps?://[^\']+manifest.mpd)\',", webpage, 'mpd', fatal=False) - else: - dash_url = self._search_regex( - r"\s*src:\s*\'(?Phttps?://[^\']+manifest.mpd)\',", webpage, 'mpd') + json = self._search_regex(r'(?sm)var *INIT_PLAYER *= *(?P[^;]+);', webpage, 'json') + player_data = self._parse_json(json, video_id, js_to_json) + hls_url = url_or_none(try_get(player_data, + lambda x: x['tracks']['HLS'][0]['src'], compat_str)) + dash_url = url_or_none(try_get(player_data, + lambda x: x['tracks']['DASH'][0]['src'], compat_str)) if dash_url: formats.extend(self._extract_mpd_formats( @@ -45,10 +49,12 @@ class TVNoeIE(InfoExtractor): hls_url, video_id, ext='mp4', m3u8_id='hls')) self._sort_formats(formats) + title = clean_html(self._search_regex( - r"

(?P.*)<\/h2>", webpage, 'title')) + r'<h2>(?P<title>.+)<\/h2>', webpage, 'title', fatal=False)) series = clean_html(self._search_regex( - r"<h1>(?P<series>.*)<\/h1>", webpage, 'series')) + r'<h1>(?P<series>.+)<\/h1>', webpage, 'series', fatal=False)) + return { 'id': video_id, 'title': title,