From 8f347dcc02f4263b00f4b3a171ce33080d4356b6 Mon Sep 17 00:00:00 2001 From: tamas Date: Thu, 6 Sep 2018 23:02:28 +0200 Subject: [PATCH 1/5] first commit for mediaklikk.hu extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/mediaklikk.py | 62 ++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 youtube_dl/extractor/mediaklikk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 995af9988..0c9fcec0a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -607,6 +607,7 @@ from .markiza import ( from .massengeschmacktv import MassengeschmackTVIE from .matchtv import MatchTVIE from .mdr import MDRIE +from .mediaklikk import MediaKlikkIE from .mediaset import MediasetIE from .mediasite import MediasiteIE from .medici import MediciIE diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py new file mode 100644 index 000000000..8646591b6 --- /dev/null +++ b/youtube_dl/extractor/mediaklikk.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_unquote +) + + +class MediaKlikkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?mediaklikk\.hu/video/(?:[^/]+/)' + _TESTS = [ + { + 'url': 'https://www.mediaklikk.hu/video/az-evszakok-buvoleteben-osz/', + 'info_dict': { + 'id': '2512015', + 'title': 'Az évszakok bűvöletében, Ősz', + 'series': 'Az évszakok bűvöletében', + 'ext': 'mp4' + } + }, + { + 'url': 'https://www.mediaklikk.hu/video/sporthirado-350-resz/', + 'info_dict': { + 'id': '2523053', + 'title': 'Sporthíradó, 350. 
rész', + 'series': 'Sporthíradó', + 'ext': 'mp4' + } + }, + ] + + def _real_extract(self, url): + webpage = self._download_webpage(url, + None, + note='Fetching page') + pattern = r"mtva_player_manager\.player\(document.getElementById\(.*\),(.*)\);" + info_json = self._html_search_regex(pattern, webpage, 'info_json') + info_meta = self._parse_json(compat_urllib_parse_unquote(info_json), + None) + video_id = str(info_meta['contentId']).decode('utf-8') + info_ret = { + '_type': 'video', + 'title': info_meta['title'], + 'ext': 'mp4', + 'id': video_id + } + if 'series' in info_meta: + info_ret['series'] = info_meta['series'] + info_meta['video'] = info_meta['token'] + del info_meta['token'] + playerpage = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, note='Downloading player page', query=info_meta) + pattern = r"\"file\": \"(.*)\"," + playlist_url = 'https:' + compat_urllib_parse_unquote( + self._html_search_regex(pattern, playerpage, 'playlist_url'))\ + .replace('\\/', '/') + formats = self._extract_wowza_formats( + playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash']) + self._sort_formats(formats) + info_ret['formats'] = formats + return info_ret From d72a1263cf090832a54d0fa336fb6b542ed12156 Mon Sep 17 00:00:00 2001 From: tamas Date: Thu, 6 Sep 2018 23:10:26 +0200 Subject: [PATCH 2/5] title fallback --- youtube_dl/extractor/mediaklikk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py index 8646591b6..35a83403d 100644 --- a/youtube_dl/extractor/mediaklikk.py +++ b/youtube_dl/extractor/mediaklikk.py @@ -42,7 +42,7 @@ class MediaKlikkIE(InfoExtractor): video_id = str(info_meta['contentId']).decode('utf-8') info_ret = { '_type': 'video', - 'title': info_meta['title'], + 'title': info_meta.get('title') or self._og_search_title(webpage), 'ext': 'mp4', 'id': video_id } From 099b65e2681805daa40193743a69295d4fb68e05 Mon Sep 17 00:00:00 
2001 From: tamas Date: Thu, 6 Sep 2018 23:31:55 +0200 Subject: [PATCH 3/5] python3 compatibility fix --- youtube_dl/extractor/mediaklikk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py index 35a83403d..f95974ce4 100644 --- a/youtube_dl/extractor/mediaklikk.py +++ b/youtube_dl/extractor/mediaklikk.py @@ -39,7 +39,9 @@ class MediaKlikkIE(InfoExtractor): info_json = self._html_search_regex(pattern, webpage, 'info_json') info_meta = self._parse_json(compat_urllib_parse_unquote(info_json), None) - video_id = str(info_meta['contentId']).decode('utf-8') + video_id = str(info_meta['contentId']) + if type(video_id) == bytes: + video_id = video_id.decode('utf-8') info_ret = { '_type': 'video', 'title': info_meta.get('title') or self._og_search_title(webpage), From 9184b056d0a8f0cad3ce2fb0418b1d86750c38f1 Mon Sep 17 00:00:00 2001 From: tamas Date: Thu, 6 Sep 2018 23:34:42 +0200 Subject: [PATCH 4/5] removed unused import --- youtube_dl/extractor/mediaklikk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py index f95974ce4..4afb53eb4 100644 --- a/youtube_dl/extractor/mediaklikk.py +++ b/youtube_dl/extractor/mediaklikk.py @@ -1,6 +1,5 @@ # coding: utf-8 from __future__ import unicode_literals -import re from .common import InfoExtractor from ..compat import ( From d548fbd5728aa24dcb8e34a215d4d952e1367a37 Mon Sep 17 00:00:00 2001 From: tamas Date: Sat, 8 Sep 2018 22:43:12 +0200 Subject: [PATCH 5/5] fixed regex searches, capturing display_id, removed download notes, using compat_str --- youtube_dl/extractor/mediaklikk.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py index 4afb53eb4..f1278938d 100644 --- a/youtube_dl/extractor/mediaklikk.py +++ b/youtube_dl/extractor/mediaklikk.py @@ -3,7 +3,8 @@ 
from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_unquote + compat_urllib_parse_unquote, + compat_str ) @@ -16,6 +17,7 @@ class MediaKlikkIE(InfoExtractor): 'id': '2512015', 'title': 'Az évszakok bűvöletében, Ősz', 'series': 'Az évszakok bűvöletében', + 'display_id': 'az-evszakok-buvoleteben-osz', 'ext': 'mp4' } }, @@ -25,6 +27,7 @@ class MediaKlikkIE(InfoExtractor): 'id': '2523053', 'title': 'Sporthíradó, 350. rész', 'series': 'Sporthíradó', + 'display_id': 'sporthirado-350-resz', 'ext': 'mp4' } }, @@ -32,27 +35,30 @@ class MediaKlikkIE(InfoExtractor): def _real_extract(self, url): webpage = self._download_webpage(url, - None, - note='Fetching page') - pattern = r"mtva_player_manager\.player\(document.getElementById\(.*\),(.*)\);" + None) + + pattern = r"mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);" info_json = self._html_search_regex(pattern, webpage, 'info_json') info_meta = self._parse_json(compat_urllib_parse_unquote(info_json), None) - video_id = str(info_meta['contentId']) - if type(video_id) == bytes: - video_id = video_id.decode('utf-8') + pattern = r"https?://(?:www\.)?mediaklikk\.hu/video/([a-z\-0-9]+)/?" 
+ display_id = self._search_regex(pattern, url, 'display_id') + video_id = compat_str(info_meta['contentId']) info_ret = { '_type': 'video', 'title': info_meta.get('title') or self._og_search_title(webpage), 'ext': 'mp4', + 'display_id': display_id, 'id': video_id } if 'series' in info_meta: info_ret['series'] = info_meta['series'] info_meta['video'] = info_meta['token'] del info_meta['token'] - playerpage = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, note='Downloading player page', query=info_meta) - pattern = r"\"file\": \"(.*)\"," + playerpage = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', + video_id, + query=info_meta) + pattern = r"\"file\": \"(\\/\\/.*playlist\.m3u8)\"," playlist_url = 'https:' + compat_urllib_parse_unquote( self._html_search_regex(pattern, playerpage, 'playlist_url'))\ .replace('\\/', '/')