From 82cb01bb0fb667cea81cf026d9bde497d9fda900 Mon Sep 17 00:00:00 2001
From: Vukkk
Date: Wed, 31 Aug 2016 10:56:11 +0200
Subject: [PATCH 1/2] [TV2HU] Add new extractor

---
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/tv2hu.py      | 101 +++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 youtube_dl/extractor/tv2hu.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 21efa96b2..7207d0b69 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -899,6 +899,7 @@ from .tv2 import (
     TV2IE,
     TV2ArticleIE,
 )
+from .tv2hu import TV2HUIE
 from .tv3 import TV3IE
 from .tv4 import TV4IE
 from .tvc import (
diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py
new file mode 100644
index 000000000..d9f250ff8
--- /dev/null
+++ b/youtube_dl/extractor/tv2hu.py
@@ -0,0 +1,101 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class TV2HUIE(InfoExtractor):
+    """Extract on-demand videos from tv2.hu show pages."""
+
+    IE_NAME = 'tv2.hu'
+    # The optional path segment between the show name and the numeric id
+    # differs per show (e.g. "teljes_adasok", "aktiv_teljes_adas").
+    _VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:musoraink/)?(?P<uploader>[^/]+)/(?:[^/]+/)?(?P<id>[0-9]+)_(.+?)\.html'
+    # Link to the JSON video description embedded in the page; its path
+    # carries the upload date and the upload id.
+    _JSON_URL = r'(?P<json_url>https?://.+?\.tv2\.hu/vod/(?P<upload_date>\d+)/id_(?P<upload_id>\d+).+?&type=json)'
+
+    _TESTS = [{
+        'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html',
+        'info_dict': {
+            'id': '217679',
+            'ext': 'mp4',
+            'title': 'Ezek megőrültek! - 1. adás 1. rész',
+            'upload_id': '220289',
+            'upload_date': '20160826',
+            'uploader': 'ezek_megorultek',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html',
+        'info_dict': {
+            'id': '217677',
+            'ext': 'mp4',
+            'title': 'Ezek megőrültek! - 1. adás 2. rész',
+            'upload_id': '220290',
+            'upload_date': '20160826',
+            'uploader': 'ezek_megorultek',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://tv2.hu/musoraink/aktiv/aktiv_teljes_adas/217963_aktiv-teljes-adas---2016.08.30..html',
+        'info_dict': {
+            'id': '217963',
+            'ext': 'mp4',
+            'title': 'AKTÍV / Aktív teljes adás - 2016.08.30. / tv2.hu',
+            'upload_id': '220700',
+            'upload_date': '20160830',
+            'uploader': 'aktiv',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the tv2.hu video page at ``url``.
+
+        Raises ExtractorError when the page has no link to the JSON
+        video description.
+        """
+        url_match = re.match(self._VALID_URL, url)
+        video_id = url_match.group('id')
+        webpage = self._download_webpage(
+            url, video_id, 'Downloading info page')
+
+        json_match = re.search(self._JSON_URL, webpage)
+        if json_match is None:
+            raise ExtractorError('Unable to find video info URL')
+
+        json_data = self._download_json(
+            json_match.group('json_url'), video_id, 'Downloading video info')
+
+        # The manifest URL may be protocol-relative ("//host/...").
+        manifest_url = self._proto_relative_url(json_data['bitrates']['hls'])
+
+        formats = self._extract_m3u8_formats(
+            manifest_url, video_id, 'mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage).strip(),
+            'thumbnail': self._og_search_property('image', webpage),
+            'uploader': url_match.group('uploader'),
+            'upload_id': json_match.group('upload_id'),
+            'upload_date': json_match.group('upload_date'),
+            'formats': formats,
+        }

From 50c3d4c36921db2ed6ca8e38edd8b317c7cbd14e Mon Sep 17 00:00:00 2001
From: Vukkk
Date: Mon, 15 May 2017 19:04:22 +0200
Subject: [PATCH 2/2] [tv2.hu] Fix error caused by missing protocol in url

---
 youtube_dl/extractor/tv2hu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py
index 86017b757..e14233db1 100644
--- a/youtube_dl/extractor/tv2hu.py
+++ b/youtube_dl/extractor/tv2hu.py
@@ -36,7 +36,7 @@ class TV2HuIE(InfoExtractor):
         formats = []
         for b in ('bitrates', 'backupBitrates'):
             bitrates = json_data.get(b, {})
-            m3u8_url = bitrates.get('hls')
+            m3u8_url = self._proto_relative_url(bitrates.get('hls'))
             if m3u8_url:
                 formats.extend(self._extract_wowza_formats(
                     m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp']))