From 54269370755d7cb2462a0615a4ef5a3f2c050045 Mon Sep 17 00:00:00 2001
From: Niklas Sombert
Date: Thu, 18 Jan 2018 13:29:18 +0100
Subject: [PATCH] [hhu] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/hhu.py        | 85 ++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 youtube_dl/extractor/hhu.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 44120cae2..81021d5b5 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -438,6 +438,7 @@ from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hgtv import HGTVComShowIE
+from .hhu import HHUIE
 from .hketv import HKETVIE
 from .hidive import HiDiveIE
 from .historicfilms import HistoricFilmsIE
diff --git a/youtube_dl/extractor/hhu.py b/youtube_dl/extractor/hhu.py
new file mode 100644
index 000000000..5ecf4a9bb
--- /dev/null
+++ b/youtube_dl/extractor/hhu.py
@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class HHUIE(InfoExtractor):
+    """Extractor for videos on mediathek.hhu.de watch pages."""
+
+    _VALID_URL = r'https://mediathek\.hhu\.de/watch/(?P<id>.+)'
+    _TEST = {
+        'url': 'https://mediathek.hhu.de/watch/2dd05982-ea45-4108-9620-0c36e6ed8df5',
+        'md5': 'b99ff77f2148b1e754555abdf53f0e51',
+        'info_dict': {
+            'id': '2dd05982-ea45-4108-9620-0c36e6ed8df5',
+            'ext': 'mp4',
+            'title': 'Das Multimediazentrum',
+            'description': '',
+            'uploader_id': 'clames',
+            'thumbnail': 'https://mediathek.hhu.de/thumbs/2dd05982-ea45-4108-9620-0c36e6ed8df5/thumb_000.jpg',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the watch page at ``url``.
+
+        Calls ``raise_login_required()`` when the request ends up on a URL
+        starting with ``https://sts.`` instead of the watch page.
+        """
+        # TODO: Login for some videos.
+        video_id = self._match_id(url)
+        webpage, webpage_url = self._download_webpage_handle(url, video_id)
+        if webpage_url.geturl().startswith('https://sts.'):
+            self.raise_login_required()
+        file_id = self._html_search_regex(
+            r"{ file: '\/movies\/(.+?)\/v_100\.mp4', label: '",
+            webpage, 'file_id'
+        )
+        # %-formatting rather than '{}'.format(): auto-numbered fields are
+        # not supported on Python 2.6.
+        formats = [
+            {'url': 'https://mediathek.hhu.de/movies/%s/%s' % (file_id, name)}
+            for name in (
+                'v_10.webm',
+                'v_10.mp4',
+                'v_50.webm',
+                'v_50.mp4',
+                'v_100.webm',
+                'v_100.mp4',
+            )
+        ]
+
+        # default=None turns a missing Open Graph tag into None, so the
+        # markup fallback runs without a bare "except:" hiding other errors.
+        # NOTE(review): confirm the opening markup of the fallback patterns
+        # (title, description, uploader) against the live page.
+        title = self._og_search_title(webpage, default=None)
+        if not title:
+            title = self._html_search_regex(
+                r'<h1[^>]*>\s+(.+?)\s+<\/h1>',
+                webpage, 'title'
+            )
+        description = self._og_search_description(webpage, default=None)
+        if not description:
+            description = self._html_search_regex(
+                r'<p[^>]*>\s+(.+?)\s+<\/p>',
+                webpage, 'description', fatal=False
+            )
+        # Optional metadata must not abort the extraction when missing.
+        thumbnail = self._og_search_property(
+            'image:secure_url', webpage, 'thumbnail', fatal=False
+        )
+        uploader_id = self._html_search_regex(
+            r'(.+?)<\/a>',
+            webpage, 'uploader', fatal=False
+        )
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'uploader_id': uploader_id,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }