From 66a73678fc350ffb1e43b2134ec938701596120b Mon Sep 17 00:00:00 2001
From: Markus Golser
Date: Sat, 6 Oct 2018 08:53:35 +0200
Subject: [PATCH] Added a new extractor for the german news site in south tyrol
 www.sdf.bz.it

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/sdf.py        | 52 ++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 youtube_dl/extractor/sdf.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 464c8d690..c0f9866f4 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -974,6 +974,7 @@ from .sbs import SBSIE
 from .screencast import ScreencastIE
 from .screencastomatic import ScreencastOMaticIE
 from .scrippsnetworks import ScrippsNetworksWatchIE
+from .sdf import SdfIE
 from .seeker import SeekerIE
 from .senateisvp import SenateISVPIE
 from .sendtonews import SendtoNewsIE
diff --git a/youtube_dl/extractor/sdf.py b/youtube_dl/extractor/sdf.py
new file mode 100644
index 000000000..ae5047747
--- /dev/null
+++ b/youtube_dl/extractor/sdf.py
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+from .common import InfoExtractor
+
+
+class SdfIE(InfoExtractor):
+    """Extractor for Mediathek videos on sdf.bz.it."""
+
+    # The numeric path component after ``/Mediathek/(video)/`` is the video id.
+    _VALID_URL = r'https?://(?:www\.)?sdf\.bz\.it/Mediathek/\(video\)/(?P<id>[0-9]+)'
+    _TESTS = [
+        {
+            'url': 'http://www.sdf.bz.it/Mediathek/(video)/62982',
+            'md5': 'c08bfa83e5a011dae3dab7d935ae1f7d',
+            'info_dict': {
+                'id': '62982',
+                'ext': 'mp4',
+                'title': 'Südtiroler Sporthilfe',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        }, {
+            'url': 'http://www.sdf.bz.it/Mediathek/(video)/62981',
+            'md5': '9523207e57a0db6b322eccb70825142a',
+            'info_dict': {
+                'id': '62981',
+                'ext': 'mp4',
+                'title': 'Seelische Gesundheit',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            }
+        }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at ``url``.
+
+        The media URL, thumbnail and title are scraped from the downloaded
+        page with regular expressions.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # The media URL is mandatory, so a missing match aborts extraction.
+        video_url = self._html_search_regex(r'(?s)file:\s\"(http.*?\.mp4)', webpage, 'url', fatal=True)
+        # The thumbnail is optional metadata; its absence must not be fatal.
+        thumbnail = self._html_search_regex(r'(?s)image:\s\"(http.*?\.jpg)', webpage, 'thumbnail', fatal=False)
+        # Fall back to the video id when the page carries no og:title.
+        title = self._html_search_regex(r'(?s)\"og:title\"\scontent\=\"(.+?)\"\/>', webpage, 'title', default=video_id, fatal=False)
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'format': 'mp4',
+            'thumbnail': thumbnail,
+        }