From 66a73678fc350ffb1e43b2134ec938701596120b Mon Sep 17 00:00:00 2001
From: Markus Golser
Date: Sat, 6 Oct 2018 08:53:35 +0200
Subject: [PATCH 1/2] Added a new extractor for the german news site in south tyrol www.sdf.bz.it

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/sdf.py        | 42 ++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 youtube_dl/extractor/sdf.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 464c8d690..c0f9866f4 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -974,6 +974,7 @@ from .sbs import SBSIE
 from .screencast import ScreencastIE
 from .screencastomatic import ScreencastOMaticIE
 from .scrippsnetworks import ScrippsNetworksWatchIE
+from .sdf import SdfIE
 from .seeker import SeekerIE
 from .senateisvp import SenateISVPIE
 from .sendtonews import SendtoNewsIE
diff --git a/youtube_dl/extractor/sdf.py b/youtube_dl/extractor/sdf.py
new file mode 100644
index 000000000..ae5047747
--- /dev/null
+++ b/youtube_dl/extractor/sdf.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+from .common import InfoExtractor
+
+
+class SdfIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?sdf\.bz\.it/Mediathek/\(video\)/(?P<id>[0-9]+)'
+    _TESTS = [
+        {
+            'url': 'http://www.sdf.bz.it/Mediathek/(video)/62982',
+            'md5': 'c08bfa83e5a011dae3dab7d935ae1f7d',
+            'info_dict': {
+                'id': '62982',
+                'ext': 'mp4',
+                'title': 'Südtiroler Sporthilfe',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        }, {
+            'url': 'http://www.sdf.bz.it/Mediathek/(video)/62981',
+            'md5': '9523207e57a0db6b322eccb70825142a',
+            'info_dict': {
+                'id': '62981',
+                'ext': 'mp4',
+                'title': 'Seelische Gesundheit',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            }
+        }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        url = self._html_search_regex(r'(?s)file:\s\"(http.*?\.mp4)', webpage, 'url', fatal=True)
+        thumbnail = self._html_search_regex(r'(?s)image:\s\"(http.*?\.jpg)', webpage, 'thumbnail', fatal=True)
+        title = self._html_search_regex(r'(?s)\"og:title\"\scontent\=\"(.+?)\"\/>', webpage, 'title', default=video_id, fatal=False)
+        return (info_dict)
+        info_dict = {
+            'id': video_id,
+            'title': title,
+            'url': url,
+            'format': 'mp4',
+            'thumbnail': thumbnail,
+        }

From b3d27fcc95faf2e924df7bbebcff1f8b06a8f79e Mon Sep 17 00:00:00 2001
From: Markus Golser
Date: Sat, 6 Oct 2018 08:55:43 +0200
Subject: [PATCH 2/2] Added a new extractor for the german news site in south tyrol www.sdf.bz.it

---
 youtube_dl/extractor/sdf.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/sdf.py b/youtube_dl/extractor/sdf.py
index ae5047747..f47a5a6d2 100644
--- a/youtube_dl/extractor/sdf.py
+++ b/youtube_dl/extractor/sdf.py
@@ -32,7 +32,6 @@ class SdfIE(InfoExtractor):
         url = self._html_search_regex(r'(?s)file:\s\"(http.*?\.mp4)', webpage, 'url', fatal=True)
         thumbnail = self._html_search_regex(r'(?s)image:\s\"(http.*?\.jpg)', webpage, 'thumbnail', fatal=True)
         title = self._html_search_regex(r'(?s)\"og:title\"\scontent\=\"(.+?)\"\/>', webpage, 'title', default=video_id, fatal=False)
-        return (info_dict)
         info_dict = {
             'id': video_id,
             'title': title,
@@ -40,3 +39,5 @@ class SdfIE(InfoExtractor):
             'format': 'mp4',
             'thumbnail': thumbnail,
         }
+        return (info_dict)
+