Added a new extractor for the German news site in South Tyrol, www.sdf.bz.it

2018-10-06 08:53:35 +02:00 · 2018-10-06 08:53:35 +02:00 · 66a73678fc
commit 66a73678fc
parent d98cb62e55
2 changed files with 43 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -974,6 +974,7 @@ from .sbs import SBSIE
 from .screencast import ScreencastIE
 from .screencastomatic import ScreencastOMaticIE
 from .scrippsnetworks import ScrippsNetworksWatchIE
+from .sdf import SdfIE
 from .seeker import SeekerIE
 from .senateisvp import SenateISVPIE
 from .sendtonews import SendtoNewsIE
--- a/youtube_dl/extractor/sdf.py
+++ b/youtube_dl/extractor/sdf.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+from .common import InfoExtractor
+
+
+class SdfIE(InfoExtractor):
+    """Extractor for single-video Mediathek pages on sdf.bz.it (URLs of the form /Mediathek/(video)/<id>)."""
+    _VALID_URL = r'https?://(?:www\.)?sdf\.bz\.it/Mediathek/\(video\)/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.sdf.bz.it/Mediathek/(video)/62982',
+        'md5': 'c08bfa83e5a011dae3dab7d935ae1f7d',
+        'info_dict': {
+            'id': '62982',
+            'ext': 'mp4',
+            'title': 'Südtiroler Sporthilfe',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.sdf.bz.it/Mediathek/(video)/62981',
+        'md5': '9523207e57a0db6b322eccb70825142a',
+        'info_dict': {
+            'id': '62981',
+            'ext': 'mp4',
+            'title': 'Seelische Gesundheit',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the Mediathek page at *url*; the video URL is mandatory, title and thumbnail are best-effort."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._html_search_regex(r'(?s)file:\s\"(http.*?\.mp4)', webpage, 'url')  # own name: do not clobber the `url` argument
+        thumbnail = self._html_search_regex(r'(?s)image:\s\"(http.*?\.jpg)', webpage, 'thumbnail', default=None)  # optional: a missing image must not abort extraction
+        title = self._html_search_regex(r'(?s)\"og:title\"\scontent\=\"(.+?)\"\/>', webpage, 'title', default=video_id)  # falls back to the numeric id
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'ext': 'mp4',  # the URL pattern above only matches *.mp4; `format` is a description field, not the extension
+            'thumbnail': thumbnail,
+        }