[voxnow] branch out voxnow support from rtlnow

2015-05-15 18:16:10 +02:00 · 2015-05-15 18:16:10 +02:00 · 4249f66ce6
commit 4249f66ce6
parent 62c95fd5fc
3 changed files with 85 additions and 15 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -643,6 +643,7 @@ from .vk import (
 )
 from .vodlocker import VodlockerIE
 from .voicerepublic import VoiceRepublicIE
+from .voxnow import VOXnowIE
 from .vporn import VpornIE
 from .vrt import VRTIE
 from .vube import VubeIE
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@ -20,7 +20,6 @@ class RTLnowIE(InfoExtractor):
                            (?P<domain>
                                rtl-now\.rtl\.de|
                                rtl2now\.rtl2\.de|
-                                (?:www\.)?voxnow\.de|
                                (?:www\.)?rtlnitronow\.de|
                                (?:www\.)?superrtlnow\.de|
                                (?:www\.)?n-tvnow\.de)
@ -61,20 +60,6 @@ class RTLnowIE(InfoExtractor):
            },
            'skip': 'Only works from Germany',
        },
-        {
-            'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
-            'info_dict': {
-                'id': '13883',
-                'ext': 'flv',
-                'title': 'Voxtours - Südafrika-Reporter II',
-                'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
-                'upload_date': '20090627',
-                'duration': 1800,
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
        {
            'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
            'info_dict': {
--- a/youtube_dl/extractor/voxnow.py
+++ b/youtube_dl/extractor/voxnow.py
@ -0,0 +1,84 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    unified_strdate,
+    int_or_none,
+)
+
+
+class VOXnowIE(InfoExtractor):
+    """Information Extractor for VOX NOW"""
+    _VALID_URL = r'''(?x)
+                        (?:https?://)?
+                        (?P<url>
+                            (?P<domain>
+                               (?:www\.)?voxnow\.de
+			    )
+                            /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
+                            (?:container_id|film_id)=(?P<video_id>[0-9]+)&
+                            player=1(?:&season=[0-9]+)?(?:&.*)?
+                        )'''
+
+    _TEST = {
+            'url': 'http://www.voxnow.de/der-hundeprofi/bulldogge-bruno-schaeferhuendin-mona.php?container_id=136867&player=1&season=6',
+            'info_dict': {
+                'id': '136867',
+                'ext': 'mp4',
+                'title': "Der Hundeprofi - Bulldogge 'Bruno' / Schäferhündin 'Mona'",
+                'description': 'md5:eb5b500f3e97c476614a0c1989841060',
+                'upload_date': '20150509',
+                'duration': 3077,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_page_url = 'http://%s/' % mobj.group('domain')
+        video_id = mobj.group('video_id')
+
+        webpage = self._download_webpage('http://' + mobj.group('url'), video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+        upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
+
+        mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage)
+        duration = int(mobj.group('seconds')) if mobj else None
+
+        playerdata_url = self._html_search_regex(
+            r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')
+
+        playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
+
+        filename = playerdata.find('./playlist/videoinfo/filename').text
+        manifest = self._download_xml(filename, video_id, 'Downloading manifest')
+
+        formats = []
+        for media in manifest.findall('{http://ns.adobe.com/f4m/2.0}media'):
+            fmt = {
+                'url': media.attrib['href'].replace('hds', 'hls').replace('f4m', 'm3u8'),
+                'ext': 'mp4',
+                'format_id': 'hls',
+            }
+            formats.append(fmt)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'formats': formats,
+        }