Generalized the Nuevo extractor

Affects: anitube, trollvids, trutube
2015-12-08 23:18:48 +02:00 · 2015-12-08 23:18:48 +02:00 · 0dc7852080
commit 0dc7852080
parent 73d918b0da
4 changed files with 46 additions and 72 deletions
--- a/youtube_dl/extractor/anitube.py
+++ b/youtube_dl/extractor/anitube.py
@ -2,10 +2,10 @@ from __future__ import unicode_literals
 import re
-from .common import InfoExtractor
+from .nuevo import NuevoBaseIE
-class AnitubeIE(InfoExtractor):
+class AnitubeIE(NuevoBaseIE):
    IE_NAME = 'anitube.se'
    _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor):
        key = self._search_regex(
            r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
-        config_xml = self._download_xml(
+        config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
-            'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
+        return self._extract_nuevo(config_url, video_id)
        video_title = config_xml.find('title').text
        thumbnail = config_xml.find('image').text
        duration = float(config_xml.find('duration').text)
        formats = []
        video_url = config_xml.find('file')
        if video_url is not None:
            formats.append({
                'format_id': 'sd',
                'url': video_url.text,
            })
        video_url = config_xml.find('filehd')
        if video_url is not None:
            formats.append({
                'format_id': 'hd',
                'url': video_url.text,
            })
        return {
            'id': video_id,
            'title': video_title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats
        }
--- a/youtube_dl/extractor/nuevo.py
+++ b/youtube_dl/extractor/nuevo.py
@ -0,0 +1,33 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import float_or_none
 class NuevoBaseIE(InfoExtractor):
    """Shared extraction logic for extractors that read a Nuevo XML config."""

    def _extract_nuevo(self, config_url, video_id, info=None, ignore_hd=False):
        """Download the XML config at config_url and turn it into an info dict.

        config_url -- URL of the XML config document.
        video_id   -- stored under "id" and passed on to _download_xml.
        info       -- optional dict of pre-filled fields; it is updated in
                      place and returned. A fresh dict is used when omitted.
        ignore_hd  -- when true, <filehd> elements are left out of "formats".

        Returns the info dict with "id" and "formats" always set, and with
        "duration", "thumbnail" and "title" set from the matching elements
        when the config contains them. Formats from <file> elements are
        listed before formats from <filehd> elements.
        """
        if info is None:
            info = {}
        sdformats, hdformats = [], []
        # The downloaded source is stripped before being handed on;
        # presumably this guards against stray whitespace around the XML
        # document -- confirm against _download_xml.
        tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip())
        for child in tree:
            # child.text is None for an empty element such as <title/>.
            tag, val = child.tag, child.text
            if tag == "file":
                # Skip empty elements instead of emitting a format without a URL.
                if val:
                    sdformats.append({"url": val})
            elif tag == "filehd" and not ignore_hd:
                if val:
                    hdformats.append({"url": val})
            elif tag == "duration":
                info["duration"] = float_or_none(val)
            elif tag == "image":
                info["thumbnail"] = val
            elif tag == "title":
                # Only override a title supplied by the caller when the config
                # actually carries one; calling .strip() on None would raise.
                title = val.strip() if val else None
                if title:
                    info["title"] = title
        info["id"] = video_id
        info["formats"] = sdformats + hdformats
        return info
--- a/youtube_dl/extractor/trollvids.py
+++ b/youtube_dl/extractor/trollvids.py
@ -1,7 +1,7 @@
 # encoding: utf-8
 from __future__ import unicode_literals
-from .common import InfoExtractor
+from .nuevo import NuevoBaseIE
 from ..compat import (
    compat_urllib_parse_unquote
@ -10,7 +10,7 @@ from ..compat import (
 import re
-class TrollvidsIE(InfoExtractor):
+class TrollvidsIE(NuevoBaseIE):
    _VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)"
    IE_NAME = 'trollvids'
@ -21,35 +21,15 @@ class TrollvidsIE(InfoExtractor):
        raw_video_title = match.group('title')
        video_title = compat_urllib_parse_unquote(raw_video_title)
        url = "http://trollvids.com/video/%s/%s" % (video_id, raw_video_title)
        config_url = "http://trollvids.com/nuevo/player/config.php?v=%s" % video_id
        info = {
            "id": video_id,
            "title": video_title,
            "webpage_url": url,
            "age_limit": 18
        }
-        sdformats = []
+        return self._extract_nuevo(config_url, video_id, info)
        hdformats = []
        tree = self._download_xml("http://trollvids.com/nuevo/player/config.php?v=%s" % video_id, video_id)
        for child in tree:
            tag, val = child.tag, child.text
            if tag == "file":
                sdformats.append({"url": val})
            elif tag == "filehd":
                hdformats.append({"url": val})
            elif tag == "duration":
                info["duration"] = int(float(val))
            elif tag == "image":
                info["thumbnail"] = val
            elif tag == "title":
                info["title"] = val
        info["formats"] = sdformats + hdformats
        return info
    _TESTS = [
        {
@ -60,7 +40,7 @@ class TrollvidsIE(InfoExtractor):
                'ext': 'mp4',
                'title': "【MMD R-18】ガールフレンド carry_me_off",
                'age_limit': 18,
-                'duration': 216,
+                'duration': 216.78,
            },
        },
    ]
--- a/youtube_dl/extractor/trutube.py
+++ b/youtube_dl/extractor/trutube.py
@ -1,10 +1,9 @@
 from __future__ import unicode_literals
-from .common import InfoExtractor
+from .nuevo import NuevoBaseIE
 from ..utils import xpath_text
-class TruTubeIE(InfoExtractor):
+class TruTubeIE(NuevoBaseIE):
    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
@ -22,19 +21,7 @@ class TruTubeIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-
+        config_url = "https://trutube.tv/nuevo/player/config.php?v=%s" % video_id
        config = self._download_xml(
            'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
            video_id, transform_source=lambda s: s.strip())
        # filehd is always 404
-        video_url = xpath_text(config, './file', 'video URL', fatal=True)
+        return self._extract_nuevo(config_url, video_id, ignore_hd=True)
        title = xpath_text(config, './title', 'title').strip()
        thumbnail = xpath_text(config, './image', ' thumbnail')
        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
        }