From 0dc78520808cb65596c5f13d68fa933b822346c3 Mon Sep 17 00:00:00 2001
From: "Andrew \"Akari\" Alexeyew" <akari@dbc.1gb.ua>
Date: Tue, 8 Dec 2015 23:18:48 +0200
Subject: [PATCH] Generalized the Nuevo extractor

Affects: anitube, trollvids, trutube
---
 youtube_dl/extractor/anitube.py   | 34 ++++---------------------------
 youtube_dl/extractor/nuevo.py     | 33 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/trollvids.py | 30 +++++----------------------
 youtube_dl/extractor/trutube.py   | 21 ++++---------------
 4 files changed, 46 insertions(+), 72 deletions(-)
 create mode 100644 youtube_dl/extractor/nuevo.py
diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py
index 23f942ae2..73690df82 100644
--- a/youtube_dl/extractor/anitube.py
+++ b/youtube_dl/extractor/anitube.py
@@ -2,10 +2,10 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .nuevo import NuevoBaseIE
 
 
-class AnitubeIE(InfoExtractor):
+class AnitubeIE(NuevoBaseIE):
     IE_NAME = 'anitube.se'
     _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
 
@@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor):
         key = self._search_regex(
             r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
 
-        config_xml = self._download_xml(
-            'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
-
-        video_title = config_xml.find('title').text
-        thumbnail = config_xml.find('image').text
-        duration = float(config_xml.find('duration').text)
-
-        formats = []
-        video_url = config_xml.find('file')
-        if video_url is not None:
-            formats.append({
-                'format_id': 'sd',
-                'url': video_url.text,
-            })
-        video_url = config_xml.find('filehd')
-        if video_url is not None:
-            formats.append({
-                'format_id': 'hd',
-                'url': video_url.text,
-            })
-
-        return {
-            'id': video_id,
-            'title': video_title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats
-        }
+        config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
+        return self._extract_nuevo(config_url, video_id)
diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py
new file mode 100644
index 000000000..d79bd601d
--- /dev/null
+++ b/youtube_dl/extractor/nuevo.py
@@ -0,0 +1,33 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class NuevoBaseIE(InfoExtractor):
+    def _extract_nuevo(self, config_url, video_id, info=None, ignore_hd=False):
+        """Build an info dict from the Nuevo player XML config at config_url.
+
+        Non-empty config values override info; ignore_hd skips filehd entries.
+        """
+        info = dict(info or {})  # copy so the caller's dict is left untouched
+        sdformats, hdformats = [], []
+        tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip())
+        for child in tree:
+            tag, val = child.tag, (child.text or "").strip()  # text is None when empty
+            if not val:
+                continue  # nothing usable: keep whatever info already holds
+            if tag == "file":
+                sdformats.append({"format_id": "sd", "url": val})
+            elif tag == "filehd" and not ignore_hd:
+                hdformats.append({"format_id": "hd", "url": val})
+            elif tag == "duration":
+                info["duration"] = float_or_none(val)
+            elif tag == "image":
+                info["thumbnail"] = val
+            elif tag == "title":
+                info["title"] = val
+        info["id"] = video_id
+        info["formats"] = sdformats + hdformats
+        return info
diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py
index 2b817639a..c4863cac0 100644
--- a/youtube_dl/extractor/trollvids.py
+++ b/youtube_dl/extractor/trollvids.py
@@ -1,7 +1,7 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
+from .nuevo import NuevoBaseIE
 
 from ..compat import (
     compat_urllib_parse_unquote
@@ -10,7 +10,7 @@ from ..compat import (
 import re
 
 
-class TrollvidsIE(InfoExtractor):
+class TrollvidsIE(NuevoBaseIE):
     _VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)"
     IE_NAME = 'trollvids'
 
@@ -21,35 +21,15 @@ class TrollvidsIE(InfoExtractor):
         raw_video_title = match.group('title')
         video_title = compat_urllib_parse_unquote(raw_video_title)
         url = "http://trollvids.com/video/%s/%s" % (video_id, raw_video_title)
+        config_url = "http://trollvids.com/nuevo/player/config.php?v=%s" % video_id
 
         info = {
-            "id": video_id,
             "title": video_title,
             "webpage_url": url,
             "age_limit": 18
         }
 
-        sdformats = []
-        hdformats = []
-
-        tree = self._download_xml("http://trollvids.com/nuevo/player/config.php?v=%s" % video_id, video_id)
-
-        for child in tree:
-            tag, val = child.tag, child.text
-
-            if tag == "file":
-                sdformats.append({"url": val})
-            elif tag == "filehd":
-                hdformats.append({"url": val})
-            elif tag == "duration":
-                info["duration"] = int(float(val))
-            elif tag == "image":
-                info["thumbnail"] = val
-            elif tag == "title":
-                info["title"] = val
-
-        info["formats"] = sdformats + hdformats
-        return info
+        return self._extract_nuevo(config_url, video_id, info)
 
     _TESTS = [
         {
@@ -60,7 +40,7 @@ class TrollvidsIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': "【MMD R-18】ガールフレンド carry_me_off",
                 'age_limit': 18,
-                'duration': 216,
+                'duration': 216.78,
             },
         },
     ]
diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py
index e7b79243a..e2f5a4a2c 100644
--- a/youtube_dl/extractor/trutube.py
+++ b/youtube_dl/extractor/trutube.py
@@ -1,10 +1,9 @@
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
-from ..utils import xpath_text
+from .nuevo import NuevoBaseIE
 
 
-class TruTubeIE(InfoExtractor):
+class TruTubeIE(NuevoBaseIE):
     _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
@@ -22,19 +21,7 @@ class TruTubeIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
-        config = self._download_xml(
-            'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
-            video_id, transform_source=lambda s: s.strip())
+        config_url = "https://trutube.tv/nuevo/player/config.php?v=%s" % video_id
 
         # filehd is always 404
-        video_url = xpath_text(config, './file', 'video URL', fatal=True)
-        title = xpath_text(config, './title', 'title').strip()
-        thumbnail = xpath_text(config, './image', ' thumbnail')
-
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-            'thumbnail': thumbnail,
-        }
+        return self._extract_nuevo(config_url, video_id, ignore_hd=True)