Generalized the Nuevo extractor

Affects: anitube, trollvids, trutube
This commit is contained in:
Andrew "Akari" Alexeyew 2015-12-08 23:18:48 +02:00
parent 73d918b0da
commit 0dc7852080
4 changed files with 46 additions and 72 deletions

View File

@ -2,10 +2,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .nuevo import NuevoBaseIE
class AnitubeIE(InfoExtractor): class AnitubeIE(NuevoBaseIE):
IE_NAME = 'anitube.se' IE_NAME = 'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor):
key = self._search_regex( key = self._search_regex(
r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
config_xml = self._download_xml( config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) return self._extract_nuevo(config_url, video_id)
video_title = config_xml.find('title').text
thumbnail = config_xml.find('image').text
duration = float(config_xml.find('duration').text)
formats = []
video_url = config_xml.find('file')
if video_url is not None:
formats.append({
'format_id': 'sd',
'url': video_url.text,
})
video_url = config_xml.find('filehd')
if video_url is not None:
formats.append({
'format_id': 'hd',
'url': video_url.text,
})
return {
'id': video_id,
'title': video_title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats
}

View File

@ -0,0 +1,33 @@
# encoding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import float_or_none
class NuevoBaseIE(InfoExtractor):
    """Shared helper for extractors that read a Nuevo-style XML config."""

    def _extract_nuevo(self, config_url, video_id, info=None, ignore_hd=False):
        """Download the XML config at config_url and build an info dict.

        The direct children of the config root are scanned for these tags:
        ``file`` (SD video URL), ``filehd`` (HD video URL), ``duration``,
        ``image`` (thumbnail URL) and ``title``. Any other tag is ignored.

        Arguments:
        config_url -- URL of the XML config document.
        video_id   -- video identifier; passed to the download call and
                      stored as ``info["id"]``.
        info       -- optional dict of pre-filled fields. It is updated in
                      place and returned; non-empty values found in the
                      config override entries already present.
        ignore_hd  -- when true, ``filehd`` elements are skipped.

        Returns the info dict with "id" and "formats" always set. SD formats
        are listed before HD ones.
        """
        if info is None:
            info = {}

        sdformats, hdformats = [], []

        # The raw document is stripped before parsing; presumably some
        # configs carry leading whitespace that would break the XML parser.
        tree = self._download_xml(
            config_url, video_id, transform_source=lambda s: s.strip())

        for child in tree:
            tag = child.tag
            # Element.text is None for an empty element such as <title/>,
            # so normalise before calling string methods on it.
            val = (child.text or "").strip()
            if not val:
                # Nothing usable: keep whatever the caller supplied in info
                # and do not emit a format entry without a URL.
                continue

            if tag == "file":
                sdformats.append({"url": val, "format_id": "sd"})
            elif tag == "filehd" and not ignore_hd:
                hdformats.append({"url": val, "format_id": "hd"})
            elif tag == "duration":
                info["duration"] = float_or_none(val)
            elif tag == "image":
                info["thumbnail"] = val
            elif tag == "title":
                info["title"] = val

        info["id"] = video_id
        info["formats"] = sdformats + hdformats
        return info

View File

@ -1,7 +1,7 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .nuevo import NuevoBaseIE
from ..compat import ( from ..compat import (
compat_urllib_parse_unquote compat_urllib_parse_unquote
@ -10,7 +10,7 @@ from ..compat import (
import re import re
class TrollvidsIE(InfoExtractor): class TrollvidsIE(NuevoBaseIE):
_VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)" _VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)"
IE_NAME = 'trollvids' IE_NAME = 'trollvids'
@ -21,35 +21,15 @@ class TrollvidsIE(InfoExtractor):
raw_video_title = match.group('title') raw_video_title = match.group('title')
video_title = compat_urllib_parse_unquote(raw_video_title) video_title = compat_urllib_parse_unquote(raw_video_title)
url = "http://trollvids.com/video/%s/%s" % (video_id, raw_video_title) url = "http://trollvids.com/video/%s/%s" % (video_id, raw_video_title)
config_url = "http://trollvids.com/nuevo/player/config.php?v=%s" % video_id
info = { info = {
"id": video_id,
"title": video_title, "title": video_title,
"webpage_url": url, "webpage_url": url,
"age_limit": 18 "age_limit": 18
} }
sdformats = [] return self._extract_nuevo(config_url, video_id, info)
hdformats = []
tree = self._download_xml("http://trollvids.com/nuevo/player/config.php?v=%s" % video_id, video_id)
for child in tree:
tag, val = child.tag, child.text
if tag == "file":
sdformats.append({"url": val})
elif tag == "filehd":
hdformats.append({"url": val})
elif tag == "duration":
info["duration"] = int(float(val))
elif tag == "image":
info["thumbnail"] = val
elif tag == "title":
info["title"] = val
info["formats"] = sdformats + hdformats
return info
_TESTS = [ _TESTS = [
{ {
@ -60,7 +40,7 @@ class TrollvidsIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': "【MMD R-18】ガールフレンド carry_me_off", 'title': "【MMD R-18】ガールフレンド carry_me_off",
'age_limit': 18, 'age_limit': 18,
'duration': 216, 'duration': 216.78,
}, },
}, },
] ]

View File

@ -1,10 +1,9 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .nuevo import NuevoBaseIE
from ..utils import xpath_text
class TruTubeIE(InfoExtractor): class TruTubeIE(NuevoBaseIE):
_VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
@ -22,19 +21,7 @@ class TruTubeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
config_url = "https://trutube.tv/nuevo/player/config.php?v=%s" % video_id
config = self._download_xml(
'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
video_id, transform_source=lambda s: s.strip())
# filehd is always 404 # filehd is always 404
video_url = xpath_text(config, './file', 'video URL', fatal=True) return self._extract_nuevo(config_url, video_id, ignore_hd=True)
title = xpath_text(config, './title', 'title').strip()
thumbnail = xpath_text(config, './image', ' thumbnail')
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
}