From 0dc78520808cb65596c5f13d68fa933b822346c3 Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" Date: Tue, 8 Dec 2015 23:18:48 +0200 Subject: [PATCH] Generalized the Nuevo extractor Affects: anitube, trollvids, trutube --- youtube_dl/extractor/anitube.py | 34 ++++--------------------------- youtube_dl/extractor/nuevo.py | 33 ++++++++++++++++++++++++++++++ youtube_dl/extractor/trollvids.py | 30 +++++---------------------- youtube_dl/extractor/trutube.py | 21 ++++--------------- 4 files changed, 46 insertions(+), 72 deletions(-) create mode 100644 youtube_dl/extractor/nuevo.py diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 23f942ae2..73690df82 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -2,10 +2,10 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .nuevo import NuevoBaseIE -class AnitubeIE(InfoExtractor): +class AnitubeIE(NuevoBaseIE): IE_NAME = 'anitube.se' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' @@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor): key = self._search_regex( r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') - config_xml = self._download_xml( - 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) - - video_title = config_xml.find('title').text - thumbnail = config_xml.find('image').text - duration = float(config_xml.find('duration').text) - - formats = [] - video_url = config_xml.find('file') - if video_url is not None: - formats.append({ - 'format_id': 'sd', - 'url': video_url.text, - }) - video_url = config_xml.find('filehd') - if video_url is not None: - formats.append({ - 'format_id': 'hd', - 'url': video_url.text, - }) - - return { - 'id': video_id, - 'title': video_title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats - } + config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key + return self._extract_nuevo(config_url, video_id) diff 
--git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py new file mode 100644 index 000000000..d79bd601d --- /dev/null +++ b/youtube_dl/extractor/nuevo.py @@ -0,0 +1,33 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import float_or_none + +class NuevoBaseIE(InfoExtractor): + def _extract_nuevo(self, config_url, video_id, info=None, ignore_hd=False): + if info is None: + info = {} + + sdformats, hdformats = [], [] + tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip()) + + for child in tree: + tag, val = child.tag, child.text + + if tag == "file": + sdformats.append({"url": val}) + elif tag == "filehd" and not ignore_hd: + hdformats.append({"url": val}) + elif tag == "duration": + info["duration"] = float_or_none(val) + elif tag == "image": + info["thumbnail"] = val + elif tag == "title": + info["title"] = val.strip() + + info["id"] = video_id + info["formats"] = sdformats + hdformats + + return info diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index 2b817639a..c4863cac0 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -1,7 +1,7 @@ # encoding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor +from .nuevo import NuevoBaseIE from ..compat import ( compat_urllib_parse_unquote @@ -10,7 +10,7 @@ from ..compat import ( import re -class TrollvidsIE(InfoExtractor): +class TrollvidsIE(NuevoBaseIE): _VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)" IE_NAME = 'trollvids' @@ -21,35 +21,15 @@ class TrollvidsIE(InfoExtractor): raw_video_title = match.group('title') video_title = compat_urllib_parse_unquote(raw_video_title) url = "http://trollvids.com/video/%s/%s" % (video_id, raw_video_title) + config_url = "http://trollvids.com/nuevo/player/config.php?v=%s" % video_id info = { - "id": video_id, "title": video_title, 
"webpage_url": url, "age_limit": 18 } - sdformats = [] - hdformats = [] - - tree = self._download_xml("http://trollvids.com/nuevo/player/config.php?v=%s" % video_id, video_id) - - for child in tree: - tag, val = child.tag, child.text - - if tag == "file": - sdformats.append({"url": val}) - elif tag == "filehd": - hdformats.append({"url": val}) - elif tag == "duration": - info["duration"] = int(float(val)) - elif tag == "image": - info["thumbnail"] = val - elif tag == "title": - info["title"] = val - - info["formats"] = sdformats + hdformats - return info + return self._extract_nuevo(config_url, video_id, info) _TESTS = [ { @@ -60,7 +40,7 @@ class TrollvidsIE(InfoExtractor): 'ext': 'mp4', 'title': "【MMD R-18】ガールフレンド carry_me_off", 'age_limit': 18, - 'duration': 216, + 'duration': 216.78, }, }, ] diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py index e7b79243a..e2f5a4a2c 100644 --- a/youtube_dl/extractor/trutube.py +++ b/youtube_dl/extractor/trutube.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import xpath_text +from .nuevo import NuevoBaseIE -class TruTubeIE(InfoExtractor): +class TruTubeIE(NuevoBaseIE): _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', @@ -22,19 +21,7 @@ class TruTubeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - - config = self._download_xml( - 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, - video_id, transform_source=lambda s: s.strip()) + config_url = "https://trutube.tv/nuevo/player/config.php?v=%s" % video_id # filehd is always 404 - video_url = xpath_text(config, './file', 'video URL', fatal=True) - title = xpath_text(config, './title', 'title').strip() - thumbnail = xpath_text(config, './image', ' thumbnail') - - return { - 'id': video_id, - 'url': 
video_url, - 'title': title, - 'thumbnail': thumbnail, - } + return self._extract_nuevo(config_url, video_id, ignore_hd=True)