From 310bcd73008032cb7c310f2f765335f84708ef00 Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" Date: Sat, 12 Dec 2015 09:40:30 +0200 Subject: [PATCH] [nuevo] Complied with the code comments. --- youtube_dl/extractor/nuevo.py | 46 +++++++++++++++++-------------- youtube_dl/extractor/trollvids.py | 23 +++++++++------- youtube_dl/extractor/trutube.py | 10 +++++-- 3 files changed, 45 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py index d79bd601d..ccc697e4f 100644 --- a/youtube_dl/extractor/nuevo.py +++ b/youtube_dl/extractor/nuevo.py @@ -3,31 +3,35 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + float_or_none, + xpath_text +) + class NuevoBaseIE(InfoExtractor): - def _extract_nuevo(self, config_url, video_id, info=None, ignore_hd=False): - if info is None: - info = {} - - sdformats, hdformats = [], [] + def _extract_nuevo(self, config_url, video_id): tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip()) - for child in tree: - tag, val = child.tag, child.text + title = xpath_text(tree, './title') + if title: + title = title.strip() - if tag == "file": - sdformats.append({"url": val}) - elif tag == "filehd" and not ignore_hd: - hdformats.append({"url": val}) - elif tag == "duration": - info["duration"] = float_or_none(val) - elif tag == "image": - info["thumbnail"] = val - elif tag == "title": - info["title"] = val.strip() + thumbnail = xpath_text(tree, './image') + duration = float_or_none(xpath_text(tree, './duration')) - info["id"] = video_id - info["formats"] = sdformats + hdformats + formats = [] + for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')): + video_url = tree.find(element_name) + video_url is None or formats.append({ + 'format_id': format_id, + 'url': video_url.text + }) - return info + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, 
+ 'duration': duration, + 'formats': formats + } diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index c4863cac0..e4fe620f7 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -11,7 +11,7 @@ import re class TrollvidsIE(NuevoBaseIE): - _VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)" + _VALID_URL = r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)' IE_NAME = 'trollvids' def _real_extract(self, url): @@ -19,17 +19,20 @@ class TrollvidsIE(NuevoBaseIE): video_id = match.group('id') raw_video_title = match.group('title') - video_title = compat_urllib_parse_unquote(raw_video_title) - url = "http://trollvids.com/video/%s/%s" % (video_id, raw_video_title) - config_url = "http://trollvids.com/nuevo/player/config.php?v=%s" % video_id + url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title) + config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id - info = { - "title": video_title, - "webpage_url": url, - "age_limit": 18 - } + info = self._extract_nuevo(config_url, video_id) - return self._extract_nuevo(config_url, video_id, info) + info.update({ + 'webpage_url': url, + 'age_limit': 18 + }) + + if 'title' not in info: + info['title'] = compat_urllib_parse_unquote(raw_video_title) + + return info _TESTS = [ { diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py index e2f5a4a2c..d7ec2ec26 100644 --- a/youtube_dl/extractor/trutube.py +++ b/youtube_dl/extractor/trutube.py @@ -21,7 +21,11 @@ class TruTubeIE(NuevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - config_url = "https://trutube.tv/nuevo/player/config.php?v=%s" % video_id + config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id - # filehd is always 404 - return self._extract_nuevo(config_url, video_id, ignore_hd=True) + info = self._extract_nuevo(config_url, video_id) + + # filehd always 
404s + info['formats'] = info['formats'][:1] + + return info