Fixed extractor

This commit is contained in:
JChris246 2019-02-02 13:13:48 -04:00 committed by GitHub
parent 7c5307f4c4
commit d472ea4192
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -6,8 +6,8 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_duration,
parse_resolution,
str_to_int,
urljoin,
)
@@ -64,47 +64,37 @@ class VpornIE(InfoExtractor):
title = self._html_search_regex(
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
description = self._html_search_regex(
r'class="(?:descr|description_txt)">(.*?)</div>',
webpage, 'description', fatal=False)
thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
default=None))
uploader = self._html_search_regex(
r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
webpage, 'uploader', fatal=False)
description = self._search_regex(r'[^>]*class="(?:sidebar-box)"[^>]*>[\n]<p>(.*?)</p>',
webpage, 'description', fatal=False)
categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
thumbnail = self._search_regex(r'<video[^>]+poster="([^"])"', webpage, 'thumbnail', default=None) or self._search_regex(r'posterurl\s=\s\'([^\']+)', webpage, 'thumbnail', fatal=False)
uploader = self._search_regex(r'class="avatarname">(.*?)</span>',
webpage, 'uploader', fatal=False)
categories = re.findall(r'<a[^>]*class="tags links"[^>]*>([^<]+)</a>', webpage)
duration = parse_duration(self._search_regex(
r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
r'class="durat-img"[^>]*>\s*(\d+ min \d+ sec)',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'class="views">([\d,\.]+) [Vv]iews<',
r'class="view-count">[\n]([\d,\.]+) [Vv]iews[\n]<',
webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
r"'Comments \(([\d,\.]+)\)'",
webpage, 'comment count', default=None))
formats = []
for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
video_url = video[1]
fmt = {
'url': video_url,
'format_id': video[0],
}
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
if m:
fmt.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
'vbr': int(m.group('vbr')),
})
formats.append(fmt)
for mobj in re.finditer(r'<source[^>]+src="([^"]+)"[^>]+label="([^"]+)[^>]*>', webpage):
f = parse_resolution(mobj.group(2))
f.update({
'url': mobj.group(1),
'format_id': mobj.group(2),
})
formats.append(f)
self._sort_formats(formats)
return {