Fixed extractor

This commit is contained in:
JChris246 2019-02-02 13:13:48 -04:00 committed by GitHub
parent 7c5307f4c4
commit d472ea4192
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -6,8 +6,8 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
parse_duration, parse_duration,
parse_resolution,
str_to_int, str_to_int,
urljoin,
) )
@@ -64,47 +64,37 @@ class VpornIE(InfoExtractor):
title = self._html_search_regex( title = self._html_search_regex(
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
description = self._html_search_regex(
r'class="(?:descr|description_txt)">(.*?)</div>',
webpage, 'description', fatal=False)
thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
default=None))
uploader = self._html_search_regex( description = self._search_regex(r'[^>]*class="(?:sidebar-box)"[^>]*>[\n]<p>(.*?)</p>',
r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>', webpage, 'description', fatal=False)
webpage, 'uploader', fatal=False)
categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage) thumbnail = self._search_regex(r'<video[^>]+poster="([^"])"', webpage, 'thumbnail', default=None) or self._search_regex(r'posterurl\s=\s\'([^\']+)', webpage, 'thumbnail', fatal=False)
uploader = self._search_regex(r'class="avatarname">(.*?)</span>',
webpage, 'uploader', fatal=False)
categories = re.findall(r'<a[^>]*class="tags links"[^>]*>([^<]+)</a>', webpage)
duration = parse_duration(self._search_regex( duration = parse_duration(self._search_regex(
r'Runtime:\s*</span>\s*(\d+ min \d+ sec)', r'class="durat-img"[^>]*>\s*(\d+ min \d+ sec)',
webpage, 'duration', fatal=False)) webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'class="views">([\d,\.]+) [Vv]iews<', r'class="view-count">[\n]([\d,\.]+) [Vv]iews[\n]<',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex( comment_count = str_to_int(self._html_search_regex(
r"'Comments \(([\d,\.]+)\)'", r"'Comments \(([\d,\.]+)\)'",
webpage, 'comment count', default=None)) webpage, 'comment count', default=None))
formats = [] formats = []
for mobj in re.finditer(r'<source[^>]+src="([^"]+)"[^>]+label="([^"]+)[^>]*>', webpage):
for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage): f = parse_resolution(mobj.group(2))
video_url = video[1] f.update({
fmt = { 'url': mobj.group(1),
'url': video_url, 'format_id': mobj.group(2),
'format_id': video[0], })
} formats.append(f)
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
if m:
fmt.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
'vbr': int(m.group('vbr')),
})
formats.append(fmt)
self._sort_formats(formats) self._sort_formats(formats)
return { return {