Merge branch 'master' into PornHub-issue-16078

This commit is contained in:
Parmjit Virk 2018-09-11 16:57:16 -05:00
commit 0f0f298f2c
3 changed files with 61 additions and 23 deletions

View File

@ -9,6 +9,7 @@ from ..utils import (
encode_base_n,
ExtractorError,
int_or_none,
merge_dicts,
parse_duration,
str_to_int,
url_or_none,
@ -25,10 +26,16 @@ class EpornerIE(InfoExtractor):
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
'ext': 'mp4',
'title': 'Infamous Tiffany Teen Strip Tease Video',
'description': 'md5:764f39abf932daafa37485eb46efa152',
'timestamp': 1232520922,
'upload_date': '20090121',
'duration': 1838,
'view_count': int,
'age_limit': 18,
},
'params': {
'proxy': '127.0.0.1:8118'
}
}, {
# New (May 2016) URL layout
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
@ -104,12 +111,15 @@ class EpornerIE(InfoExtractor):
})
self._sort_formats(formats)
duration = parse_duration(self._html_search_meta('duration', webpage))
json_ld = self._search_json_ld(webpage, display_id, default={})
duration = parse_duration(self._html_search_meta(
'duration', webpage, default=None))
view_count = str_to_int(self._search_regex(
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
webpage, 'view count', fatal=False))
return {
return merge_dicts(json_ld, {
'id': video_id,
'display_id': display_id,
'title': title,
@ -117,4 +127,4 @@ class EpornerIE(InfoExtractor):
'view_count': view_count,
'formats': formats,
'age_limit': 18,
}
})

View File

@ -45,7 +45,7 @@ class Tube8IE(KeezMoviesIE):
r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
description = self._html_search_regex(
r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False)
r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
uploader = self._html_search_regex(
r'<span class="username">\s*(.+?)\s*<',
webpage, 'uploader', fatal=False)
@ -55,19 +55,19 @@ class Tube8IE(KeezMoviesIE):
dislike_count = int_or_none(self._search_regex(
r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
view_count = str_to_int(self._search_regex(
r'<strong>Views: </strong>([\d,\.]+)\s*</li>',
r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
webpage, 'view count', fatal=False))
comment_count = str_to_int(self._search_regex(
r'<span id="allCommentsCount">(\d+)</span>',
webpage, 'comment count', fatal=False))
category = self._search_regex(
r'Category:\s*</strong>\s*<a[^>]+href=[^>]+>([^<]+)',
r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
webpage, 'category', fatal=False)
categories = [category] if category else None
tags_str = self._search_regex(
r'(?s)Tags:\s*</strong>(.+?)</(?!a)',
r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
webpage, 'tags', fatal=False)
tags = [t for t in re.findall(
r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None

View File

@ -4,15 +4,19 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
float_or_none,
unified_timestamp,
url_or_none,
)
class VzaarIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
_TESTS = [{
# HTTP and HLS
'url': 'https://vzaar.com/videos/1152805',
'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
'info_dict': {
@ -40,24 +44,48 @@ class VzaarIE(InfoExtractor):
video_id = self._match_id(url)
video_data = self._download_json(
'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
source_url = video_data['sourceUrl']
info = {
title = video_data['videoTitle']
formats = []
source_url = url_or_none(video_data.get('sourceUrl'))
if source_url:
f = {
'url': source_url,
'format_id': 'http',
}
if 'audio' in source_url:
f.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
f.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
'fps': float_or_none(video_data.get('fps')),
})
formats.append(f)
video_guid = video_data.get('guid')
usp = video_data.get('usp')
if isinstance(video_guid, compat_str) and isinstance(usp, dict):
m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?'
% (video_guid, video_id)) + '&'.join(
'%s=%s' % (k, v) for k, v in usp.items())
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['videoTitle'],
'url': source_url,
'title': title,
'thumbnail': self._proto_relative_url(video_data.get('poster')),
'duration': float_or_none(video_data.get('videoDuration')),
'timestamp': unified_timestamp(video_data.get('ts')),
'formats': formats,
}
if 'audio' in source_url:
info.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
info.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
})
return info