Revert .gitignore commit
Revert .gitignore commit
This commit is contained in:
parent
0392a82cc6
commit
a0ddf26b26
BIN
AtomicParsley.exe
Normal file
BIN
AtomicParsley.exe
Normal file
Binary file not shown.
BIN
ffmpeg.exe
Normal file
BIN
ffmpeg.exe
Normal file
Binary file not shown.
BIN
ffprobe.exe
Normal file
BIN
ffprobe.exe
Normal file
Binary file not shown.
24
test_fork.py
Normal file
24
test_fork.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""
|
||||
test_fork.py is not added to git (only for local testing)
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import youtube_dl
|
||||
|
||||
ydl_opts = {
|
||||
'skip_download': True,
|
||||
}
|
||||
|
||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['http://www.pornhub.com/view_video.php?viewkey=1331683002'])
|
||||
|
||||
exit()
|
||||
|
||||
ydl_opts = {
|
||||
'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]',
|
||||
'username': "tdsist",
|
||||
'password': "ZtOUktu0QP",
|
||||
}
|
||||
|
||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['https://fr.pornhubpremium.com/view_video.php?viewkey=ph5bdbb2cd6df6a'])
|
451
youtube_dl/extractor/pornhubpremium.py
Normal file
451
youtube_dl/extractor/pornhubpremium.py
Normal file
@ -0,0 +1,451 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import itertools
|
||||
import operator
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PorHubPremiumBaseIE(InfoExtractor):
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
def dl(*args, **kwargs):
|
||||
return super(PorHubPremiumBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
|
||||
webpage, urlh = dl(*args, **kwargs)
|
||||
|
||||
if any(re.search(p, webpage) for p in (
|
||||
r'<body\b[^>]+\bonload=["\']go\(\)',
|
||||
r'document\.cookie\s*=\s*["\']RNKEY=',
|
||||
r'document\.location\.reload\(true\)')):
|
||||
url_or_request = args[0]
|
||||
url = (url_or_request.get_full_url()
|
||||
if isinstance(url_or_request, compat_urllib_request.Request)
|
||||
else url_or_request)
|
||||
phantom = PhantomJSwrapper(self, required_version='2.0')
|
||||
phantom.get(url, html=webpage)
|
||||
webpage, urlh = dl(*args, **kwargs)
|
||||
|
||||
return webpage, urlh
|
||||
|
||||
|
||||
class PorHubPremiumIE(PorHubPremiumBaseIE):
|
||||
IE_DESC = 'PorHubPremium and Thumbzilla'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[\da-z]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||
'md5': '1e19b41231a02eba417839222ac9d58e',
|
||||
'info_dict': {
|
||||
'id': '648719015',
|
||||
'ext': 'mp4',
|
||||
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
|
||||
'uploader': 'Babes',
|
||||
'upload_date': '20130628',
|
||||
'duration': 361,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
},
|
||||
}, {
|
||||
# non-ASCII title
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
|
||||
'info_dict': {
|
||||
'id': '1331683002',
|
||||
'ext': 'mp4',
|
||||
'title': '重庆婷婷女王足交',
|
||||
'uploader': 'Unknown',
|
||||
'upload_date': '20150213',
|
||||
'duration': 1753,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# subtitles
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
||||
'info_dict': {
|
||||
'id': 'ph5af5fef7c2aa7',
|
||||
'ext': 'mp4',
|
||||
'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
|
||||
'uploader': 'BFFs',
|
||||
'duration': 622,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
"ext": 'srt'
|
||||
}]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# removed at the request of cam4.com
|
||||
'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# removed at the request of the copyright owner
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# removed by uploader
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private video
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
|
||||
webpage)
|
||||
|
||||
def _extract_count(self, pattern, webpage, name):
|
||||
return str_to_int(self._search_regex(
|
||||
pattern, webpage, '%s count' % name, fatal=False))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host') or 'pornhub.com'
|
||||
video_id = mobj.group('id')
|
||||
|
||||
self._set_cookie(host, 'age_verified', '1')
|
||||
|
||||
def dl_webpage(platform):
|
||||
self._set_cookie(host, 'platform', platform)
|
||||
return self._download_webpage(
|
||||
'http://www.%s/view_video.php?viewkey=%s' % (host, video_id),
|
||||
video_id, 'Downloading %s webpage' % platform)
|
||||
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
webpage, 'error message', default=None, group='error')
|
||||
if error_msg:
|
||||
error_msg = re.sub(r'\s+', ' ', error_msg)
|
||||
raise ExtractorError(
|
||||
'PorHubPremium said: %s' % error_msg,
|
||||
expected=True, video_id=video_id)
|
||||
|
||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||
# on that anymore.
|
||||
title = self._html_search_meta(
|
||||
'twitter:title', webpage, default=None) or self._search_regex(
|
||||
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
|
||||
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
|
||||
r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
|
||||
webpage, 'title', group='title')
|
||||
|
||||
video_urls = []
|
||||
video_urls_set = set()
|
||||
subtitles = {}
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||
video_id)
|
||||
if flashvars:
|
||||
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': 'srt',
|
||||
})
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
media_definitions = flashvars.get('mediaDefinitions')
|
||||
if isinstance(media_definitions, list):
|
||||
for definition in media_definitions:
|
||||
if not isinstance(definition, dict):
|
||||
continue
|
||||
video_url = definition.get('videoUrl')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
if video_url in video_urls_set:
|
||||
continue
|
||||
video_urls_set.add(video_url)
|
||||
video_urls.append(
|
||||
(video_url, int_or_none(definition.get('quality'))))
|
||||
else:
|
||||
thumbnail, duration = [None] * 2
|
||||
|
||||
if not video_urls:
|
||||
tv_webpage = dl_webpage('tv')
|
||||
|
||||
assignments = self._search_regex(
|
||||
r'(var.+?mediastring.+?)</script>', tv_webpage,
|
||||
'encoded url').split(';')
|
||||
|
||||
js_vars = {}
|
||||
|
||||
def parse_js_value(inp):
|
||||
inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
|
||||
if '+' in inp:
|
||||
inps = inp.split('+')
|
||||
return functools.reduce(
|
||||
operator.concat, map(parse_js_value, inps))
|
||||
inp = inp.strip()
|
||||
if inp in js_vars:
|
||||
return js_vars[inp]
|
||||
return remove_quotes(inp)
|
||||
|
||||
for assn in assignments:
|
||||
assn = assn.strip()
|
||||
if not assn:
|
||||
continue
|
||||
assn = re.sub(r'var\s+', '', assn)
|
||||
vname, value = assn.split('=', 1)
|
||||
js_vars[vname] = parse_js_value(value)
|
||||
|
||||
video_url = js_vars['mediastring']
|
||||
if video_url not in video_urls_set:
|
||||
video_urls.append((video_url, None))
|
||||
video_urls_set.add(video_url)
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage):
|
||||
video_url = mobj.group('url')
|
||||
if video_url not in video_urls_set:
|
||||
video_urls.append((video_url, None))
|
||||
video_urls_set.add(video_url)
|
||||
|
||||
upload_date = None
|
||||
formats = []
|
||||
for video_url, height in video_urls:
|
||||
if not upload_date:
|
||||
upload_date = self._search_regex(
|
||||
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
|
||||
if upload_date:
|
||||
upload_date = upload_date.replace('/', '')
|
||||
if determine_ext(video_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
tbr = None
|
||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
|
||||
if mobj:
|
||||
if not height:
|
||||
height = int(mobj.group('height'))
|
||||
tbr = int(mobj.group('tbr'))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%dp' % height if height else None,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
||||
like_count = self._extract_count(
|
||||
r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
||||
dislike_count = self._extract_count(
|
||||
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
def extract_list(meta_key):
|
||||
div = self._search_regex(
|
||||
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
|
||||
% meta_key, webpage, meta_key, default=None)
|
||||
if div:
|
||||
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': upload_date,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'tags': extract_list('tags'),
|
||||
'categories': extract_list('categories'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class PorHubPremiumPlaylistBaseIE(PorHubPremiumBaseIE):
|
||||
def _extract_entries(self, webpage, host):
|
||||
# Only process container div with main playlist content skipping
|
||||
# drop-down menu that uses similar pattern for videos (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11594).
|
||||
container = self._search_regex(
|
||||
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
|
||||
'container', default=webpage)
|
||||
|
||||
return [
|
||||
self.url_result(
|
||||
'http://www.%s/%s' % (host, video_url),
|
||||
PorHubPremiumIE.ie_key(), video_title=title)
|
||||
for video_url, title in orderedSet(re.findall(
|
||||
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
|
||||
container))
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = self._extract_entries(webpage, host)
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
|
||||
'playlist', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
title = playlist.get('title') or self._search_regex(
|
||||
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, title, playlist.get('description'))
|
||||
|
||||
|
||||
class PorHubPremiumPlaylistIE(PorHubPremiumPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/playlist/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/playlist/4667351',
|
||||
'info_dict': {
|
||||
'id': '4667351',
|
||||
'title': 'Nataly Hot',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'https://de.pornhub.com/playlist/4667351',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class PorHubPremiumUserVideosIE(PorHubPremiumPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'info_dict': {
|
||||
'id': 'zoe_ph',
|
||||
},
|
||||
'playlist_mincount': 171,
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/users/rushandlia/videos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# default sorting as Top Rated Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos',
|
||||
'info_dict': {
|
||||
'id': 'povd',
|
||||
},
|
||||
'playlist_mincount': 293,
|
||||
}, {
|
||||
# Top Rated Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Most Recent Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Most Viewed Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/jayndrea/videos/upload',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
user_id = mobj.group('id')
|
||||
|
||||
entries = []
|
||||
for page_num in itertools.count(1):
|
||||
try:
|
||||
webpage = self._download_webpage(
|
||||
url, user_id, 'Downloading page %d' % page_num,
|
||||
query={'page': page_num})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
break
|
||||
raise
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
entries.extend(page_entries)
|
||||
|
||||
return self.playlist_result(entries, user_id)
|
38
youtube_dl/extractor/yourextractor.py
Normal file
38
youtube_dl/extractor/yourextractor.py
Normal file
@ -0,0 +1,38 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class YourExtractorIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://yourextractor.com/watch/42',
|
||||
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
||||
'info_dict': {
|
||||
'id': '42',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video title goes here',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
# TODO more properties, either as:
|
||||
# * A value
|
||||
# * MD5 checksum; start the string with md5:
|
||||
# * A regular expression; start the string with re:
|
||||
# * Any Python type (for example int or float)
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user