[pornhub] Fix downloading of unwanted videos from user page (issue#12819)

This commit is contained in:
Puffington Toast 2017-08-12 16:00:28 -07:00
parent 2bb34f0ad2
commit ef7f27ae73

View File

@ -22,6 +22,7 @@ from ..utils import (
# sanitized_Request, # sanitized_Request,
remove_quotes, remove_quotes,
str_to_int, str_to_int,
bug_reports_message,
) )
# from ..aes import ( # from ..aes import (
# aes_decrypt_text # aes_decrypt_text
@ -287,6 +288,18 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _filter_user_vids(self, webpage):
    """Return the part of *webpage* that starts at the user's upload list.

    PornHub sends (un-rendered) related videos as part of the userlist
    webpage, ahead of the user's own uploads. Everything before the
    upload-list container is dropped so that those unrelated videos are
    not picked up by the entry extraction that runs on the result.

    If the container cannot be found, a warning is reported and the
    page is returned unchanged.
    """
    list_marker = '<div class="videoUList">'
    marker_pos = webpage.find(list_marker)

    if marker_pos < 0:
        # The marker is missing, which means PornHub changed the layout
        # of the user page. Fall back to the complete page.
        self.report_warning(
            "Could not find start of user's upload list, "
            "downloading all videos in webpage%s" % bug_reports_message())
        return webpage

    # Keep the marker itself and everything after it.
    return webpage[marker_pos:]
def _real_extract(self, url): def _real_extract(self, url):
user_id = self._match_id(url) user_id = self._match_id(url)
@ -300,6 +313,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
break break
raise raise
webpage = self._filter_user_vids(webpage)
page_entries = self._extract_entries(webpage) page_entries = self._extract_entries(webpage)
if not page_entries: if not page_entries:
break break