diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index e032817f2..329cf07e2 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -22,6 +22,7 @@ from ..utils import (
     # sanitized_Request,
     remove_quotes,
     str_to_int,
+    bug_reports_message,
 )
 # from ..aes import (
 #     aes_decrypt_text
@@ -287,6 +288,27 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
         'only_matching': True,
     }]
 
+    def _filter_user_vids(self, webpage):
+        """Return webpage trimmed to start at the user's upload list.
+
+        PornHub sends (un-rendered) related videos as part of the userlist
+        webpage. Everything before the start of the userlist is dropped to
+        avoid downloading unnecessary videos. If the start marker is not
+        found, a warning is reported and the webpage is returned unchanged.
+        """
+        # FIXME(review): the marker literal reached this copy of the patch
+        # damaged (only a line break survives inside the quotes, presumably
+        # where markup was stripped). Restore the real marker before merging.
+        user_list_start = webpage.find('\n')
+        if user_list_start >= 0:
+            return webpage[user_list_start:]
+
+        # Getting here means PornHub changed layout of the user page.
+        self.report_warning(
+            "Could not find start of user's upload list, "
+            "downloading all videos in webpage%s" % bug_reports_message())
+        return webpage
+
     def _real_extract(self, url):
         user_id = self._match_id(url)
 
@@ -300,6 +322,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                     break
                 raise
+            webpage = self._filter_user_vids(webpage)
             page_entries = self._extract_entries(webpage)
             if not page_entries:
                 break