diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index e9209d701..e16c10f97 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -159,14 +159,6 @@ class PornHubIE(PornHubBaseIE): return str_to_int(self._search_regex( pattern, webpage, '%s count' % name, fatal=False)) - @staticmethod - def _get_text(class_name, page): - div = re.findall(r'
\s+[^\n]+\s+([^\n]+)', page) - if div: - return [a for a in re.findall(r']+>([^<]+)', div[0])] - else: - return [] - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') or 'pornhub.com' @@ -310,8 +302,14 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') - categories = self._get_text("categoriesWrapper", webpage) - tags = self._get_text("tagsWrapper", webpage) + def _get_text(class_name, page): + div = re.search( + r'
\s+[^\n]+\s+([^\n]+)\s+[^\n]+\s+
', page) + if div: + return [a for a in re.findall(r'
]+>([^<]+)', div.group(1))] + + categories = _get_text('categoriesWrapper', webpage) + tags = _get_text('tagsWrapper', webpage) return { 'id': video_id,