diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 8bf4d9f62..1a2f07345 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -159,10 +159,12 @@ class PornHubIE(PornHubBaseIE): return str_to_int(self._search_regex( pattern, webpage, '%s count' % name, fatal=False)) - def _get_text(self, str): + def _get_text(self, str, page): l = [] - for a in re.finditer(r']+>([^<]+)', str): - l.append(a.group(1)) + div = re.search(r'
\s+[^\n]+\s+([^\n]+)', page) + if div: + for a in re.finditer(r']+>([^<]+)', div.group(1)): + l.append(a.group(1)) return l def _real_extract(self, url): @@ -308,17 +310,8 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') - div = re.search(r'
\s+[^\n]+\s+([^\n]+)', webpage) - if div: - categories = self._get_text(div.group(1)) - else: - categories = None - - div = re.search(r'
\s+Tags: \s+([^\n]+)', webpage) - if div: - tags = self._get_text(div.group(1)) - else: - tags = None + categories = self._get_text("categoriesWrapper", webpage) + tags = self._get_text("tagsWrapper", webpage) return { 'id': video_id,