diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 4312db656..8bf4d9f62 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -159,6 +159,12 @@ class PornHubIE(PornHubBaseIE): return str_to_int(self._search_regex( pattern, webpage, '%s count' % name, fatal=False)) + def _get_text(self, str): + l = [] + for a in re.finditer(r']+>([^<]+)', str): + l.append(a.group(1)) + return l + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') or 'pornhub.com' @@ -302,15 +308,17 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') - categories = [] - cat_div = re.search(r'
\s+Categories: \s+([^\n]+)', webpage) - for a in re.finditer(r']+Category[^>]*>([^<]+)', cat_div.group(1)): - categories.append(a.group(1)) + div = re.search(r'
\s+[^\n]+\s+([^\n]+)', webpage) + if div: + categories = self._get_text(div.group(1)) + else: + categories = None - tags = [] - tag_div = re.search(r'