Improve the function that extracts categories and tags
This commit is contained in:
parent
eb6f5ea0b0
commit
32f6c118cb
@ -159,10 +159,12 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
return str_to_int(self._search_regex(
|
return str_to_int(self._search_regex(
|
||||||
pattern, webpage, '%s count' % name, fatal=False))
|
pattern, webpage, '%s count' % name, fatal=False))
|
||||||
|
|
||||||
def _get_text(self, str):
|
def _get_text(self, str, page):
|
||||||
l = []
|
l = []
|
||||||
for a in re.finditer(r'<a href=[^>]+>([^<]+)', str):
|
div = re.search(r'<div class="categoriesWrapper">\s+[^\n]+\s+([^\n]+)', page)
|
||||||
l.append(a.group(1))
|
if div:
|
||||||
|
for a in re.finditer(r'<a href=[^>]+>([^<]+)', div.group(1)):
|
||||||
|
l.append(a.group(1))
|
||||||
return l
|
return l
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -308,17 +310,8 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
div = re.search(r'<div class="categoriesWrapper">\s+[^\n]+\s+([^\n]+)', webpage)
|
categories = self._get_text("categoriesWrapper", webpage)
|
||||||
if div:
|
tags = self._get_text("tagsWrapper", webpage)
|
||||||
categories = self._get_text(div.group(1))
|
|
||||||
else:
|
|
||||||
categories = None
|
|
||||||
|
|
||||||
div = re.search(r'<div class="tagsWrapper">\s+Tags: \s+([^\n]+)', webpage)
|
|
||||||
if div:
|
|
||||||
tags = self._get_text(div.group(1))
|
|
||||||
else:
|
|
||||||
tags = None
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user