made requested changes

fixed the div regex for tags and categories so it captures the whole div body instead of assuming a fixed line layout
renamed the helper function from _get_text to _get_items
This commit is contained in:
JChris246 2019-02-02 17:10:39 -04:00 committed by GitHub
parent ee3a27d036
commit 4c125b8181
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -302,14 +302,17 @@ class PornHubIE(PornHubBaseIE):
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
def _get_text(class_name, page):
div = re.search(
r'<div class="' + class_name + '">\s+[^\n]+\s+([^\n]+)\s+[^\n]+\s+</div>', page)
def _get_items(class_name):
div = self._search_regex(
r'<div class="' + class_name + '">([\S\s]+?)</div>',
webpage, class_name, default=None)
if div:
return [a for a in re.findall(r'<a href=[^>]+>([^<]+)', div.group(1))]
return [a for a in re.findall(r'<a href=[^>]+>([^<]+)', div)]
else:
return None
categories = _get_text('categoriesWrapper', webpage)
tags = _get_text('tagsWrapper', webpage)
categories = _get_items('categoriesWrapper')
tags = _get_items('tagsWrapper')
return {
'id': video_id,