From 4c125b818166b532523c32260b23378d54dbbec8 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Sat, 2 Feb 2019 17:10:39 -0400 Subject: [PATCH] made requested changes fixed div regex for tags and categories changed function name --- youtube_dl/extractor/pornhub.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index e16c10f97..428324ef0 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -302,14 +302,17 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') - def _get_text(class_name, page): - div = re.search( - r'
\s+[^\n]+\s+([^\n]+)\s+[^\n]+\s+
', page) + def _get_items(class_name): + div = self._search_regex( + r'
([\S\s]+?)
', + webpage, class_name, default=None) if div: - return [a for a in re.findall(r']+>([^<]+)', div.group(1))] + return [a for a in re.findall(r']+>([^<]+)', div)] + else: + return None - categories = _get_text('categoriesWrapper', webpage) - tags = _get_text('tagsWrapper', webpage) + categories = _get_items('categoriesWrapper') + tags = _get_items('tagsWrapper') return { 'id': video_id,