diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index b52879c7a..534f06277 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -85,6 +85,9 @@ class PornHdIE(InfoExtractor): r"poster'?\s*:\s*([\"'])(?P(?:(?!\1).)+)\1", webpage, 'thumbnail', fatal=False, group='url') + like_count = int_or_none(self._search_regex( + r'class="save-count">(\d+)<', webpage, 'like_count', fatal=False)) + return { 'id': video_id, 'display_id': display_id, @@ -94,4 +97,5 @@ class PornHdIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, + 'like_count': like_count, } diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 428324ef0..27b938ed2 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -16,6 +16,7 @@ from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, int_or_none, + js_to_json, orderedSet, remove_quotes, str_to_int, @@ -302,17 +303,14 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') - def _get_items(class_name): - div = self._search_regex( - r'
([\S\s]+?)
', - webpage, class_name, default=None) - if div: - return [a for a in re.findall(r']+>([^<]+)', div)] - else: - return None - - categories = _get_items('categoriesWrapper') - tags = _get_items('tagsWrapper') + page_params = self._parse_json(self._search_regex( + r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P{[^}]+})', + webpage, 'page parameters', group='data', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + tags = categories = None + if page_params: + tags = page_params.get('tags', '').split(',') + categories = page_params.get('categories', '').split(',') return { 'id': video_id, @@ -448,4 +446,4 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): break entries.extend(page_entries) - return self.playlist_result(entries, user_id) +return self.playlist_result(entries, user_id)