[pornhub] Extract categories and tags (closes #10499)
This commit is contained in:
		
							parent
							
								
									fea74acad8
								
							
						
					
					
						commit
						6bb05b32a9
					
				| @ -1,6 +1,7 @@ | |||||||
| version <unreleased> | version <unreleased> | ||||||
| 
 | 
 | ||||||
| Extractors | Extractors | ||||||
|  | + [pornhub] Extract categories and tags (#10499) | ||||||
| + [foxnews] Support Fox News articles (#10598) | + [foxnews] Support Fox News articles (#10598) | ||||||
| * [iwara] Fix extraction after relaunch (#10462, #3215) | * [iwara] Fix extraction after relaunch (#10462, #3215) | ||||||
| * [newgrounds] Fix uploader extraction (#10584) | * [newgrounds] Fix uploader extraction (#10584) | ||||||
|  | |||||||
| @ -15,6 +15,7 @@ from ..compat import ( | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     js_to_json, | ||||||
|     orderedSet, |     orderedSet, | ||||||
|     sanitized_Request, |     sanitized_Request, | ||||||
|     str_to_int, |     str_to_int, | ||||||
| @ -48,6 +49,8 @@ class PornHubIE(InfoExtractor): | |||||||
|             'dislike_count': int, |             'dislike_count': int, | ||||||
|             'comment_count': int, |             'comment_count': int, | ||||||
|             'age_limit': 18, |             'age_limit': 18, | ||||||
|  |             'tags': list, | ||||||
|  |             'categories': list, | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         # non-ASCII title |         # non-ASCII title | ||||||
| @ -63,6 +66,8 @@ class PornHubIE(InfoExtractor): | |||||||
|             'dislike_count': int, |             'dislike_count': int, | ||||||
|             'comment_count': int, |             'comment_count': int, | ||||||
|             'age_limit': 18, |             'age_limit': 18, | ||||||
|  |             'tags': list, | ||||||
|  |             'categories': list, | ||||||
|         }, |         }, | ||||||
|         'params': { |         'params': { | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
| @ -183,6 +188,15 @@ class PornHubIE(InfoExtractor): | |||||||
|             }) |             }) | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
| 
 | 
 | ||||||
|  |         page_params = self._parse_json(self._search_regex( | ||||||
|  |             r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', | ||||||
|  |             webpage, 'page parameters', group='data', default='{}'), | ||||||
|  |             video_id, transform_source=js_to_json, fatal=False) | ||||||
|  |         tags = categories = None | ||||||
|  |         if page_params: | ||||||
|  |             tags = page_params.get('tags', '').split(',') | ||||||
|  |             categories = page_params.get('categories', '').split(',') | ||||||
|  | 
 | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'uploader': video_uploader, |             'uploader': video_uploader, | ||||||
| @ -195,6 +209,8 @@ class PornHubIE(InfoExtractor): | |||||||
|             'comment_count': comment_count, |             'comment_count': comment_count, | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'age_limit': 18, |             'age_limit': 18, | ||||||
|  |             'tags': tags, | ||||||
|  |             'categories': categories, | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user