62 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			62 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|  | import re | ||
|  | 
 | ||
|  | from .common import InfoExtractor | ||
|  | from ..utils import ( | ||
|  |     compat_urllib_parse, | ||
|  | 
 | ||
|  |     ExtractorError, | ||
|  | ) | ||
|  | 
 | ||
|  | 
 | ||
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster.

    Downloads the movie page and scrapes the media URL, title, upload date,
    uploader id and thumbnail out of it with regular expressions.
    """
    # The dot after "www" is escaped so only a literal "www." prefix matches,
    # and both http and https links are accepted (the page is re-fetched from
    # a URL rebuilt out of the numeric id, so the scheme does not matter).
    _VALID_URL = r'(?:https?://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        """Extract the video information for a single movie page.

        Args:
            url: A movie page URL matching ``_VALID_URL``.

        Returns:
            A one-element list holding a dict with the keys ``id``, ``url``,
            ``ext``, ``title``, ``upload_date`` (``YYYYMMDD`` string, or
            ``None`` when it could not be found), ``uploader_id`` and
            ``thumbnail``.

        Raises:
            ExtractorError: If ``url`` does not match ``_VALID_URL`` or the
                media URL cannot be found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with a meaningful error instead of an AttributeError on
            # ``None.group`` below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # Canonical page URL rebuilt from the id alone.
        page_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(page_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        server = mobj.group('server')
        media_file = mobj.group('file')
        if not server:
            # No server given: 'file' holds the complete, URL-quoted address.
            video_url = compat_urllib_parse.unquote(media_file)
        else:
            video_url = server + '/key=' + media_file

        # Drop any query string / fragment first so it does not end up in the
        # extension (e.g. "mp4?time=1").
        video_extension = video_url.split('#')[0].split('?')[0].split('.')[-1]

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')

        # The description is not visible anywhere in the page UI, so it is
        # not extracted.

        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if mobj:
            # Concatenate year, month and day into YYYYMMDD.
            video_upload_date = (
                mobj.group('upload_date_Y')
                + mobj.group('upload_date_m')
                + mobj.group('upload_date_d'))
        else:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')

        # Falls back to u'anonymous' when no uploader link is present.
        video_uploader_id = self._html_search_regex(
            r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')

        # Best effort: a missing thumbnail is requested as non-fatal.
        video_thumbnail = self._search_regex(
            r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_extension,
            'title': video_title,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail,
        }]