| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     compat_urllib_parse, | 
					
						
							| 
									
										
										
										
											2013-08-23 16:40:20 +02:00
										 |  |  |     unescapeHTML, | 
					
						
							| 
									
										
										
										
											2013-08-23 17:23:34 +02:00
										 |  |  |     determine_ext, | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  |     ExtractorError, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class XHamsterIE(InfoExtractor): | 
					
						
							|  |  |  |     """Information Extractor for xHamster""" | 
					
						
							|  |  |  |     _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html' | 
					
						
							| 
									
										
										
										
											2013-06-27 20:46:46 +02:00
										 |  |  |     _TEST = { | 
					
						
							|  |  |  |         u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', | 
					
						
							|  |  |  |         u'file': u'1509445.flv', | 
					
						
							|  |  |  |         u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa', | 
					
						
							|  |  |  |         u'info_dict': { | 
					
						
							|  |  |  |             u"upload_date": u"20121014",  | 
					
						
							|  |  |  |             u"uploader_id": u"Ruseful2011",  | 
					
						
							|  |  |  |             u"title": u"FemaleAgent Shy beauty takes the bait" | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self,url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_id = mobj.group('id') | 
					
						
							|  |  |  |         mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id | 
					
						
							|  |  |  |         webpage = self._download_webpage(mrss_url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) | 
					
						
							|  |  |  |         if mobj is None: | 
					
						
							|  |  |  |             raise ExtractorError(u'Unable to extract media URL') | 
					
						
							|  |  |  |         if len(mobj.group('server')) == 0: | 
					
						
							|  |  |  |             video_url = compat_urllib_parse.unquote(mobj.group('file')) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             video_url = mobj.group('server')+'/key='+mobj.group('file') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', | 
					
						
							|  |  |  |             webpage, u'title') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-23 16:40:20 +02:00
										 |  |  |         # Only a few videos have an description | 
					
						
							|  |  |  |         mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage) | 
					
						
							|  |  |  |         if mobj: | 
					
						
							|  |  |  |             video_description = unescapeHTML(mobj.group('description')) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             video_description = None | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) | 
					
						
							|  |  |  |         if mobj: | 
					
						
							|  |  |  |             video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d') | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             video_upload_date = None | 
					
						
							|  |  |  |             self._downloader.report_warning(u'Unable to extract upload date') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)', | 
					
						
							|  |  |  |             webpage, u'uploader id', default=u'anonymous') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'', | 
					
						
							|  |  |  |             webpage, u'thumbnail', fatal=False) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return [{ | 
					
						
							|  |  |  |             'id':       video_id, | 
					
						
							|  |  |  |             'url':      video_url, | 
					
						
							| 
									
										
										
										
											2013-08-23 17:23:34 +02:00
										 |  |  |             'ext':      determine_ext(video_url), | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  |             'title':    video_title, | 
					
						
							| 
									
										
										
										
											2013-08-23 16:40:20 +02:00
										 |  |  |             'description': video_description, | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  |             'upload_date': video_upload_date, | 
					
						
							|  |  |  |             'uploader_id': video_uploader_id, | 
					
						
							|  |  |  |             'thumbnail': video_thumbnail | 
					
						
							|  |  |  |         }] |