| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     compat_urllib_parse, | 
					
						
							| 
									
										
										
										
											2013-08-23 16:40:20 +02:00
										 |  |  |     unescapeHTML, | 
					
						
							| 
									
										
										
										
											2013-08-23 17:23:34 +02:00
										 |  |  |     determine_ext, | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  |     ExtractorError, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class XHamsterIE(InfoExtractor): | 
					
						
							|  |  |  |     """Information Extractor for xHamster""" | 
					
						
							| 
									
										
										
										
											2013-09-17 06:24:20 +02:00
										 |  |  |     _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2013-06-27 20:46:46 +02:00
										 |  |  |         u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', | 
					
						
							|  |  |  |         u'file': u'1509445.flv', | 
					
						
							|  |  |  |         u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa', | 
					
						
							|  |  |  |         u'info_dict': { | 
					
						
							|  |  |  |             u"upload_date": u"20121014",  | 
					
						
							|  |  |  |             u"uploader_id": u"Ruseful2011",  | 
					
						
							| 
									
										
										
										
											2013-10-19 21:09:48 +02:00
										 |  |  |             u"title": u"FemaleAgent Shy beauty takes the bait", | 
					
						
							|  |  |  |             u"age_limit": 18, | 
					
						
							| 
									
										
										
										
											2013-06-27 20:46:46 +02:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2013-09-17 06:24:20 +02:00
										 |  |  |     }, | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', | 
					
						
							|  |  |  |         u'file': u'2221348.flv', | 
					
						
							| 
									
										
										
										
											2013-12-07 22:17:13 +01:00
										 |  |  |         u'md5': u'e767b9475de189320f691f49c679c4c7', | 
					
						
							| 
									
										
										
										
											2013-09-17 06:24:20 +02:00
										 |  |  |         u'info_dict': { | 
					
						
							| 
									
										
										
										
											2013-10-19 21:09:48 +02:00
										 |  |  |             u"upload_date": u"20130914", | 
					
						
							|  |  |  |             u"uploader_id": u"jojo747400", | 
					
						
							|  |  |  |             u"title": u"Britney Spears  Sexy Booty", | 
					
						
							|  |  |  |             u"age_limit": 18, | 
					
						
							| 
									
										
										
										
											2013-09-17 06:24:20 +02:00
										 |  |  |         } | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self,url): | 
					
						
							| 
									
										
										
										
											2013-10-26 20:38:54 +02:00
										 |  |  |         def extract_video_url(webpage): | 
					
						
							|  |  |  |             mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) | 
					
						
							|  |  |  |             if mobj is None: | 
					
						
							|  |  |  |                 raise ExtractorError(u'Unable to extract media URL') | 
					
						
							|  |  |  |             if len(mobj.group('server')) == 0: | 
					
						
							|  |  |  |                 return compat_urllib_parse.unquote(mobj.group('file')) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 return mobj.group('server')+'/key='+mobj.group('file') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def is_hd(webpage): | 
					
						
							| 
									
										
										
										
											2013-12-07 21:39:32 +01:00
										 |  |  |             return webpage.find('<div class=\'icon iconHD\'') != -1 | 
					
						
							| 
									
										
										
										
											2013-10-26 20:38:54 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_id = mobj.group('id') | 
					
						
							| 
									
										
										
										
											2013-09-17 06:24:20 +02:00
										 |  |  |         seo = mobj.group('seo') | 
					
						
							| 
									
										
										
										
											2013-10-26 20:38:54 +02:00
										 |  |  |         mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo) | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  |         webpage = self._download_webpage(mrss_url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', | 
					
						
							|  |  |  |             webpage, u'title') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-23 16:40:20 +02:00
										 |  |  |         # Only a few videos have an description | 
					
						
							|  |  |  |         mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage) | 
					
						
							|  |  |  |         if mobj: | 
					
						
							|  |  |  |             video_description = unescapeHTML(mobj.group('description')) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             video_description = None | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) | 
					
						
							|  |  |  |         if mobj: | 
					
						
							|  |  |  |             video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d') | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             video_upload_date = None | 
					
						
							|  |  |  |             self._downloader.report_warning(u'Unable to extract upload date') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)', | 
					
						
							|  |  |  |             webpage, u'uploader id', default=u'anonymous') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'', | 
					
						
							|  |  |  |             webpage, u'thumbnail', fatal=False) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-19 21:09:48 +02:00
										 |  |  |         age_limit = self._rta_search(webpage) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-26 20:38:54 +02:00
										 |  |  |         video_url = extract_video_url(webpage) | 
					
						
							|  |  |  |         hd = is_hd(webpage) | 
					
						
							|  |  |  |         formats = [{ | 
					
						
							|  |  |  |             'url': video_url, | 
					
						
							|  |  |  |             'ext': determine_ext(video_url), | 
					
						
							|  |  |  |             'format': 'hd' if hd else 'sd', | 
					
						
							|  |  |  |             'format_id': 'hd' if hd else 'sd', | 
					
						
							|  |  |  |         }] | 
					
						
							|  |  |  |         if not hd: | 
					
						
							|  |  |  |             webpage = self._download_webpage(mrss_url+'?hd', video_id) | 
					
						
							|  |  |  |             if is_hd(webpage): | 
					
						
							|  |  |  |                 video_url = extract_video_url(webpage) | 
					
						
							|  |  |  |                 formats.append({ | 
					
						
							|  |  |  |                     'url': video_url, | 
					
						
							|  |  |  |                     'ext': determine_ext(video_url), | 
					
						
							|  |  |  |                     'format': 'hd', | 
					
						
							|  |  |  |                     'format_id': 'hd', | 
					
						
							|  |  |  |                 }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': video_title, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2013-08-23 16:40:20 +02:00
										 |  |  |             'description': video_description, | 
					
						
							| 
									
										
										
										
											2013-06-23 22:32:44 +02:00
										 |  |  |             'upload_date': video_upload_date, | 
					
						
							|  |  |  |             'uploader_id': video_uploader_id, | 
					
						
							| 
									
										
										
										
											2013-10-19 21:09:48 +02:00
										 |  |  |             'thumbnail': video_thumbnail, | 
					
						
							|  |  |  |             'age_limit': age_limit, | 
					
						
							| 
									
										
										
										
											2013-10-26 20:38:54 +02:00
										 |  |  |         } |