| 
									
										
										
										
											2013-07-13 09:17:48 +05:00
										 |  |  | # -*- coding: utf-8 -*- | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2013-07-13 12:26:05 +02:00
										 |  |  | from ..utils import determine_ext | 
					
						
							| 
									
										
										
										
											2013-07-13 09:17:48 +05:00
										 |  |  | 
 | 
					
						
							|  |  |  | class CriterionIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2013-07-13 12:26:05 +02:00
										 |  |  |     _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' | 
					
						
							| 
									
										
										
										
											2013-07-13 09:18:03 +05:00
										 |  |  |     _TEST = { | 
					
						
							|  |  |  |         u'url': u'http://www.criterion.com/films/184-le-samourai', | 
					
						
							|  |  |  |         u'file': u'184.mp4', | 
					
						
							|  |  |  |         u'md5': u'bc51beba55685509883a9a7830919ec3', | 
					
						
							|  |  |  |         u'info_dict': { | 
					
						
							|  |  |  |             u"title": u"Le Samouraï", | 
					
						
							| 
									
										
										
										
											2013-07-13 12:26:05 +02:00
										 |  |  |             u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', | 
					
						
							| 
									
										
										
										
											2013-07-13 09:18:03 +05:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2013-07-13 09:17:48 +05:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							| 
									
										
										
										
											2013-07-13 12:26:05 +02:00
										 |  |  |         video_id = mobj.group(1) | 
					
						
							| 
									
										
										
										
											2013-07-13 09:17:48 +05:00
										 |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', | 
					
						
							|  |  |  |                                 webpage, 'video url') | 
					
						
							| 
									
										
										
										
											2013-07-13 12:26:05 +02:00
										 |  |  |         title = self._html_search_regex(r'<meta content="(.+?)" property="og:title" />', | 
					
						
							| 
									
										
										
										
											2013-07-13 09:17:48 +05:00
										 |  |  |                                 webpage, 'video title') | 
					
						
							| 
									
										
										
										
											2013-07-13 12:26:05 +02:00
										 |  |  |         description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', | 
					
						
							| 
									
										
										
										
											2013-07-13 09:17:48 +05:00
										 |  |  |                                 webpage, 'video description') | 
					
						
							|  |  |  |         thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', | 
					
						
							|  |  |  |                                 webpage, 'thumbnail url') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return {'id': video_id, | 
					
						
							|  |  |  |                 'url' : final_url, | 
					
						
							|  |  |  |                 'title': title, | 
					
						
							| 
									
										
										
										
											2013-07-13 12:26:05 +02:00
										 |  |  |                 'ext': determine_ext(final_url), | 
					
						
							| 
									
										
										
										
											2013-07-13 09:17:48 +05:00
										 |  |  |                 'description': description, | 
					
						
							|  |  |  |                 'thumbnail': thumbnail, | 
					
						
							|  |  |  |                 } |