2013-07-13 09:17:48 +05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# -*- coding: utf-8 -*-
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import re
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from .common import InfoExtractor
							 | 
						
					
						
							
								
									
										
										
										
											2013-07-13 12:26:05 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from ..utils import determine_ext
							 | 
						
					
						
							
								
									
										
										
										
											2013-07-13 09:17:48 +05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class CriterionIE(InfoExtractor):
								    """Information extractor for film pages on www.criterion.com.

								    Matches URLs of the form
								    ``http(s)://www.criterion.com/films/<numeric id>-<slug>`` and reads
								    the video URL, title, description and thumbnail out of the page
								    markup.
								    """

								    # Group 1 is the numeric film id. At least one digit is required so
								    # that a URL such as ``/films/-foo`` cannot produce an empty id.
								    _VALID_URL = r'https?://www\.criterion\.com/films/(\d+)-.+'
								    _TEST = {
								        u'url': u'http://www.criterion.com/films/184-le-samourai',
								        u'file': u'184.mp4',
								        u'md5': u'bc51beba55685509883a9a7830919ec3',
								        u'info_dict': {
								            u"title": u"Le Samouraï",
								            u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f',
								        }
								    }

								    def _real_extract(self, url):
								        """Download the film page at *url* and build its info dict.

								        Returns a dict with the keys ``id``, ``url``, ``title``, ``ext``,
								        ``description`` and ``thumbnail``. The video URL and the title
								        are mandatory; description and thumbnail are looked up on a
								        best-effort basis.
								        """
								        mobj = re.match(self._VALID_URL, url)
								        video_id = mobj.group(1)
								        webpage = self._download_webpage(url, video_id)

								        # The page passes player settings via so.addVariable("name",
								        # "value"); the dot is escaped so it only matches a literal '.'.
								        final_url = self._search_regex(
								            r'so\.addVariable\("videoURL", "(.+?)"\)\;',
								            webpage, 'video url')
								        title = self._html_search_regex(
								            r'<meta content="(.+?)" property="og:title" />',
								            webpage, 'video title')

								        # Optional metadata: a missing description or thumbnail should
								        # not abort an otherwise successful extraction.
								        # NOTE(review): relies on the base-class search helpers
								        # accepting ``fatal=False`` -- confirm against .common.
								        description = self._html_search_regex(
								            r'<meta name="description" content="(.+?)" />',
								            webpage, 'video description', fatal=False)
								        thumbnail = self._search_regex(
								            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
								            webpage, 'thumbnail url', fatal=False)

								        return {
								            'id': video_id,
								            'url': final_url,
								            'title': title,
								            'ext': determine_ext(final_url),
								            'description': description,
								            'thumbnail': thumbnail,
								        }
							 |