2014-03-21 00:53:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  __future__  import  unicode_literals  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-11-24 07:30:05 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  re  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-05-02 00:08:38 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  . . compat  import  (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    compat_urlparse , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    compat_urllib_request , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
									
										
										
										
											2013-11-24 07:30:05 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								from  . . utils  import  (  
						 
					
						
							
								
									
										
										
										
											2013-11-24 15:28:33 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    ExtractorError , 
							 
						 
					
						
							
								
									
										
										
										
											2013-11-25 06:06:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    unescapeHTML , 
							 
						 
					
						
							
								
									
										
										
										
											2013-11-24 07:30:05 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    unified_strdate , 
							 
						 
					
						
							
								
									
										
										
										
											2014-03-21 00:59:51 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    US_RATINGS , 
							 
						 
					
						
							
								
									
										
										
										
											2015-05-02 01:19:06 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    determine_ext , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    mimetype2ext , 
							 
						 
					
						
							
								
									
										
										
										
											2013-11-24 07:30:05 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
									
										
										
										
											2015-02-18 20:37:16 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
									
										
										
										
											2013-11-24 07:30:05 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-02-18 20:37:16 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class VikiIE(InfoExtractor):
								    """Extractor for single video pages on viki.com.

								    Metadata is scraped from the public video page; the media URL, upload
								    date and subtitle tracks come from the ``player5_fragment`` page, which
								    is requested with an iPad user agent.
								    """

								    IE_NAME = 'viki'

								    # iPad2 user agent, sent with the player fragment request.
								    _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'

								    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
								    _TESTS = [{
								        'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
								        'info_dict': {
								            'id': '1023585v',
								            'ext': 'mp4',
								            'title': 'Heirs Episode 14',
								            'uploader': 'SBS',
								            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
								            'upload_date': '20131121',
								            'age_limit': 13,
								        },
								        'skip': 'Blocked in the US',
								    }, {
								        'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
								        'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
								        'info_dict': {
								            'id': '1067139v',
								            'ext': 'mp4',
								            'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
								            'upload_date': '20150430',
								            'title': '\'The Avengers: Age of Ultron\' Press Conference',
								        }
								    }, {
								        'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
								        'info_dict': {
								            'id': '1048879v',
								            'ext': 'mp4',
								            'upload_date': '20140820',
								            'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
								            'title': 'Ankhon Dekhi',
								        },
								        'params': {
								            # requires ffmpeg
								            'skip_download': True,
								        }
								    }]

								    def _real_extract(self, url):
								        """Extract metadata and formats for the video at ``url``.

								        Downloads the video page and the player fragment page and returns a
								        dict with the keys ``id``, ``title``, ``formats``, ``description``,
								        ``thumbnail``, ``age_limit``, ``uploader``, ``subtitles`` and
								        ``upload_date``.

								        Raises ExtractorError when the player fragment contains a
								        ``video-error`` element or no ``<source>`` tag can be found.
								        """
								        video_id = self._match_id(url)

								        # --- metadata from the public video page ---
								        webpage = self._download_webpage(url, video_id)
								        title = self._og_search_title(webpage)
								        description = self._og_search_description(webpage)
								        thumbnail = self._og_search_thumbnail(webpage)

								        # The broadcast network is optional; absent markup yields None.
								        uploader_m = re.search(
								            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
								        uploader = (
								            None if uploader_m is None
								            else uploader_m.group(1).strip())

								        # Ratings missing from US_RATINGS (including the '' default) map
								        # to an age_limit of None.
								        rating_str = self._html_search_regex(
								            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
								            'rating information', default='').strip()
								        age_limit = US_RATINGS.get(rating_str)

								        # --- player fragment: media URL, upload date, subtitles ---
								        req = compat_urllib_request.Request(
								            'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
								        req.add_header('User-Agent', self._USER_AGENT)
								        info_webpage = self._download_webpage(
								            req, video_id, note='Downloading info page')

								        # Surface errors embedded in the fragment instead of failing later
								        # with a less helpful "no video URL" message.
								        err_msg = self._html_search_regex(
								            r'<div[^>]+class="video-error[^>]+>(.+)</div>',
								            info_webpage, 'error message', default=None)
								        if err_msg:
								            if 'not available in your region' in err_msg:
								                raise ExtractorError(
								                    'Video %s is blocked from your location.' % video_id,
								                    expected=True)
								            else:
								                raise ExtractorError('Viki said: ' + err_msg)

								        mobj = re.search(
								            r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"',
								            info_webpage)
								        if not mobj:
								            raise ExtractorError('Unable to find video URL')
								        video_url = unescapeHTML(mobj.group('url'))
								        video_ext = mimetype2ext(mobj.group('mime_type'))

								        # An m3u8 source is a playlist that expands into several formats;
								        # anything else is treated as one direct media URL.
								        if determine_ext(video_url) == 'm3u8':
								            formats = self._extract_m3u8_formats(
								                video_url, video_id, ext=video_ext)
								        else:
								            formats = [{
								                'url': video_url,
								                'ext': video_ext,
								            }]

								        # default=None keeps a missing "created_at" field from aborting the
								        # extraction, so the None check below is actually reachable.
								        upload_date_str = self._html_search_regex(
								            r'"created_at":"([^"]+)"', info_webpage, 'upload date',
								            default=None)
								        upload_date = (
								            unified_strdate(upload_date_str)
								            if upload_date_str is not None
								            else None
								        )

								        # subtitles
								        video_subtitles = self.extract_subtitles(video_id, info_webpage)

								        return {
								            'id': video_id,
								            'title': title,
								            'formats': formats,
								            'description': description,
								            'thumbnail': thumbnail,
								            'age_limit': age_limit,
								            'uploader': uploader,
								            'subtitles': video_subtitles,
								            'upload_date': upload_date,
								        }

								    def _get_subtitles(self, video_id, info_webpage):
								        """Collect the subtitle tracks listed in the player fragment.

								        Returns a dict mapping the language code taken from the
								        ``/<lang>.vtt`` part of each ``<track src="...">`` URL to a
								        one-element list holding that track's absolute URL and the ``vtt``
								        extension. Tracks whose URL has no such part are skipped.
								        """
								        subtitles = {}
								        for track_src in re.findall(r'<track src="([^"]+)"', info_webpage):
								            track_url = unescapeHTML(track_src)
								            lang_m = re.search(r'/(?P<lang>[a-z]+)\.vtt', track_url)
								            if not lang_m:
								                continue
								            subtitles[lang_m.group('lang')] = [{
								                # Track URLs may be relative; resolve against the site root.
								                'url': compat_urlparse.urljoin('http://www.viki.com', track_url),
								                'ext': 'vtt',
								            }]
								        return subtitles