2013-10-12 21:34:04 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  re  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . . utils  import  (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    compat_urlparse , 
							 
						 
					
						
							
								
									
										
										
										
											2013-10-12 22:21:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    compat_urllib_parse , 
							 
						 
					
						
							
								
									
										
										
										
											2013-10-12 21:34:04 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    xpath_with_ns , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class InternetVideoArchiveIE(InfoExtractor):
								    """Extractor for the video.internetvideoarchive.net flash player.

								    The flash configuration document is downloaded first; the playlist URL
								    it names is rewritten to its ``mrssplaylist.aspx`` variant, and the
								    ``media:content`` entries of that feed become the available formats.
								    """

								    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'

								    _TEST = {
								        u'url': u'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
								        u'file': u'452693.mp4',
								        u'info_dict': {
								            u'title': u'SKYFALL',
								            u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
								            u'duration': 153,
								        },
								    }

								    @staticmethod
								    def _build_url(query):
								        """Return the flash configuration URL carrying the given query string."""
								        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

								    @staticmethod
								    def _clean_query(query):
								        """Return a re-encoded query string limited to the needed arguments.

								        Only ``publishedid`` and ``customerid`` are kept (first value of
								        each); ``playerid`` and ``videokbrate`` are always overwritten.
								        """
								        NEEDED_ARGS = ['publishedid', 'customerid']
								        query_dic = compat_urlparse.parse_qs(query)
								        cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
								        # Other player ids return m3u8 urls
								        cleaned_dic['playerid'] = '247'
								        cleaned_dic['videokbrate'] = '100000'
								        return compat_urllib_parse.urlencode(cleaned_dic)

								    def _real_extract(self, url):
								        """Extract the video information for *url*.

								        Returns a dict with the keys ``id``, ``title``, ``formats``,
								        ``thumbnail``, ``description`` and ``duration``. ``duration`` is
								        ``None`` when no ``media:content`` entry provides one.
								        """
								        query = compat_urlparse.urlparse(url).query
								        query_dic = compat_urlparse.parse_qs(query)
								        video_id = query_dic['publishedid'][0]
								        url = self._build_url(query)

								        flashconfiguration = self._download_xml(url, video_id,
								            u'Downloading flash configuration')
								        file_url = flashconfiguration.find('file').text
								        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
								        # Replace some of the parameters in the query to get the best quality
								        # and http links (no m3u8 manifests)
								        file_url = re.sub(r'(?<=\?)(.+)$',
								            lambda m: self._clean_query(m.group()),
								            file_url)
								        info = self._download_xml(file_url, video_id,
								            u'Downloading video info')
								        item = info.find('channel/item')

								        def _bp(p):
								            # Expand the 'media:' / 'jwplayer:' prefixes used in the paths below
								            return xpath_with_ns(p,
								                {'media': 'http://search.yahoo.com/mrss/',
								                'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})

								        formats = []
								        # Tracked explicitly so that the result never depends on a loop
								        # variable that is unbound when the feed lists no media:content.
								        duration = None
								        for content in item.findall(_bp('media:group/media:content')):
								            attr = content.attrib
								            f_url = attr['url']
								            width = int(attr['width'])
								            bitrate = int(attr['bitrate'])
								            format_id = '%d-%dk' % (width, bitrate)
								            formats.append({
								                'format_id': format_id,
								                'url': f_url,
								                'width': width,
								                'tbr': bitrate,
								            })
								            # Keep the value of the last entry that carries a duration
								            if attr.get('duration') is not None:
								                duration = int(attr['duration'])

								        self._sort_formats(formats)

								        return {
								            'id': video_id,
								            'title': item.find('title').text,
								            'formats': formats,
								            'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],
								            'description': item.find('description').text,
								            'duration': duration,
								        }