2014-02-09 14:22:56 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  __future__  import  unicode_literals  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-09-10 11:19:58 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  re  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import  json  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . . utils  import  (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    compat_urlparse , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ExtractorError , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class SlideshareIE(InfoExtractor):
								    """Information extractor for video presentations on slideshare.net.

								    Only slideshows whose embedded metadata reports the type ``video`` are
								    handled; any other type is rejected with an ``ExtractorError``.
								    """

								    # The ``title`` group captures the slug after the uploader's name; it
								    # is used only as the display id while downloading the page.
								    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'

								    _TEST = {
								        'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
								        'info_dict': {
								            'id': '25665706',
								            'ext': 'mp4',
								            'title': 'Managing Scale and Complexity',
								            'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
								        },
								    }

								    def _real_extract(self, url):
								        """Build the info dict for the Slideshare video at ``url``.

								        The page embeds a JavaScript ``slideshare_object`` literal; it is
								        parsed as JSON and supplies the id, title, thumbnail and the pieces
								        of the media URL.

								        Args:
								            url: Page URL; expected to match ``_VALID_URL``.

								        Returns:
								            A dict with the keys ``_type``, ``id``, ``title``, ``ext``,
								            ``url``, ``thumbnail`` and ``description``.

								        Raises:
								            ExtractorError: If the slideshow type is not ``video``.
								        """
								        # NOTE(review): assumes the caller has already checked ``url``
								        # against _VALID_URL, so the match is never None -- confirm.
								        mobj = re.match(self._VALID_URL, url)
								        page_title = mobj.group('title')
								        webpage = self._download_webpage(url, page_title)

								        # The two spaces after '=' are part of the pattern being matched.
								        slideshare_obj = self._search_regex(
								            r'var slideshare_object =  ({.*?}); var user_info =',
								            webpage, 'slideshare object')
								        info = json.loads(slideshare_obj)
								        slideshow = info['slideshow']

								        if slideshow['type'] != 'video':
								            raise ExtractorError(
								                'Webpage type is "%s": only video extraction is supported for Slideshare' % slideshow['type'],
								                expected=True)

								        # The media URL is <video_bucket>/<doc>-SD.<video_extension>.
								        doc = info['doc']
								        jsplayer = info['jsplayer']
								        bucket = jsplayer['video_bucket']
								        ext = jsplayer['video_extension']
								        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)

								        # The description is optional metadata: a missing paragraph must
								        # not abort the extraction, hence fatal=False.
								        description = self._html_search_regex(
								            r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
								            'description', fatal=False)

								        return {
								            '_type': 'video',
								            'id': slideshow['id'],
								            'title': slideshow['title'],
								            'ext': ext,
								            'url': video_url,
								            # Like the description, a missing thumbnail should not turn
								            # a successful extraction into a KeyError.
								            'thumbnail': slideshow.get('pin_image_url'),
								            'description': description,
								        }