2014-06-21 16:31:18 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# encoding: utf-8  
						 
					
						
							
								
									
										
										
										
											2014-01-31 14:00:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  __future__  import  unicode_literals  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-06-23 22:22:08 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  re  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
									
										
										
										
											2015-01-26 00:34:31 +06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  . . compat  import  compat_urlparse  
						 
					
						
							
								
									
										
										
										
											2014-11-16 00:51:31 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  . spiegeltv  import  SpiegeltvIE  
						 
					
						
							
								
									
										
										
										
											2013-06-23 22:22:08 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class SpiegelIE(InfoExtractor):
								    """Extractor for single video pages under ``spiegel.de/video/``.

								    The numeric suffix of the page URL is used as the video id. Format
								    information is read from an XML document located at
								    ``<server base URL><video id>.xml``, where the server base URL is taken
								    from a ``var server = "..."`` assignment found in the page source.
								    """

								    # The ``id`` group is the trailing run of digits; ``-embed``, ``.html``
								    # and a ``#fragment`` are all optional.
								    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
								    _TESTS = [{
								        'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
								        'md5': '2c2754212136f35fb4b19767d242f66e',
								        'info_dict': {
								            'id': '1259285',
								            'ext': 'mp4',
								            'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
								            'description': 'md5:8029d8310232196eb235d27575a8b9f4',
								            'duration': 49,
								        },
								    }, {
								        'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
								        'md5': 'f2cdf638d7aa47654e251e1aee360af1',
								        'info_dict': {
								            'id': '1309159',
								            'ext': 'mp4',
								            'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
								            'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
								            'duration': 983,
								        },
								    }, {
								        'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
								        'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
								        'info_dict': {
								            'id': '1519126',
								            'ext': 'mp4',
								            'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
								            'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
								        }
								    }]

								    def _real_extract(self, url):
								        """Build the info dict for the video page at *url*.

								        Returns either a ``url_result`` that delegates to the Spiegeltv
								        extractor (when the request ended up on a URL that extractor
								        accepts) or a dict with ``id``, ``title``, ``description``,
								        ``duration`` and ``formats``.
								        """
								        video_id = self._match_id(url)
								        webpage, handle = self._download_webpage_handle(url, video_id)

								        # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
								        final_url = handle.geturl()
								        if SpiegeltvIE.suitable(final_url):
								            return self.url_result(final_url, 'Spiegeltv')

								        # Collapse every whitespace run inside the heading to one space.
								        title = re.sub(r'\s+', ' ', self._html_search_regex(
								            r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
								            webpage, 'title'))
								        description = self._html_search_meta('description', webpage, 'description')

								        base_url = self._search_regex(
								            r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL')

								        xml_url = base_url + video_id + '.xml'
								        idoc = self._download_xml(xml_url, video_id)

								        formats = []
								        # Each direct child whose tag is ``type<suffix>`` describes one
								        # format; ``type6`` is deliberately skipped (reason not visible
								        # here -- NOTE(review): confirm why).
								        for n in idoc:
								            if not n.tag.startswith('type') or n.tag == 'type6':
								                continue
								            # The part of the tag after the last 'type' is the format id.
								            format_id = n.tag.rpartition('type')[2]
								            video_url = base_url + n.find('./filename').text
								            formats.append({
								                'format_id': format_id,
								                'url': video_url,
								                'width': int(n.find('./width').text),
								                'height': int(n.find('./height').text),
								                'abr': int(n.find('./audiobitrate').text),
								                'vbr': int(n.find('./videobitrate').text),
								                'vcodec': n.find('./codec').text,
								                # The audio codec is hard-coded, not read from the XML.
								                'acodec': 'MP4A',
								            })
								        # The duration is read from the first child of the document only.
								        duration = float(idoc[0].findall('./duration')[0].text)

								        self._check_formats(formats, video_id)
								        self._sort_formats(formats)

								        return {
								            'id': video_id,
								            'title': title,
								            'description': description,
								            'duration': duration,
								            'formats': formats,
								        }
							 
						 
					
						
							
								
									
										
										
										
											2014-09-13 06:55:38 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class SpiegelArticleIE(InfoExtractor):
								    """Extractor for spiegel.de article pages that link to or embed videos.

								    Matches ``www.spiegel.de`` URLs outside ``/video/`` that end in
								    ``-<digits>.html``. An article yields either a single ``url_result``
								    (video link at the top of the page) or a playlist of the embedded
								    videos.
								    """

								    # Raw string so that ``\.`` reaches the regex engine unchanged instead
								    # of being an invalid string escape.
								    _VALID_URL = r'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
								    IE_NAME = 'Spiegel:Article'
								    IE_DESC = 'Articles on spiegel.de'
								    _TESTS = [{
								        'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
								        'info_dict': {
								            'id': '1516455',
								            'ext': 'mp4',
								            'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
								            'description': 're:^Patrick Kämnitz gehört.{100,}',
								        },
								    }, {
								        'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
								        'info_dict': {

								        },
								        'playlist_count': 6,
								    }]

								    def _real_extract(self, url):
								        """Resolve the article at *url* to its video(s).

								        Returns a ``url_result`` for the first ``spOpenVideo`` link when one
								        is present; otherwise a ``playlist_result`` with one ``url_result``
								        per ``vid_holder`` embed found in the page (possibly none).
								        """
								        video_id = self._match_id(url)
								        webpage = self._download_webpage(url, video_id)

								        # Links found in the page are resolved against this base.
								        site_root = self.http_scheme() + '//spiegel.de/'

								        # Single video on top of the page
								        video_link = self._search_regex(
								            r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
								            'video page URL', default=None)
								        if video_link:
								            video_url = compat_urlparse.urljoin(site_root, video_link)
								            return self.url_result(video_url)

								        # Multiple embedded videos
								        embeds = re.findall(
								            r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
								            webpage)
								        entries = [
								            self.url_result(compat_urlparse.urljoin(site_root, embed_path))
								            for embed_path in embeds
								        ]
								        return self.playlist_result(entries)