2014-01-27 18:40:10 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# coding: utf-8 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								from  __future__  import  unicode_literals 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-06-23 20:24:07 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								import  re 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								from  . common  import  InfoExtractor 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								from  . . utils  import  ( 
							 
						 
					
						
							
								
									
										
										
										
											2014-01-27 18:40:10 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    determine_ext , 
							 
						 
					
						
							
								
									
										
										
										
											2013-06-23 20:24:07 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								    ExtractorError , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2014-01-27 18:40:10 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-06-23 20:24:07 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								class ARDIE(InfoExtractor):
								    """Extractor for video pages on ardmediathek.de / mediathek.daserste.de."""

								    # The last path component is captured as ``video_id``; it is only used
								    # when the URL carries no numeric ``documentId`` query parameter.
								    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'

								    _TEST = {
								        'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
								        'file': '19288786.mp4',
								        'md5': '515bf47ce209fb3f5a61b7aad364634c',
								        'info_dict': {
								            'title': 'Edward Snowden im Interview - Held oder Verräter?',
								            'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
								            'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
								        },
								        'skip': 'Blocked outside of Germany',
								    }

								    def _real_extract(self, url):
								        """Extract metadata and the available formats for one video page.

								        Args:
								            url: Page URL; expected to match ``_VALID_URL`` unless it
								                contains a ``documentId=<digits>`` parameter.

								        Returns:
								            A dict with the keys ``id``, ``title``, ``description``,
								            ``formats`` and ``thumbnail``.

								        Raises:
								            ExtractorError: if the media description lists no streams and
								                the page contains the ``"fsk"`` marker.
								        """
								        # Determine the video id from the url: prefer the numeric
								        # documentId, fall back to the last path component.
								        numid = re.search(r'documentId=([0-9]+)', url)
								        if numid:
								            video_id = numid.group(1)
								        else:
								            video_id = re.match(self._VALID_URL, url).group('video_id')

								        webpage = self._download_webpage(url, video_id)

								        title = self._html_search_regex(
								            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
								             r'<meta name="dcterms.title" content="(.*?)"/>',
								             r'<h4 class="headline">(.*?)</h4>'],
								            webpage, 'title')
								        description = self._html_search_meta(
								            'dcterms.abstract', webpage, 'description')
								        thumbnail = self._og_search_thumbnail(webpage)

								        media_info = self._download_json(
								            'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
								        # The second element of the _mediaArray contains the standard http urls
								        streams = media_info['_mediaArray'][1]['_mediaStreamArray']
								        if not streams:
								            # NOTE(review): "fsk" presumably marks age-restricted content
								            # that the site only serves in the evening -- confirm.
								            if '"fsk"' in webpage:
								                raise ExtractorError('This video is only available after 20:00')

								        formats = []
								        for stream in streams:
								            stream_url = stream['_stream']
								            if isinstance(stream_url, list):
								                # Several URLs share one quality level: walk them in
								                # reverse and bump the quality by the position so each
								                # entry gets a distinct quality / format_id.
								                candidates = [
								                    (stream['_quality'] + offset, alt_url)
								                    for offset, alt_url in enumerate(stream_url[::-1])
								                ]
								            else:
								                candidates = [(stream['_quality'], stream_url)]

								            for quality, format_url in candidates:
								                formats.append({
								                    'quality': quality,
								                    'url': format_url,
								                    'format_id': '%s-%s' % (determine_ext(format_url), quality),
								                })

								        self._sort_formats(formats)

								        return {
								            'id': video_id,
								            'title': title,
								            'description': description,
								            'formats': formats,
								            'thumbnail': thumbnail,
								        }