2015-07-13 07:41:38 -05:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								# coding: utf-8  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  __future__  import  unicode_literals  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import  re  
						 
					
						
							
								
									
										
										
										
											2015-11-03 18:36:54 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  os . path  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-07-13 07:41:38 -05:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
									
										
										
										
											2015-11-03 18:36:54 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  . . compat  import  compat_urlparse  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . . utils  import  (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    url_basename , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    remove_start , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
									
										
										
										
											2015-07-13 07:41:38 -05:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class DemocracynowIE(InfoExtractor):
								    """Information extractor for pages on democracynow.org.

								    The page embeds its player configuration as a JSON object inside a
								    ``<script type="text/json">`` tag; media URLs, captions and the title
								    are read from that object.
								    """

								    # The dot in the host name is escaped so that it only matches a
								    # literal '.', not an arbitrary character.
								    _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)'
								    IE_NAME = 'democracynow'
								    _TESTS = [{
								        'url': 'http://www.democracynow.org/shows/2015/7/3',
								        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
								        'info_dict': {
								            'id': '2015-0703-001',
								            'ext': 'mp4',
								            'title': 'July 03, 2015 - Democracy Now!',
								            'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
								        },
								    }, {
								        'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
								        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
								        'info_dict': {
								            'id': '2015-0703-001',
								            'ext': 'mp4',
								            'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
								            'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
								        },
								    }]

								    def _real_extract(self, url):
								        """Extract media formats, subtitles and metadata from ``url``.

								        Downloads the page, takes the description from its OpenGraph
								        metadata, and parses the JSON object embedded in the
								        ``<script type="text/json">`` tag.

								        Returns a dict with the keys ``id``, ``title``, ``description``,
								        ``subtitles`` and ``formats``.  ``id`` is derived from the file
								        name of the first media URL found (extension and a leading
								        ``dn`` removed); if no media URL is present the display id
								        matched from the page URL is used instead.

								        Raises ``KeyError`` if the embedded JSON has no ``title`` entry or
								        a ``captions`` item has no ``url`` entry.
								        """
								        display_id = self._match_id(url)
								        webpage = self._download_webpage(url, display_id)
								        description = self._og_search_description(webpage)

								        json_data = self._parse_json(self._search_regex(
								            r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
								            display_id)
								        video_id = None
								        formats = []

								        # Language assumed for captions that do not declare one.
								        default_lang = 'en'

								        subtitles = {}

								        def add_subtitle_item(lang, info_dict):
								            # Group subtitle entries by language code.
								            subtitles.setdefault(lang, []).append(info_dict)

								        # chapter_file are not subtitles
								        caption_file = json_data.get('caption_file')
								        # Skip empty/null values: urljoin() would otherwise hand back the
								        # page URL itself as a bogus subtitle URL.
								        if caption_file:
								            add_subtitle_item(default_lang, {
								                'url': compat_urlparse.urljoin(url, caption_file),
								            })

								        # ``or []`` / ``or ''`` guard against explicit JSON nulls, for which
								        # dict.get() returns None rather than the supplied default.
								        for subtitle_item in json_data.get('captions') or []:
								            lang = (subtitle_item.get('language') or '').lower() or default_lang
								            add_subtitle_item(lang, {
								                'url': compat_urlparse.urljoin(url, subtitle_item['url']),
								            })

								        for key in ('file', 'audio', 'video'):
								            media_url = json_data.get(key, '')
								            if not media_url:
								                continue
								            # Resolve relative URLs against the page and drop the query string.
								            media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
								            # The first media URL found determines the video id.
								            video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
								            formats.append({
								                'url': media_url,
								            })

								        self._sort_formats(formats)

								        return {
								            'id': video_id or display_id,
								            'title': json_data['title'],
								            'description': description,
								            'subtitles': subtitles,
								            'formats': formats,
								        }