2014-01-05 04:30:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  __future__  import  unicode_literals  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-06-26 17:55:54 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  re  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . . utils  import  (  
						 
					
						
							
								
									
										
										
										
											2014-04-03 05:56:28 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    int_or_none , 
							 
						 
					
						
							
								
									
										
										
										
											2014-01-05 04:30:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    unescapeHTML , 
							 
						 
					
						
							
								
									
										
										
										
											2014-02-02 18:24:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    find_xpath_attr , 
							 
						 
					
						
							
								
									
										
										
										
											2015-04-21 03:18:38 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    smuggle_url , 
							 
						 
					
						
							
								
									
										
										
										
											2013-06-26 17:55:54 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
									
										
										
										
											2015-04-21 03:18:38 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  . senateisvp  import  SenateISVPIE  
						 
					
						
							
								
									
										
										
										
											2013-06-26 17:55:54 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2014-01-05 04:30:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-06-26 17:55:54 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								class  CSpanIE ( InfoExtractor ) :  
						 
					
						
							
								
									
										
										
										
											2014-03-21 02:10:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    _VALID_URL  =  r ' http://(?:www \ .)?c-span \ .org/video/ \ ?(?P<id>[0-9a-f]+) ' 
							 
						 
					
						
							
								
									
										
										
										
											2014-01-05 04:30:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    IE_DESC  =  ' C-SPAN ' 
							 
						 
					
						
							
								
									
										
										
										
											2014-03-21 02:10:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    _TESTS  =  [ { 
							 
						 
					
						
							
								
									
										
										
										
											2014-02-02 18:24:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        ' url ' :  ' http://www.c-span.org/video/?313572-1/HolderonV ' , 
							 
						 
					
						
							
								
									
										
										
										
											2014-01-05 04:30:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        ' md5 ' :  ' 8e44ce11f0f725527daccc453f553eb0 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        ' info_dict ' :  { 
							 
						 
					
						
							
								
									
										
										
										
											2014-02-02 18:24:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            ' id ' :  ' 315139 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' ext ' :  ' mp4 ' , 
							 
						 
					
						
							
								
									
										
										
										
											2014-01-05 04:30:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            ' title ' :  ' Attorney General Eric Holder on Voting Rights Act Decision ' , 
							 
						 
					
						
							
								
									
										
										
										
											2014-02-02 18:24:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            ' description ' :  ' Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review. ' , 
							 
						 
					
						
							
								
									
										
										
										
											2013-06-27 20:46:46 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        } , 
							 
						 
					
						
							
								
									
										
										
										
											2014-01-22 15:10:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        ' skip ' :  ' Regularly fails on travis, for unknown reasons ' , 
							 
						 
					
						
							
								
									
										
										
										
											2014-03-21 02:10:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    } ,  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        ' url ' :  ' http://www.c-span.org/video/?c4486943/cspan-international-health-care-models ' , 
							 
						 
					
						
							
								
									
										
										
										
											2014-03-21 08:01:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        # For whatever reason, the served video alternates between 
							 
						 
					
						
							
								
									
										
										
										
											2014-03-21 02:10:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        # two different ones 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        ' info_dict ' :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' id ' :  ' 340723 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' ext ' :  ' mp4 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' title ' :  ' International Health Care Models ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' description ' :  ' md5:7a985a2d595dba00af3d9c9f0783c967 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        } 
							 
						 
					
						
							
								
									
										
										
										
											2014-08-28 00:58:24 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    } ,  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        ' url ' :  ' http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall ' , 
							 
						 
					
						
							
								
									
										
										
										
											2015-04-21 03:30:54 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        ' md5 ' :  ' 446562a736c6bf97118e389433ed88d4 ' , 
							 
						 
					
						
							
								
									
										
										
										
											2014-08-28 00:58:24 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        ' info_dict ' :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' id ' :  ' 342759 ' , 
							 
						 
					
						
							
								
									
										
										
										
											2015-04-21 03:30:54 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            ' ext ' :  ' mp4 ' , 
							 
						 
					
						
							
								
									
										
										
										
											2014-08-28 00:58:24 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            ' title ' :  ' General Motors Ignition Switch Recall ' , 
							 
						 
					
						
							
								
									
										
										
										
											2015-04-21 03:30:54 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            ' duration ' :  14848 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' description ' :  ' md5:70c7c3b8fa63fa60d42772440596034c ' 
							 
						 
					
						
							
								
									
										
										
										
											2014-08-28 00:58:24 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        } , 
							 
						 
					
						
							
								
									
										
										
										
											2015-04-21 03:18:38 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    } ,  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # Video from senate.gov 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        ' url ' :  ' http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        ' info_dict ' :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' id ' :  ' judiciary031715 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' ext ' :  ' flv ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' title ' :  ' Immigration Reforms Needed to Protect Skilled American Workers ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        } 
							 
						 
					
						
							
								
									
										
										
										
											2014-03-21 02:10:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    } ] 
							 
						 
					
						
							
								
									
										
										
										
											2013-06-26 17:55:54 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  _real_extract ( self ,  url ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        mobj  =  re . match ( self . _VALID_URL ,  url ) 
							 
						 
					
						
							
								
									
										
										
										
											2014-02-02 18:24:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        page_id  =  mobj . group ( ' id ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        webpage  =  self . _download_webpage ( url ,  page_id ) 
							 
						 
					
						
							
								
									
										
										
										
											2014-03-21 02:10:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        video_id  =  self . _search_regex ( r ' progid= \' ?([0-9]+) \' ?> ' ,  webpage ,  ' video id ' ) 
							 
						 
					
						
							
								
									
										
										
										
											2014-02-02 18:24:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        description  =  self . _html_search_regex ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                # The full description 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                r ' <div class= \' expandable \' >(.*?)<a href= \' # \' ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                # If the description is small enough the other div is not 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                # present, otherwise this is a stripped version 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                r ' <p class= \' initial \' >(.*?)</p> ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ] , 
							 
						 
					
						
							
								
									
										
										
										
											2015-04-21 03:18:38 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            webpage ,  ' description ' ,  flags = re . DOTALL ,  default = None ) 
							 
						 
					
						
							
								
									
										
										
										
											2014-01-05 04:30:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        info_url  =  ' http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id= '  +  video_id 
							 
						 
					
						
							
								
									
										
										
										
											2014-02-02 18:24:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        data  =  self . _download_json ( info_url ,  video_id ) 
							 
						 
					
						
							
								
									
										
										
										
											2014-01-05 04:30:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2014-04-03 05:56:28 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        doc  =  self . _download_xml ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' http://www.c-span.org/common/services/flashXml.php?programid= '  +  video_id , 
							 
						 
					
						
							
								
									
										
										
										
											2014-02-02 18:24:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            video_id ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2014-04-03 05:56:28 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        title  =  find_xpath_attr ( doc ,  ' .//string ' ,  ' name ' ,  ' title ' ) . text 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        thumbnail  =  find_xpath_attr ( doc ,  ' .//string ' ,  ' name ' ,  ' poster ' ) . text 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-04-21 03:18:38 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        senate_isvp_url  =  SenateISVPIE . _search_iframe_url ( webpage ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  senate_isvp_url : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            surl  =  smuggle_url ( senate_isvp_url ,  { ' force_title ' :  title } ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  self . url_result ( surl ,  ' SenateISVP ' ,  video_id ,  title ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2014-04-03 05:56:28 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        files  =  data [ ' video ' ] [ ' files ' ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        entries  =  [ { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' id ' :  ' %s _ %d '  %  ( video_id ,  partnum  +  1 ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' title ' :  ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                title  if  len ( files )  ==  1  else 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                ' %s  part  %d '  %  ( title ,  partnum  +  1 ) ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' url ' :  unescapeHTML ( f [ ' path ' ] [ ' #text ' ] ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' description ' :  description , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' thumbnail ' :  thumbnail , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ' duration ' :  int_or_none ( f . get ( ' length ' ,  { } ) . get ( ' #text ' ) ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        }  for  partnum ,  f  in  enumerate ( files ) ] 
							 
						 
					
						
							
								
									
										
										
										
											2014-02-02 18:24:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-04-21 03:30:54 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        if  len ( entries )  ==  1 : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            entry  =  dict ( entries [ 0 ] ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            entry [ ' id ' ]  =  video_id 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  entry 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        else : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                ' _type ' :  ' playlist ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                ' entries ' :  entries , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                ' title ' :  title , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                ' id ' :  video_id , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            }