2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								#!/usr/bin/env python  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# -*- coding: utf-8 -*-  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-08-28 12:57:10 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  datetime  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import  email . utils  
						 
					
						
							
								
									
										
										
										
											2013-05-13 09:20:08 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  errno  
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  gzip  
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:09:17 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  io  
						 
					
						
							
								
									
										
										
										
											2012-12-20 13:13:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  json  
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  locale  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import  os  
						 
					
						
							
								
									
										
										
										
											2013-10-12 13:49:27 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  pipes  
						 
					
						
							
								
									
										
										
										
											2013-08-28 12:57:10 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  platform  
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  re  
						 
					
						
							
								
									
										
										
										
											2013-08-28 12:57:10 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  socket  
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  sys  
						 
					
						
							
								
									
										
										
										
											2013-01-03 15:39:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  traceback  
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  zlib  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  urllib . request  as  compat_urllib_request 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  urllib2  as  compat_urllib_request 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  urllib . error  as  compat_urllib_error 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  urllib2  as  compat_urllib_error 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  urllib . parse  as  compat_urllib_parse 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  urllib  as  compat_urllib_parse 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 04:51:27 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    from  urllib . parse  import  urlparse  as  compat_urllib_parse_urlparse 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    from  urlparse  import  urlparse  as  compat_urllib_parse_urlparse 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-07-12 14:53:28 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    import  urllib . parse  as  compat_urlparse 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    import  urlparse  as  compat_urlparse 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  http . cookiejar  as  compat_cookiejar 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  cookielib  as  compat_cookiejar 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-27 23:54:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:02:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  html . entities  as  compat_html_entities 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:17:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  htmlentitydefs  as  compat_html_entities 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:02:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:06:28 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  html . parser  as  compat_html_parser 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:17:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  HTMLParser  as  compat_html_parser 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:06:28 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:13:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  http . client  as  compat_http_client 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:17:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    import  httplib  as  compat_http_client 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:13:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-08-28 04:25:38 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
									
										
										
										
											2013-08-28 10:18:39 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    from  urllib . error  import  HTTPError  as  compat_HTTPError 
							 
						 
					
						
							
								
									
										
										
										
											2013-08-28 04:25:38 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								except  ImportError :   # Python 2  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    from  urllib2  import  HTTPError  as  compat_HTTPError 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-09-21 14:19:30 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    from  urllib . request  import  urlretrieve  as  compat_urlretrieve 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								except  ImportError :   # Python 2  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    from  urllib  import  urlretrieve  as  compat_urlretrieve 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-12-16 12:29:03 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try:
								    from subprocess import DEVNULL
								except ImportError:
								    # subprocess.DEVNULL is unavailable on this interpreter, so hand
								    # out a write handle on the platform's null device instead.
								    def compat_subprocess_get_DEVNULL():
								        """Return a writable file object opened on os.path.devnull.

								        Every call opens a new handle; closing it is up to the caller.
								        """
								        return open(os.path.devnull, 'w')
								else:
								    def compat_subprocess_get_DEVNULL():
								        """Return the subprocess.DEVNULL special value."""
								        return DEVNULL
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:17:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								try :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    from  urllib . parse  import  parse_qs  as  compat_parse_qs 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:17:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								except  ImportError :  # Python 2  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # Python 2's version is apparently totally broken 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  _unquote ( string ,  encoding = ' utf-8 ' ,  errors = ' replace ' ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  string  ==  ' ' : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  string 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        res  =  string . split ( ' % ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  len ( res )  ==  1 : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  string 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  encoding  is  None : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            encoding  =  ' utf-8 ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  errors  is  None : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            errors  =  ' replace ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        pct_sequence  =  b ' ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        string  =  res [ 0 ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        for  item  in  res [ 1 : ] : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            try : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                if  not  item : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    raise  ValueError 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                pct_sequence  + =  item [ : 2 ] . decode ( ' hex ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                rest  =  item [ 2 : ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                if  not  rest : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    # This segment was just a single percent-encoded character. 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    # May be part of a sequence of code units, so delay decoding. 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    # (Stored in pct_sequence). 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    continue 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            except  ValueError : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                rest  =  ' % '  +  item 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            # Encountered non-percent-encoded characters. Flush the current 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            # pct_sequence. 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            string  + =  pct_sequence . decode ( encoding ,  errors )  +  rest 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            pct_sequence  =  b ' ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  pct_sequence : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            # Flush the final pct_sequence 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            string  + =  pct_sequence . decode ( encoding ,  errors ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        return  string 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  _parse_qsl ( qs ,  keep_blank_values = False ,  strict_parsing = False , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                encoding = ' utf-8 ' ,  errors = ' replace ' ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        qs ,  _coerce_result  =  qs ,  unicode 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        pairs  =  [ s2  for  s1  in  qs . split ( ' & ' )  for  s2  in  s1 . split ( ' ; ' ) ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        r  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        for  name_value  in  pairs : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if  not  name_value  and  not  strict_parsing : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                continue 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            nv  =  name_value . split ( ' = ' ,  1 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if  len ( nv )  !=  2 : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                if  strict_parsing : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    raise  ValueError ( " bad query field:  %r "  %  ( name_value , ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                # Handle case of a control-name with no equal sign 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                if  keep_blank_values : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    nv . append ( ' ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                else : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    continue 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if  len ( nv [ 1 ] )  or  keep_blank_values : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                name  =  nv [ 0 ] . replace ( ' + ' ,  '   ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                name  =  _unquote ( name ,  encoding = encoding ,  errors = errors ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                name  =  _coerce_result ( name ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                value  =  nv [ 1 ] . replace ( ' + ' ,  '   ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                value  =  _unquote ( value ,  encoding = encoding ,  errors = errors ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                value  =  _coerce_result ( value ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                r . append ( ( name ,  value ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        return  r 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
								                        encoding='utf-8', errors='replace'):
								        """Parse the query string *qs* into a dict of name -> list of values.

								        All arguments are forwarded to _parse_qsl(); values of a name
								        that occurs several times are collected in order of appearance.
								        """
								        parsed_result = {}
								        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
								                           encoding=encoding, errors=errors)
								        for name, value in pairs:
								            # setdefault creates the list on first sight of a name.
								            parsed_result.setdefault(name, []).append(value)
								        return parsed_result
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:13:00 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:02:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# Text (unicode) string type for the running interpreter.
								try:
								    compat_str = unicode  # Python 2
								except NameError:
								    # `unicode` is not defined here, so fall back to `str`
								    compat_str = str
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:02:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# Function turning an integer code point into a one-character text string.
								try:
								    compat_chr = unichr  # Python 2
								except NameError:
								    # `unichr` is not defined here, so fall back to `chr`
								    compat_chr = chr
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:02:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-05-20 11:57:10 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def compat_ord(c):
								    """Return the integer value of *c*.

								    Integers are returned unchanged; anything else is passed to ord().
								    """
								    if isinstance(c, int):
								        return c
								    return ord(c)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-06-06 14:35:08 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# The type of compiled regular expression objects. This is not clearly
								# defined otherwise, so derive it from an actual compiled pattern.
								compiled_regex_type = type(re.compile(''))
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:02:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# Standard set of HTTP header fields (browser-like values).
								# NOTE(review): presumably attached to outgoing requests; the users of
								# this dict are outside this section.
								std_headers = {
								    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
								    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
								    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
								    'Accept-Encoding': 'gzip, deflate',
								    'Accept-Language': 'en-us,en;q=0.5',
								}
						 
					
						
							
								
									
										
										
										
											2012-12-30 18:22:36 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								def preferredencoding():
								    """Get preferred encoding.

								    Returns the best encoding scheme for the system, based on
								    locale.getpreferredencoding() and some further tweaks: if the
								    lookup fails, or the reported encoding cannot encode a test
								    string, 'UTF-8' is returned instead.
								    """
								    try:
								        pref = locale.getpreferredencoding()
								        # Probe the codec; an unknown encoding name raises here
								        u'TEST'.encode(pref)
								    except Exception:
								        # Deliberately best-effort, but do not swallow
								        # KeyboardInterrupt/SystemExit as a bare except would
								        pref = 'UTF-8'
								    return pref
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:46:21 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								if sys.version_info < (3, 0):
								    def compat_print(s):
								        """Print *s* encoded with the preferred encoding.

								        Characters the encoding cannot represent are written as XML
								        character references.
								        """
								        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
								else:
								    def compat_print(s):
								        """Print the text string *s*; asserts that it is a text string."""
								        assert isinstance(s, type(u''))
								        print(s)
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-12-20 13:13:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# In Python 2.x, json.dump expects a bytestream.
								# In Python 3.x, it writes to a character stream
								if sys.version_info < (3, 0):
								    def write_json_file(obj, fn):
								        """Serialize *obj* as JSON into the file named *fn* (binary mode)."""
								        with open(fn, 'wb') as f:
								            json.dump(obj, f)
								else:
								    def write_json_file(obj, fn):
								        """Serialize *obj* as JSON into the file named *fn* (UTF-8 text)."""
								        with open(fn, 'w', encoding='utf-8') as f:
								            json.dump(obj, f)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-07-11 16:12:08 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								if sys.version_info >= (2, 7):
								    def find_xpath_attr(node, xpath, key, val):
								        """ Find the xpath xpath[@key=val] """
								        # key and val are interpolated into the expression below, so
								        # only a restricted character set is accepted for each
								        assert re.match(r'^[a-zA-Z]+$', key)
								        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
								        expr = xpath + u"[@%s='%s']" % (key, val)
								        return node.find(expr)
								else:
								    def find_xpath_attr(node, xpath, key, val):
								        """ Find the xpath xpath[@key=val] """
								        # Filter by hand instead of using an attribute predicate:
								        # return the first match of xpath whose attribute key equals val
								        for f in node.findall(xpath):
								            if f.attrib.get(key) == val:
								                return f
								        return None
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-10-12 21:34:04 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# On python2.6 the xml.etree.ElementTree.Element methods don't support
								# the namespace parameter
								def xpath_with_ns(path, ns_map):
								    """Expand 'prefix:tag' steps of *path* to '{namespace}tag'.

								    *path* is split on '/'. A step without ':' is kept as is; a step of
								    the form 'prefix:tag' has its prefix looked up in *ns_map* (KeyError
								    for an unknown prefix). A step with more than one ':' raises
								    ValueError.
								    """
								    replaced = []
								    for step in path.split('/'):
								        c = step.split(':')
								        if len(c) == 1:
								            replaced.append(c[0])
								        else:
								            ns, tag = c
								            replaced.append('{%s}%s' % (ns_map[ns], tag))
								    return '/'.join(replaced)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								def htmlentity_transform(matchobj):
								    """Transforms an HTML entity to a character.

								    This function receives a match object and is intended to be used with
								    the re.sub() function. Group 1 of the match is taken as the entity
								    text.

								    Named entities are resolved through
								    compat_html_entities.name2codepoint and numeric ones ('#233',
								    '#xe9') through compat_chr(). Anything else, including numeric
								    references that compat_chr() rejects, is returned as '&<entity>;'.
								    """
								    entity = matchobj.group(1)
								    # Known non-numeric HTML entity
								    if entity in compat_html_entities.name2codepoint:
								        return compat_chr(compat_html_entities.name2codepoint[entity])
								    # Numeric reference. Hexadecimal digits must be accepted after 'x',
								    # otherwise '#xe9' is not recognised and '#x2F' is cut short to 'x2'.
								    mobj = re.match(u'(?u)#(x[0-9a-fA-F]+|\\d+)', entity)
								    if mobj is not None:
								        numstr = mobj.group(1)
								        if numstr.startswith(u'x'):
								            base = 16
								            numstr = u'0%s' % numstr
								        else:
								            base = 10
								        try:
								            return compat_chr(int(numstr, base))
								        except (ValueError, OverflowError):
								            # Not a valid code point: fall through to the literal form
								            pass
								    # Unknown entity in name, return its literal representation
								    return (u'&%s;' % entity)
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 00:06:28 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# backport bugfix: replace the parser module's start-tag-end locator
								compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE)
						 
					
						
							
								
									
										
										
										
											2013-09-13 22:05:29 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class BaseHTMLParser(compat_html_parser.HTMLParser):
								    """HTMLParser that keeps the document given to loads() in self.html."""

								    # The constructor must be named __init__: spelled `__init` it is
								    # name-mangled and never called, leaving self.html undefined
								    # until loads() runs.
								    def __init__(self):
								        compat_html_parser.HTMLParser.__init__(self)
								        self.html = None

								    def loads(self, html):
								        """Store *html*, feed it to the parser and close the parser."""
								        self.html = html
								        self.feed(html)
								        self.close()
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class  AttrParser ( BaseHTMLParser ) :  
						 
					
						
							
								
									
										
										
										
											2012-12-19 14:21:14 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    """ Modified HTMLParser that isolates a tag with the specified attribute """ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  __init__ ( self ,  attribute ,  value ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . attribute  =  attribute 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . value  =  value 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        self . result  =  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . started  =  False 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . depth  =  { } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . watch_startpos  =  False 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . error_count  =  0 
							 
						 
					
						
							
								
									
										
										
										
											2013-09-13 22:05:29 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        BaseHTMLParser . __init__ ( self ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  error ( self ,  message ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  self . error_count  >  10  or  self . started : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            raise  compat_html_parser . HTMLParseError ( message ,  self . getpos ( ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . rawdata  =  ' \n ' . join ( self . html . split ( ' \n ' ) [ self . getpos ( ) [ 0 ] : ] )  # skip one line 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . error_count  + =  1 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . goahead ( 1 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  handle_starttag ( self ,  tag ,  attrs ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        attrs  =  dict ( attrs ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  self . started : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            self . find_startpos ( None ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-12-19 14:21:14 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        if  self . attribute  in  attrs  and  attrs [ self . attribute ]  ==  self . value : 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            self . result  =  [ tag ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            self . started  =  True 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            self . watch_startpos  =  True 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  self . started : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if  not  tag  in  self . depth :  self . depth [ tag ]  =  0 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            self . depth [ tag ]  + =  1 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  handle_endtag ( self ,  tag ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  self . started : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if  tag  in  self . depth :  self . depth [ tag ]  - =  1 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if  self . depth [ self . result [ 0 ] ]  ==  0 : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                self . started  =  False 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                self . result . append ( self . getpos ( ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  find_startpos ( self ,  x ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        """ Needed to put the start position of the result (self.result[1]) 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        after  the  opening  tag  with  the  requested  id """ 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  self . watch_startpos : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            self . watch_startpos  =  False 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            self . result . append ( self . getpos ( ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    handle_entityref  =  handle_charref  =  handle_data  =  handle_comment  =  \
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    handle_decl  =  handle_pi  =  unknown_decl  =  find_startpos 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  get_result ( self ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  self . result  is  None : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  len ( self . result )  !=  3 : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        lines  =  self . html . split ( ' \n ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        lines  =  lines [ self . result [ 1 ] [ 0 ] - 1 : self . result [ 2 ] [ 0 ] ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        lines [ 0 ]  =  lines [ 0 ] [ self . result [ 1 ] [ 1 ] : ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  len ( lines )  ==  1 : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            lines [ - 1 ]  =  lines [ - 1 ] [ : self . result [ 2 ] [ 1 ] - self . result [ 1 ] [ 1 ] ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        lines [ - 1 ]  =  lines [ - 1 ] [ : self . result [ 2 ] [ 1 ] ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        return  ' \n ' . join ( lines ) . strip ( ) 
							 
						 
					
						
							
								
									
										
										
										
											2013-02-01 17:29:50 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# Hack for https://github.com/rg3/youtube-dl/issues/662  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								if  sys . version_info  <  ( 2 ,  7 ,  3 ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    AttrParser . parse_endtag  =  ( lambda  self ,  i : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        i  +  len ( " </scr ' + ' ipt> " ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  self . rawdata [ i : ] . startswith ( " </scr ' + ' ipt> " ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        else  compat_html_parser . HTMLParser . parse_endtag ( self ,  i ) ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-04-11 00:22:51 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  get_element_by_id ( id ,  html ) :  
						 
					
						
							
								
									
										
										
										
											2012-12-19 14:21:14 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    """ Return the content of the tag with the specified ID in the passed HTML document """ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    return  get_element_by_attribute ( " id " ,  id ,  html ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  get_element_by_attribute ( attribute ,  value ,  html ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    """ Return the content of the tag with the specified attribute in the passed HTML document """ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    parser  =  AttrParser ( attribute ,  value ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    try : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        parser . loads ( html ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    except  compat_html_parser . HTMLParseError : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        pass 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    return  parser . get_result ( ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-04-11 00:22:51 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-09-13 22:05:29 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  MetaParser ( BaseHTMLParser ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    """ 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    Modified  HTMLParser  that  isolates  a  meta  tag  with  the  specified  name  
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    attribute . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    """ 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  __init__ ( self ,  name ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        BaseHTMLParser . __init__ ( self ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . name  =  name 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . content  =  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        self . result  =  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  handle_starttag ( self ,  tag ,  attrs ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  tag  !=  ' meta ' : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        attrs  =  dict ( attrs ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  attrs . get ( ' name ' )  ==  self . name : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            self . result  =  attrs . get ( ' content ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  get_result ( self ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        return  self . result 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  get_meta_content ( name ,  html ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    """ 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    Return  the  content  attribute  from  the  meta  tag  with  the  given  name  attribute . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    """ 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    parser  =  MetaParser ( name ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    try : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        parser . loads ( html ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    except  compat_html_parser . HTMLParseError : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        pass 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    return  parser . get_result ( ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-04-11 00:22:51 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  clean_html ( html ) :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    """ Clean an HTML snippet into a readable string """ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # Newline vs <br /> 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    html  =  html . replace ( ' \n ' ,  '   ' ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-12-20 16:30:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    html  =  re . sub ( r ' \ s*< \ s*br \ s*/? \ s*> \ s* ' ,  ' \n ' ,  html ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    html  =  re . sub ( r ' < \ s*/ \ s*p \ s*> \ s*< \ s*p[^>]*> ' ,  ' \n ' ,  html ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    # Strip html tags 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    html  =  re . sub ( ' <.*?> ' ,  ' ' ,  html ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # Replace html entities 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    html  =  unescapeHTML ( html ) 
							 
						 
					
						
							
								
									
										
										
										
											2013-03-29 15:59:13 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    return  html . strip ( ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-04-11 00:22:51 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								def  sanitize_open ( filename ,  open_mode ) :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    """ Try to open the given filename, and slightly tweak it if this fails. 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    Attempts  to  open  the  given  filename .  If  this  fails ,  it  tries  to  change 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    the  filename  slightly ,  step  by  step ,  until  it ' s either able to open it 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    or  it  fails  and  raises  a  final  exception ,  like  the  standard  open ( ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    function . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    It  returns  the  tuple  ( stream ,  definitive_file_name ) . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    """ 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    try : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  filename  ==  u ' - ' : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            if  sys . platform  ==  ' win32 ' : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                import  msvcrt 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                msvcrt . setmode ( sys . stdout . fileno ( ) ,  os . O_BINARY ) 
							 
						 
					
						
							
								
									
										
										
										
											2013-03-28 13:13:03 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            return  ( sys . stdout . buffer  if  hasattr ( sys . stdout ,  ' buffer ' )  else  sys . stdout ,  filename ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        stream  =  open ( encodeFilename ( filename ) ,  open_mode ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        return  ( stream ,  filename ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    except  ( IOError ,  OSError )  as  err : 
							 
						 
					
						
							
								
									
										
										
										
											2013-05-13 09:20:08 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        if  err . errno  in  ( errno . EACCES , ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            raise 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-05-13 09:20:08 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        # In case of error, try to remove win32 forbidden chars 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        alt_filename  =  os . path . join ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                        re . sub ( u ' [/<>: " \\ | \\ \\ ? \\ *] ' ,  u ' # ' ,  path_part ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                        for  path_part  in  os . path . split ( filename ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                       ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  alt_filename  ==  filename : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            raise 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        else : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            # An exception here should be caught in the caller 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            stream  =  open ( encodeFilename ( filename ) ,  open_mode ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  ( stream ,  alt_filename ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  timeconvert ( timestr ) :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    """ Convert RFC 2822 defined time string into system timestamp """ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    timestamp  =  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    timetuple  =  email . utils . parsedate_tz ( timestr ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    if  timetuple  is  not  None : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        timestamp  =  email . utils . mktime_tz ( timetuple ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    return  timestamp 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-26 23:58:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-12-03 15:36:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def  sanitize_filename ( s ,  restricted = False ,  is_id = False ) :  
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    """ Sanitizes a string so it could be used as part of a filename. 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    If  restricted  is  set ,  use  a  stricter  subset  of  allowed  characters . 
							 
						 
					
						
							
								
									
										
										
										
											2012-12-03 15:36:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    Set  is_id  if  this  is  not  an  arbitrary  string ,  but  an  ID  that  should  be  kept  if  possible 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    """ 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    def  replace_insane ( char ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  char  ==  ' ? '  or  ord ( char )  <  32  or  ord ( char )  ==  127 : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  ' ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        elif  char  ==  ' " ' : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  ' '  if  restricted  else  ' \' ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        elif  char  ==  ' : ' : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  ' _- '  if  restricted  else  '  - ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        elif  char  in  ' \\ /|*<> ' : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  ' _ ' 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 12:59:27 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        if  restricted  and  ( char  in  ' !& \' ()[] {} $;`^,# '  or  char . isspace ( ) ) : 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            return  ' _ ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  restricted  and  ord ( char )  >  127 : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            return  ' _ ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        return  char 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    result  =  u ' ' . join ( map ( replace_insane ,  s ) ) 
							 
						 
					
						
							
								
									
										
										
										
											2012-12-03 15:36:24 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    if  not  is_id : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        while  ' __ '  in  result : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            result  =  result . replace ( ' __ ' ,  ' _ ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        result  =  result . strip ( ' _ ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # Common case of "Foreign band name - English song title" 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  restricted  and  result . startswith ( ' -_ ' ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            result  =  result [ 2 : ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  not  result : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            result  =  ' _ ' 
							 
						 
					
						
							
								
									
										
										
										
											2012-11-28 02:04:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    return  result 
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def orderedSet(iterable):
								    """Return the distinct elements of ``iterable`` as a list, in first-seen order.

								    While every element seen so far is hashable, membership is tracked in a
								    set so each lookup is constant time. As soon as an unhashable element
								    shows up the function falls back to scanning the result list, which
								    keeps inputs such as lists of lists working.

								    @param iterable  any iterable; it is traversed exactly once
								    @return          a new list holding the first occurrence of each element
								    """
								    res = []
								    seen = set()
								    hashable = True  # flips to False once an unhashable element is met
								    for el in iterable:
								        if hashable:
								            try:
								                duplicate = el in seen
								                if not duplicate:
								                    seen.add(el)
								            except TypeError:
								                # el cannot be hashed: from here on rely on list scans only,
								                # so hashable and unhashable values that compare equal
								                # (e.g. a frozenset and a set) still count as duplicates.
								                hashable = False
								                duplicate = el in res
								        else:
								            duplicate = el in res
								        if not duplicate:
								            res.append(el)
								    return res
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def unescapeHTML(s):
								    """Replace HTML entities in a unicode string.

								    Every ``&...;`` sequence found in ``s`` is handed, as a regex match, to
								    htmlentity_transform, and that function's return value is substituted in.

								    @param s  a unicode string (enforced with an assertion)
								    @return   the string with the substitutions applied
								    """
								    assert type(s) == type(u'')

								    return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def encodeFilename(s):
								    """Prepare a unicode file name for use with the file system functions.

								    The name is returned unchanged when the platform takes unicode directly;
								    otherwise it is encoded to bytes with the file system encoding.

								    @param s  The name of the file (must be a unicode string)
								    @return   ``s`` itself, or its encoded form
								    """
								    assert type(s) == type(u'')

								    # Python 3 has a Unicode API. So do Windows 2000 and up, where u'' can
								    # be passed directly. (Detecting Windows NT 4 is tricky because
								    # 'major >= 4' would match the Windows 9x series as well. Besides,
								    # NT 4 is obsolete.)
								    takes_unicode = sys.version_info >= (3, 0) or (
								        sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
								    if takes_unicode:
								        return s

								    fs_encoding = sys.getfilesystemencoding()
								    codec = 'utf-8' if fs_encoding is None else fs_encoding
								    # Characters the codec cannot represent are dropped
								    return s.encode(codec, 'ignore')
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-02-21 17:09:39 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def decodeOption(optval):
								    """Return an option value as text.

								    ``None`` is passed straight through. A bytes value is decoded with the
								    encoding reported by preferredencoding(). The outcome is asserted to be
								    a compat_str.

								    @param optval  the option value: None, bytes or text
								    @return        None, or the value as compat_str
								    """
								    if optval is not None:
								        if isinstance(optval, bytes):
								            optval = optval.decode(preferredencoding())

								        assert isinstance(optval, compat_str)
								    return optval
							 
						 
					
						
							
								
									
										
										
										
											2013-01-01 20:27:53 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-05-04 12:02:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def formatSeconds(secs):
								    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or plain seconds.

								    The widest unit shown depends on the magnitude of ``secs``: an hour or
								    more gets an hours field, a minute or more gets a minutes field, and
								    anything shorter is printed as a bare number of seconds. The boundaries
								    are inclusive, so exactly 3600 renders as '1:00:00' (not '60:00') and
								    exactly 60 renders as '1:00' (not '60').

								    @param secs  duration in seconds (formatted with %d, so a fractional part is dropped)
								    @return      the formatted string
								    """
								    if secs >= 3600:
								        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
								    elif secs >= 60:
								        return '%d:%02d' % (secs // 60, secs % 60)
								    else:
								        return '%d' % secs
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-05-04 12:19:02 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def make_HTTPS_handler(opts):
								    """Create the HTTPS handler used for opening https URLs.

								    @param opts  options object; only its ``no_check_certificate`` attribute
								                 is read, and only on Python 3.2 or newer
								    @return      a compat_urllib_request.HTTPSHandler instance
								    """
								    if sys.version_info < (3, 2):
								        # Older interpreters get the stock handler: their handler is very
								        # simplistic and is not given a custom SSL context here
								        return compat_urllib_request.HTTPSHandler()

								    import ssl
								    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
								    context.set_default_verify_paths()

								    # Certificates are verified unless the user opted out
								    if opts.no_check_certificate:
								        context.verify_mode = ssl.CERT_NONE
								    else:
								        context.verify_mode = ssl.CERT_REQUIRED
								    return compat_urllib_request.HTTPSHandler(context=context)
							 
						 
					
						
							
								
									
										
										
										
											2013-05-04 12:19:02 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-01-01 20:27:53 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class ExtractorError(Exception):
								    """Raised when information extraction fails."""

								    def __init__(self, msg, tb=None, expected=False, cause=None):
								        """Build the error, adding a bug-report hint to unexpected failures.

								        @param msg       the error message
								        @param tb        if given, the original traceback (so that it can be printed out)
								        @param expected  if set, this is a normal error message and most likely
								                         not a bug in youtube-dl
								        @param cause     stored unchanged on the ``cause`` attribute
								        """
								        # An error created while one of these exception types is being
								        # handled counts as expected, whatever flag was passed in.
								        # The comparison is on the exact type.
								        known_failures = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
								        active_exc_type = sys.exc_info()[0]
								        expected = expected or active_exc_type in known_failures

								        if not expected:
								            report_hint = u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
								            msg = msg + report_hint
								        super(ExtractorError, self).__init__(msg)

								        self.traceback = tb
								        self.exc_info = sys.exc_info()  # preserve original exception
								        self.cause = cause

								    def format_traceback(self):
								        """Return the stored traceback formatted as one string, or None if there is none."""
								        if self.traceback is not None:
								            return u''.join(traceback.format_tb(self.traceback))
								        return None
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-01-01 20:27:53 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-10-23 14:38:03 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class RegexNotFoundError(ExtractorError):
								    """Extraction error signalling that a regular expression did not match."""
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								class DownloadError(Exception):
								    """Raised when a download fails.

								    FileDownloader objects may throw this exception if they are not
								    configured to continue on errors. It carries the appropriate
								    error message.
								    """

								    def __init__(self, msg, exc_info=None):
								        """Create the error.

								        @param msg       the error message
								        @param exc_info  if given, the original exception that caused the
								                         trouble (as returned by sys.exc_info())
								        """
								        self.exc_info = exc_info
								        super(DownloadError, self).__init__(msg)
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class SameFileError(Exception):
								    """Raised when several downloads collide on one output file.

								    FileDownloader objects throw this exception if they detect that
								    multiple files would have to be downloaded to the same file on disk.
								    """
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class PostProcessingError(Exception):
								    """Post Processing exception.

								    This exception may be raised by PostProcessor's .run() method to
								    indicate an error in the postprocessing task.
								    """

								    def __init__(self, msg):
								        """Store the message on the instance and in the exception args.

								        @param msg  description of the failure
								        """
								        # Forward msg to Exception so that str(), repr() and args carry it
								        # even when the exception is built with a keyword argument.
								        super(PostProcessingError, self).__init__(msg)
								        self.msg = msg
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class MaxDownloadsReached(Exception):
								    """Signals that the --max-downloads limit has been reached."""
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class UnavailableVideoError(Exception):
								    """Raised when a requested format is unavailable.

								    This exception is thrown when a video is requested in a format
								    that is not available for that video.
								    """
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class ContentTooShortError(Exception):
								    """Content Too Short exception.

								    This exception may be raised by FileDownloader objects when a file they
								    download is too small for what the server announced first, indicating
								    the connection was probably interrupted.

								    Attributes:
								        downloaded -- number of bytes actually received
								        expected   -- number of bytes the server announced
								    """

								    # Both in bytes
								    downloaded = None
								    expected = None

								    def __init__(self, downloaded, expected):
								        self.downloaded = downloaded
								        self.expected = expected

								    def __str__(self):
								        # Without this, str(exc) is just the raw args tuple, which says
								        # nothing useful in logs; args itself is left untouched.
								        return 'Downloaded %s bytes, expected %s bytes' % (
								            self.downloaded, self.expected)
							 
						 
					
						
							
								
									
										
										
										
											2012-03-25 03:07:37 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-08-27 23:15:01 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
								    """Handler for HTTP requests and responses.

								    This class, when installed with an OpenerDirector, automatically adds
								    the standard headers to every HTTP request and handles gzipped and
								    deflated responses from web servers. If compression is to be avoided in
								    a particular request, the original request in the program code only has
								    to include the HTTP header "Youtubedl-No-Compression", which will be
								    removed before making the real request.

								    Likewise, a "Youtubedl-user-agent" header on the request replaces the
								    "User-agent" header and is itself removed before the request is sent.

								    Part of this code was copied from:

								    http://techknack.net/python-urllib2-handlers/

								    Andrew Rowls, the author of that code, agreed to release it to the
								    public domain.
								    """

								    @staticmethod
								    def deflate(data):
								        """Decompress deflate-encoded bytes.

								        Raw deflate (no zlib header) is tried first; if that fails the data
								        is decompressed as a regular zlib stream.
								        """
								        try:
								            return zlib.decompress(data, -zlib.MAX_WBITS)
								        except zlib.error:
								            return zlib.decompress(data)

								    @staticmethod
								    def addinfourl_wrapper(stream, headers, url, code):
								        """Build an addinfourl response carrying the given status code.

								        When addinfourl has no getcode attribute, the code is not passed to
								        the constructor and is set on the instance afterwards instead.
								        """
								        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
								            return compat_urllib_request.addinfourl(stream, headers, url, code)
								        ret = compat_urllib_request.addinfourl(stream, headers, url)
								        ret.code = code
								        return ret

								    def http_request(self, req):
								        """Apply the standard headers and the Youtubedl-* control headers.

								        Returns the (modified in place) request.
								        """
								        # Every standard header replaces a same-named header already
								        # present on the request.
								        for h, v in std_headers.items():
								            req.headers.pop(h, None)
								            req.add_header(h, v)
								        if 'Youtubedl-no-compression' in req.headers:
								            # Caller asked for an uncompressed transfer: do not advertise
								            # any encodings, and drop the internal marker header.
								            req.headers.pop('Accept-encoding', None)
								            del req.headers['Youtubedl-no-compression']
								        if 'Youtubedl-user-agent' in req.headers:
								            # Internal override for the User-agent header.
								            req.headers['User-agent'] = req.headers.pop('Youtubedl-user-agent')
								        return req

								    def http_response(self, req, resp):
								        """Return resp with gzip/deflate content encoding undone.

								        Responses with any other (or no) Content-encoding are returned
								        unchanged. Decoded responses are new addinfourl objects that keep
								        the headers, url, code and msg of the original response.
								        """
								        old_resp = resp
								        # gzip
								        if resp.headers.get('Content-encoding', '') == 'gzip':
								            content = resp.read()
								            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
								            try:
								                uncompressed = io.BytesIO(gz.read())
								            except IOError as original_ioerror:
								                # There may be junk at the end of the file
								                # See http://stackoverflow.com/q/4928560/35070 for details
								                # Retry with 1..1023 trailing bytes cut off; if none of
								                # those attempts decodes, report the first failure.
								                for i in range(1, 1024):
								                    try:
								                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
								                        uncompressed = io.BytesIO(gz.read())
								                    except IOError:
								                        continue
								                    break
								                else:
								                    raise original_ioerror
								            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
								            resp.msg = old_resp.msg
								        # deflate
								        if resp.headers.get('Content-encoding', '') == 'deflate':
								            inflated = io.BytesIO(self.deflate(resp.read()))
								            resp = self.addinfourl_wrapper(inflated, old_resp.headers, old_resp.url, old_resp.code)
								            resp.msg = old_resp.msg
								        return resp

								    # HTTPS traffic gets exactly the same treatment.
								    https_request = http_request
								    https_response = http_response
							 
						 
					
						
							
								
									
										
										
										
											2013-04-27 15:14:20 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def unified_strdate(date_str):
								    """Return a string with the date in the format YYYYMMDD.

								    date_str is tried against a fixed list of known date/time layouts
								    after commas and a trailing numeric UTC offset have been removed.
								    Returns None when date_str is None or matches none of the layouts.
								    """
								    if date_str is None:
								        return None
								    # Replace commas
								    date_str = date_str.replace(',', ' ')
								    # %z (UTC offset) is only supported in python>=3.2
								    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
								    format_expressions = [
								        '%d %B %Y',
								        '%B %d %Y',
								        '%b %d %Y',
								        '%Y-%m-%d',
								        '%d/%m/%Y',
								        '%Y/%m/%d %H:%M:%S',
								        '%d.%m.%Y %H:%M',
								        '%Y-%m-%dT%H:%M:%SZ',
								        '%Y-%m-%dT%H:%M:%S.%fZ',
								        '%Y-%m-%dT%H:%M:%S.%f0Z',
								        '%Y-%m-%dT%H:%M:%S',
								    ]
								    for expression in format_expressions:
								        try:
								            parsed = datetime.datetime.strptime(date_str, expression)
								            return parsed.strftime('%Y%m%d')
								        except ValueError:
								            # Layout does not fit (or the date cannot be formatted);
								            # try the next one.
								            continue
								    return None
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-07-12 21:52:59 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def determine_ext(url, default_ext=u'unknown_video'):
								    """Guess a file extension from a URL.

								    The query string (everything from the first '?') is ignored and the
								    text after the last '.' is taken as the candidate. The candidate is
								    returned only when it is purely alphanumeric; otherwise, or when url
								    is None, default_ext is returned.
								    """
								    if url is None:
								        return default_ext
								    guess = url.partition(u'?')[0].rpartition(u'.')[2]
								    if re.match(r'^[A-Za-z0-9]+$', guess):
								        return guess
								    return default_ext
							 
						 
					
						
							
								
									
										
										
										
											2013-07-08 01:13:55 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-07-20 12:48:57 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def subtitles_filename(filename, sub_lang, sub_format):
								    """Return the subtitle file name that belongs to a media file name.

								    The extension of filename (if any) is replaced by
								    '.<sub_lang>.<sub_format>'.
								    """
								    import os.path

								    # splitext only looks at the last path component, so a dot in a
								    # directory name is not mistaken for the start of the extension.
								    stem = os.path.splitext(filename)[0]
								    return stem + u'.' + sub_lang + u'.' + sub_format
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-04-27 14:01:55 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def date_from_str(date_str):
								    """
								    Return a datetime.date object from a string in the format YYYYMMDD or
								    (now|today)[+-][0-9](day|week|month|year)(s)?

								    Relative forms are computed from today's date; a month counts as
								    30 days and a year as 365 days. Strings matching neither form make
								    strptime raise ValueError.
								    """
								    today = datetime.date.today()
								    if date_str == 'now' or date_str == 'today':
								        return today
								    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
								    if match is not None:
								        amount = int(match.group('time'))
								        if match.group('sign') == '-':
								            amount = -amount
								        unit = match.group('unit')
								        # timedelta has no month/year arguments, so both are
								        # approximated with a fixed number of days.
								        if unit == 'month':
								            unit = 'day'
								            amount *= 30
								        elif unit == 'year':
								            unit = 'day'
								            amount *= 365
								        # timedelta keyword names are plural: days, weeks
								        unit += 's'
								        delta = datetime.timedelta(**{unit: amount})
								        return today + delta
								    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class DateRange(object):
								    """Closed interval of calendar days bounded by ``start`` and ``end``."""

								    def __init__(self, start=None, end=None):
								        """Build the range from two optional date strings.

								        start and end are converted with date_from_str when given.  A
								        missing start falls back to the earliest date datetime can
								        represent, a missing end to the latest one.

								        Raises ValueError when start is later than end.
								        """
								        earliest = datetime.datetime.min.date()
								        latest = datetime.datetime.max.date()
								        self.start = earliest if start is None else date_from_str(start)
								        self.end = latest if end is None else date_from_str(end)
								        if self.start > self.end:
								            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

								    @classmethod
								    def day(cls, day):
								        """Return a range whose start and end are both the given day."""
								        return cls(day, day)

								    def __contains__(self, date):
								        """Tell whether date lies within the range (both bounds included).

								        date may be a datetime.date; anything else is first converted
								        with date_from_str.
								        """
								        if isinstance(date, datetime.date):
								            candidate = date
								        else:
								            candidate = date_from_str(date)
								        return self.start <= candidate <= self.end

								    def __str__(self):
								        """Render the range as '<start ISO date> - <end ISO date>'."""
								        bounds = (self.start.isoformat(), self.end.isoformat())
								        return '%s - %s' % bounds
							 
						 
					
						
							
								
									
										
										
										
											2013-08-28 12:57:10 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def platform_name():
								    """Return the value of platform.platform() as a compat_str.

								    A bytes result is decoded with preferredencoding() before being
								    returned.
								    """
								    raw = platform.platform()
								    name = raw.decode(preferredencoding()) if isinstance(raw, bytes) else raw

								    assert isinstance(name, compat_str)
								    return name
							 
						 
					
						
							
								
									
										
										
										
											2013-08-28 18:22:28 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-09-16 06:55:33 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def write_string(s, out=None):
								    """Write the text s to out and flush it.

								    out defaults to sys.stderr.  s must be exactly of the unicode text
								    type.  When out reports a binary mode, or when running on Python 2,
								    s is encoded with preferredencoding() (unencodable characters are
								    ignored) before being written.
								    """
								    stream = sys.stderr if out is None else out
								    assert type(s) == type(u'')

								    binary_stream = 'b' in getattr(stream, 'mode', '')
								    # Python 2 lies about mode of sys.stderr
								    if binary_stream or sys.version_info[0] < 3:
								        s = s.encode(preferredencoding(), 'ignore')
								    stream.write(s)
								    stream.flush()
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-08-28 14:28:55 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def bytes_to_intlist(bs):
								    """Convert a byte string into the list of its integer byte values.

								    An empty (falsy) input gives an empty list.
								    """
								    if not bs:
								        return []
								    # Indexing gives ints on Python 3 and 1-character strings on Python 2
								    first = bs[0]
								    if not isinstance(first, int):
								        return [ord(c) for c in bs]
								    return list(bs)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-08-28 18:22:28 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-08-28 15:59:07 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def intlist_to_bytes(xs):
								    """Convert a list of integer byte values into a byte string.

								    An empty (falsy) input gives b''.
								    """
								    if not xs:
								        return b''
								    if not isinstance(chr(0), bytes):
								        # Python 3: bytes() accepts an iterable of ints directly
								        return bytes(xs)
								    # Python 2: chr() already produces byte strings
								    return ''.join([chr(x) for x in xs])
							 
						 
					
						
							
								
									
										
										
										
											2013-10-02 08:41:03 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def get_cachedir(params=None):
								    """Return the directory to use for cached data.

								    params is an optional mapping; when it has a 'cachedir' key that
								    value is returned as is.  Otherwise the result is the 'youtube-dl'
								    directory below $XDG_CACHE_HOME, or below ~/.cache when that
								    variable is unset or empty.
								    """
								    # None instead of a shared mutable {} default
								    if params is None:
								        params = {}
								    # The XDG spec treats an empty XDG_CACHE_HOME like an unset one;
								    # using '' as root would yield a path relative to the working dir.
								    cache_root = (os.environ.get('XDG_CACHE_HOME')
								                  or os.path.expanduser('~/.cache'))
								    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
							 
						 
					
						
							
								
									
										
										
										
											2013-10-06 04:27:09 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# Cross-platform file locking: both branches define
								# _lock_file(f, exclusive) and _unlock_file(f).
								if sys.platform == 'win32':
								    import ctypes.wintypes
								    import msvcrt

								    # Structure handed to LockFileEx / UnlockFileEx as their last argument
								    class OVERLAPPED(ctypes.Structure):
								        _fields_ = [
								            ('Internal', ctypes.wintypes.LPVOID),
								            ('InternalHigh', ctypes.wintypes.LPVOID),
								            ('Offset', ctypes.wintypes.DWORD),
								            ('OffsetHigh', ctypes.wintypes.DWORD),
								            ('hEvent', ctypes.wintypes.HANDLE),
								        ]

								    kernel32 = ctypes.windll.kernel32

								    LockFileEx = kernel32.LockFileEx
								    LockFileEx.argtypes = [
								        ctypes.wintypes.HANDLE,     # hFile
								        ctypes.wintypes.DWORD,      # dwFlags
								        ctypes.wintypes.DWORD,      # dwReserved
								        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
								        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
								        ctypes.POINTER(OVERLAPPED)  # Overlapped
								    ]
								    LockFileEx.restype = ctypes.wintypes.BOOL

								    UnlockFileEx = kernel32.UnlockFileEx
								    UnlockFileEx.argtypes = [
								        ctypes.wintypes.HANDLE,     # hFile
								        ctypes.wintypes.DWORD,      # dwReserved
								        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
								        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
								        ctypes.POINTER(OVERLAPPED)  # Overlapped
								    ]
								    UnlockFileEx.restype = ctypes.wintypes.BOOL

								    # Low / high DWORD halves of the byte count passed to both calls
								    whole_low = 0xffffffff
								    whole_high = 0x7fffffff

								    def _lock_file(f, exclusive):
								        """Lock the file object f; raise OSError if LockFileEx fails."""
								        ov = OVERLAPPED()
								        ov.Offset = 0
								        ov.OffsetHigh = 0
								        ov.hEvent = 0
								        # Stored on the file object so _unlock_file can pass the same pointer
								        f._lock_file_overlapped_p = ctypes.pointer(ov)
								        handle = msvcrt.get_osfhandle(f.fileno())
								        flags = 0x2 if exclusive else 0x0
								        locked = LockFileEx(handle, flags, 0, whole_low, whole_high,
								                            f._lock_file_overlapped_p)
								        if not locked:
								            raise OSError('Locking file failed: %r' % ctypes.FormatError())

								    def _unlock_file(f):
								        """Release the lock taken by _lock_file; raise OSError on failure."""
								        assert f._lock_file_overlapped_p
								        handle = msvcrt.get_osfhandle(f.fileno())
								        unlocked = UnlockFileEx(handle, 0, whole_low, whole_high,
								                                f._lock_file_overlapped_p)
								        if not unlocked:
								            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

								else:
								    import fcntl

								    def _lock_file(f, exclusive):
								        """Lock f with fcntl.lockf: LOCK_EX if exclusive, else LOCK_SH."""
								        operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
								        fcntl.lockf(f, operation)

								    def _unlock_file(f):
								        """Release the lock on f with fcntl.lockf(LOCK_UN)."""
								        fcntl.lockf(f, fcntl.LOCK_UN)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class locked_file(object):
								    """File wrapper that holds a lock on the file inside a with-block.

								    The file is opened in __init__ with io.open.  Entering the context
								    locks it through _lock_file (exclusively unless the mode is 'r');
								    leaving the context unlocks it through _unlock_file and closes it.
								    """

								    def __init__(self, filename, mode, encoding=None):
								        """Open filename in mode 'r', 'a' or 'w' with the given encoding."""
								        assert mode in ['r', 'a', 'w']
								        self.f = io.open(filename, mode, encoding=encoding)
								        self.mode = mode

								    def __enter__(self):
								        """Lock the file and return self; close the file if locking fails."""
								        exclusive = self.mode != 'r'
								        try:
								            _lock_file(self.f, exclusive)
								        except (IOError, OSError):
								            # The win32 _lock_file raises OSError, which is not an
								            # IOError on Python 2; catch both so the file never leaks.
								            self.f.close()
								            raise
								        return self

								    def __exit__(self, etype, value, traceback):
								        """Unlock the file; it is closed even when unlocking raises."""
								        try:
								            _unlock_file(self.f)
								        finally:
								            self.f.close()

								    def __iter__(self):
								        """Iterate over the underlying file object."""
								        return iter(self.f)

								    def write(self, *args):
								        """Forward to the underlying file's write()."""
								        return self.f.write(*args)

								    def read(self, *args):
								        """Forward to the underlying file's read()."""
								        return self.f.read(*args)
							 
						 
					
						
							
								
									
										
										
										
											2013-10-12 13:49:27 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def shell_quote(args):
								    """Return the items of args shell-quoted and joined by single spaces."""
								    # pipes.quote is an undocumented alias and the pipes module is gone
								    # in Python 3.13; prefer shlex.quote and keep pipes for Python 2.
								    try:
								        from shlex import quote
								    except ImportError:
								        from pipes import quote
								    return ' '.join(map(quote, args))
							 
						 
					
						
							
								
									
										
										
										
											2013-10-15 12:05:13 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-10-18 00:46:35 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def takewhile_inclusive(pred, seq):
								    """Like itertools.takewhile, but also yield the last evaluated element.

								    Elements of seq are yielded up to and including the first element e
								    for which pred(e) is false.
								    """
								    for item in seq:
								        yield item
								        if pred(item):
								            continue
								        break
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-10-15 12:05:13 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def smuggle_url(url, data):
								    """Pass additional data in a URL for internal use.

								    data is serialized with json.dumps, URL-encoded under the key
								    '__youtubedl_smuggle' and appended to url after a '#'.
								    """
								    payload = {u'__youtubedl_smuggle': json.dumps(data)}
								    fragment = compat_urllib_parse.urlencode(payload)
								    return url + u'#' + fragment
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def unsmuggle_url(smug_url):
								    """Split a URL into (url, data), data being the smuggled payload.

								    When smug_url does not contain '#__youtubedl_smuggle' it is returned
								    unchanged together with None.  Otherwise everything after the last
								    '#' is parsed as a query string and the JSON stored under
								    '__youtubedl_smuggle' is decoded.
								    """
								    if '#__youtubedl_smuggle' not in smug_url:
								        return smug_url, None
								    plain_url, _sep, fragment = smug_url.rpartition(u'#')
								    values = compat_parse_qs(fragment)[u'__youtubedl_smuggle']
								    return plain_url, json.loads(values[0])