some HTMLParser bugfixes
This commit is contained in:
		
							parent
							
								
									9e6dd23876
								
							
						
					
					
						commit
						9beb5af82e
					
				
							
								
								
									
										
											BIN
										
									
								
								youtube-dl
									
									
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								youtube-dl
									
									
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								youtube-dl.exe
									
									
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								youtube-dl.exe
									
									
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							| @ -359,8 +359,8 @@ class YoutubeIE(InfoExtractor): | ||||
| 					pass | ||||
| 
 | ||||
| 		# description | ||||
| 		video_description = get_element_by_id("eow-description", video_webpage) | ||||
| 		if video_description: video_description = clean_html(video_description.decode('utf8')) | ||||
| 		video_description = get_element_by_id("eow-description", video_webpage.decode('utf8')) | ||||
| 		if video_description: video_description = clean_html(video_description) | ||||
| 		else: video_description = '' | ||||
| 			 | ||||
| 		# closed captions | ||||
| @ -1055,8 +1055,8 @@ class VimeoIE(InfoExtractor): | ||||
| 		video_thumbnail = config["video"]["thumbnail"] | ||||
| 
 | ||||
| 		# Extract video description | ||||
| 		video_description = get_element_by_id("description", webpage) | ||||
| 		if video_description: video_description = clean_html(video_description.decode('utf8')) | ||||
| 		video_description = get_element_by_id("description", webpage.decode('utf8')) | ||||
| 		if video_description: video_description = clean_html(video_description) | ||||
| 		else: video_description = '' | ||||
| 
 | ||||
| 		# Extract upload date | ||||
|  | ||||
| @ -73,7 +73,7 @@ def htmlentity_transform(matchobj): | ||||
| 	# Unknown entity in name, return its literal representation | ||||
| 	return (u'&%s;' % entity) | ||||
| 
 | ||||
| 
 | ||||
| HTMLParser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix | ||||
| class IDParser(HTMLParser.HTMLParser): | ||||
| 	"""Modified HTMLParser that isolates a tag with the specified id""" | ||||
| 	def __init__(self, id): | ||||
| @ -83,8 +83,17 @@ class IDParser(HTMLParser.HTMLParser): | ||||
| 		self.depth = {} | ||||
| 		self.html = None | ||||
| 		self.watch_startpos = False | ||||
| 		self.error_count = 0 | ||||
| 		HTMLParser.HTMLParser.__init__(self) | ||||
| 
 | ||||
| 	def error(self, message): | ||||
| 		print self.getpos() | ||||
| 		if self.error_count > 10 or self.started: | ||||
| 			raise HTMLParser.HTMLParseError(message, self.getpos()) | ||||
| 		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line | ||||
| 		self.error_count += 1 | ||||
| 		self.goahead(1) | ||||
| 
 | ||||
| 	def loads(self, html): | ||||
| 		self.html = html | ||||
| 		self.feed(html) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user