[collegehumor] Encode the xml before calling xml.etree.ElementTree.fromstring (fixes #1822)
Uses a new helper method in InfoExtractor: _download_xml
This commit is contained in:
		
							parent
							
								
									f459d17018
								
							
						
					
					
						commit
						267ed0c5d3
					
				| @ -1,5 +1,4 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| @ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor): | ||||
| 
 | ||||
|         self.report_extraction(video_id) | ||||
|         xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id | ||||
|         metaXml = self._download_webpage(xmlUrl, video_id, | ||||
|         mdoc = self._download_xml(xmlUrl, video_id, | ||||
|                                          u'Downloading info XML', | ||||
|                                          u'Unable to download video info XML') | ||||
| 
 | ||||
|         mdoc = xml.etree.ElementTree.fromstring(metaXml) | ||||
|         try: | ||||
|             videoNode = mdoc.findall('./video')[0] | ||||
|             youtubeIdNode = videoNode.find('./youtubeID') | ||||
| @ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor): | ||||
| 
 | ||||
|         if next_url.endswith(u'manifest.f4m'): | ||||
|             manifest_url = next_url + '?hdcore=2.10.3' | ||||
|             manifestXml = self._download_webpage(manifest_url, video_id, | ||||
|             adoc = self._download_xml(manifest_url, video_id, | ||||
|                                          u'Downloading XML manifest', | ||||
|                                          u'Unable to download video info XML') | ||||
| 
 | ||||
|             adoc = xml.etree.ElementTree.fromstring(manifestXml) | ||||
|             try: | ||||
|                 video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text | ||||
|             except IndexError: | ||||
|  | ||||
| @ -4,6 +4,7 @@ import re | ||||
| import socket | ||||
| import sys | ||||
| import netrc | ||||
| import xml.etree.ElementTree | ||||
| 
 | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
| @ -208,6 +209,11 @@ class InfoExtractor(object): | ||||
|         """ Returns the data of the page as a string """ | ||||
|         return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] | ||||
| 
 | ||||
|     def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to download XML'): | ||||
|         """Return the xml as an xml.etree.ElementTree.Element""" | ||||
|         xml_string = self._download_webpage(url_or_request, video_id, note, errnote) | ||||
|         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) | ||||
| 
 | ||||
|     def to_screen(self, msg): | ||||
|         """Print msg to screen, prefixing it with '[ie_name]'""" | ||||
|         self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user