[collegehumor] Encode the XML before calling xml.etree.ElementTree.fromstring (fixes #1822)
Uses a new helper method in InfoExtractor: _download_xml
This commit is contained in:
		
							parent
							
								
									f459d17018
								
							
						
					
					
						commit
						267ed0c5d3
					
				| @ -1,5 +1,4 @@ | |||||||
| import re | import re | ||||||
| import xml.etree.ElementTree |  | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
| @ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor): | |||||||
| 
 | 
 | ||||||
|         self.report_extraction(video_id) |         self.report_extraction(video_id) | ||||||
|         xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id |         xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id | ||||||
|         metaXml = self._download_webpage(xmlUrl, video_id, |         mdoc = self._download_xml(xmlUrl, video_id, | ||||||
|                                          u'Downloading info XML', |                                          u'Downloading info XML', | ||||||
|                                          u'Unable to download video info XML') |                                          u'Unable to download video info XML') | ||||||
| 
 | 
 | ||||||
|         mdoc = xml.etree.ElementTree.fromstring(metaXml) |  | ||||||
|         try: |         try: | ||||||
|             videoNode = mdoc.findall('./video')[0] |             videoNode = mdoc.findall('./video')[0] | ||||||
|             youtubeIdNode = videoNode.find('./youtubeID') |             youtubeIdNode = videoNode.find('./youtubeID') | ||||||
| @ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor): | |||||||
| 
 | 
 | ||||||
|         if next_url.endswith(u'manifest.f4m'): |         if next_url.endswith(u'manifest.f4m'): | ||||||
|             manifest_url = next_url + '?hdcore=2.10.3' |             manifest_url = next_url + '?hdcore=2.10.3' | ||||||
|             manifestXml = self._download_webpage(manifest_url, video_id, |             adoc = self._download_xml(manifest_url, video_id, | ||||||
|                                          u'Downloading XML manifest', |                                          u'Downloading XML manifest', | ||||||
|                                          u'Unable to download video info XML') |                                          u'Unable to download video info XML') | ||||||
| 
 | 
 | ||||||
|             adoc = xml.etree.ElementTree.fromstring(manifestXml) |  | ||||||
|             try: |             try: | ||||||
|                 video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text |                 video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text | ||||||
|             except IndexError: |             except IndexError: | ||||||
|  | |||||||
| @ -4,6 +4,7 @@ import re | |||||||
| import socket | import socket | ||||||
| import sys | import sys | ||||||
| import netrc | import netrc | ||||||
|  | import xml.etree.ElementTree | ||||||
| 
 | 
 | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_http_client, |     compat_http_client, | ||||||
| @ -208,6 +209,11 @@ class InfoExtractor(object): | |||||||
|         """ Returns the data of the page as a string """ |         """ Returns the data of the page as a string """ | ||||||
|         return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] |         return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] | ||||||
| 
 | 
 | ||||||
|  |     def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to download XML'): | ||||||
|  |         """Return the xml as an xml.etree.ElementTree.Element""" | ||||||
|  |         xml_string = self._download_webpage(url_or_request, video_id, note, errnote) | ||||||
|  |         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) | ||||||
|  | 
 | ||||||
|     def to_screen(self, msg): |     def to_screen(self, msg): | ||||||
|         """Print msg to screen, prefixing it with '[ie_name]'""" |         """Print msg to screen, prefixing it with '[ie_name]'""" | ||||||
|         self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) |         self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user