[dhm] Simplify
This commit is contained in:
		
							parent
							
								
									79fd11ab8e
								
							
						
					
					
						commit
						af8c93086c
					
				| @ -1,53 +1,64 @@ | |||||||
| # coding: utf-8 |  | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| 
 | from ..utils import ( | ||||||
| import urllib2 |     xpath_text, | ||||||
| import xml.etree.ElementTree as ET |     parse_duration, | ||||||
| import re | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class DHMIE(InfoExtractor): | class DHMIE(InfoExtractor): | ||||||
|     IE_DESC = 'Deutsches Historisches Museum' |     IE_DESC = 'Filmarchiv - Deutsches Historisches Museum' | ||||||
|     _VALID_URL = r'http://www\.dhm\.de/filmarchiv/(?P<id>.*?)' |     _VALID_URL = r'http://www\.dhm\.de/filmarchiv/die-filme/(?P<id>[^/]+)' | ||||||
| 
 | 
 | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/', |         'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/', | ||||||
|         'md5': '11c475f670209bf6acca0b2b7ef51827', |         'md5': '11c475f670209bf6acca0b2b7ef51827', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'marshallwg', |             'id': 'the-marshallplan-at-work-in-west-germany', | ||||||
|             'ext': 'flv', |             'ext': 'flv', | ||||||
|             'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE', |             'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE', | ||||||
|             'thumbnail': 'http://www.dhm.de/filmarchiv/video/mpworkwg.jpg', |             'description': 'md5:1fabd480c153f97b07add61c44407c82', | ||||||
|  |             'duration': 660, | ||||||
|  |             'thumbnail': 're:^https?://.*\.jpg$', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = '' |         video_id = self._match_id(url) | ||||||
|  | 
 | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
| 
 | 
 | ||||||
|         title = self._html_search_regex( |         playlist_url = self._search_regex( | ||||||
|             r'dc:title=\"(.*?)\"', webpage, 'title') |             r"file\s*:\s*'([^']+)'", webpage, 'playlist url') | ||||||
| 
 | 
 | ||||||
|         playlist_url = self._html_search_regex( |         playlist = self._download_xml(playlist_url, video_id) | ||||||
|             r'file: \'(.*?)\'', webpage, 'playlist URL') |  | ||||||
| 
 | 
 | ||||||
|         xml_file = urllib2.urlopen(playlist_url) |         track = playlist.find( | ||||||
|         data = xml_file.read() |             './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track') | ||||||
|         xml_file.close() |  | ||||||
| 
 | 
 | ||||||
|         root = ET.fromstring(data) |         video_url = xpath_text( | ||||||
|         video_url = root[0][0][0].text |             track, './{http://xspf.org/ns/0/}location', | ||||||
|         thumbnail = root[0][0][2].text |             'video url', fatal=True) | ||||||
|  |         thumbnail = xpath_text( | ||||||
|  |             track, './{http://xspf.org/ns/0/}image', | ||||||
|  |             'thumbnail') | ||||||
| 
 | 
 | ||||||
|         m = re.search('video/(.+?).flv', video_url) |         title = self._search_regex( | ||||||
|         if m: |             [r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'], | ||||||
|             video_id = m.group(1) |             webpage, 'title').strip() | ||||||
|  |         description = self._html_search_regex( | ||||||
|  |             r'<p><strong>Description:</strong>(.+?)</p>', | ||||||
|  |             webpage, 'description', fatal=False) | ||||||
|  |         duration = parse_duration(self._search_regex( | ||||||
|  |             r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)', | ||||||
|  |             webpage, 'duration', fatal=False)) | ||||||
| 
 | 
 | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |  | ||||||
|             'url': video_url, |             'url': video_url, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'duration': duration, | ||||||
|             'thumbnail': thumbnail, |             'thumbnail': thumbnail, | ||||||
|         } |         } | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user