| 
									
										
										
										
											2014-04-21 06:25:21 +02:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-10 18:40:50 +01:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-16 05:44:34 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-10 18:40:50 +01:00
										 |  |  | class MDRIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2014-04-21 06:25:21 +02:00
										 |  |  |     _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)' | 
					
						
							| 
									
										
										
										
											2014-11-23 20:41:03 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-20 17:23:59 +01:00
										 |  |  |     # No tests, MDR regularily deletes its videos | 
					
						
							| 
									
										
										
										
											2014-04-21 06:25:21 +02:00
										 |  |  |     _TEST = { | 
					
						
							|  |  |  |         'url': 'http://www.mdr.de/fakt/video189002.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2013-12-10 18:40:50 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         m = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         video_id = m.group('video_id') | 
					
						
							|  |  |  |         domain = m.group('domain') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # determine title and media streams from webpage | 
					
						
							|  |  |  |         html = self._download_webpage(url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-21 06:25:21 +02:00
										 |  |  |         title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title') | 
					
						
							| 
									
										
										
										
											2013-12-16 05:44:34 +01:00
										 |  |  |         xmlurl = self._search_regex( | 
					
						
							| 
									
										
										
										
											2014-04-21 06:25:21 +02:00
										 |  |  |             r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL') | 
					
						
							| 
									
										
										
										
											2013-12-16 05:44:34 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         doc = self._download_xml(domain + xmlurl, video_id) | 
					
						
							|  |  |  |         formats = [] | 
					
						
							|  |  |  |         for a in doc.findall('./assets/asset'): | 
					
						
							| 
									
										
										
										
											2015-08-03 12:00:08 +02:00
										 |  |  |             url_el = a.find('./progressiveDownloadUrl') | 
					
						
							| 
									
										
										
										
											2013-12-16 05:44:34 +01:00
										 |  |  |             if url_el is None: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             abr = int(a.find('bitrateAudio').text) // 1000 | 
					
						
							|  |  |  |             media_type = a.find('mediaType').text | 
					
						
							|  |  |  |             format = { | 
					
						
							|  |  |  |                 'abr': abr, | 
					
						
							|  |  |  |                 'filesize': int(a.find('fileSize').text), | 
					
						
							|  |  |  |                 'url': url_el.text, | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             vbr_el = a.find('bitrateVideo') | 
					
						
							|  |  |  |             if vbr_el is None: | 
					
						
							|  |  |  |                 format.update({ | 
					
						
							|  |  |  |                     'vcodec': 'none', | 
					
						
							| 
									
										
										
										
											2014-04-21 06:25:21 +02:00
										 |  |  |                     'format_id': '%s-%d' % (media_type, abr), | 
					
						
							| 
									
										
										
										
											2013-12-16 05:44:34 +01:00
										 |  |  |                 }) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 vbr = int(vbr_el.text) // 1000 | 
					
						
							|  |  |  |                 format.update({ | 
					
						
							|  |  |  |                     'vbr': vbr, | 
					
						
							|  |  |  |                     'width': int(a.find('frameWidth').text), | 
					
						
							|  |  |  |                     'height': int(a.find('frameHeight').text), | 
					
						
							| 
									
										
										
										
											2014-04-21 06:25:21 +02:00
										 |  |  |                     'format_id': '%s-%d' % (media_type, vbr), | 
					
						
							| 
									
										
										
										
											2013-12-16 05:44:34 +01:00
										 |  |  |                 }) | 
					
						
							|  |  |  |             formats.append(format) | 
					
						
							| 
									
										
										
										
											2013-12-24 23:34:11 +01:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-16 05:44:34 +01:00
										 |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |         } |