| 
									
										
										
										
											2014-08-18 22:43:35 +02:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import json | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | from ..compat import ( | 
					
						
							| 
									
										
										
										
											2014-08-18 22:43:35 +02:00
										 |  |  |     compat_urllib_parse, | 
					
						
							| 
									
										
										
										
											2014-10-23 21:26:48 +02:00
										 |  |  |     compat_urlparse, | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2014-08-18 22:43:35 +02:00
										 |  |  |     get_element_by_attribute, | 
					
						
							|  |  |  |     parse_duration, | 
					
						
							|  |  |  |     strip_jsonp, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class MiTeleIE(InfoExtractor): | 
					
						
							|  |  |  |     IE_NAME = 'mitele.es' | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  |     _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' | 
					
						
							| 
									
										
										
										
											2014-08-18 22:43:35 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     _TEST = { | 
					
						
							|  |  |  |         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', | 
					
						
							|  |  |  |         'md5': '6a75fe9d0d3275bead0cb683c616fddb', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '0fce117d', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Programa 144 - Tor, la web invisible', | 
					
						
							|  |  |  |             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', | 
					
						
							|  |  |  |             'display_id': 'programa-144', | 
					
						
							|  |  |  |             'duration': 2913, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  |         episode = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2014-08-18 22:43:35 +02:00
										 |  |  |         webpage = self._download_webpage(url, episode) | 
					
						
							|  |  |  |         embed_data_json = self._search_regex( | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  |             r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', | 
					
						
							| 
									
										
										
										
											2014-08-18 22:43:35 +02:00
										 |  |  |         ).replace('\'', '"') | 
					
						
							|  |  |  |         embed_data = json.loads(embed_data_json) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-23 20:08:55 +02:00
										 |  |  |         domain = embed_data['mediaUrl'] | 
					
						
							|  |  |  |         if not domain.startswith('http'): | 
					
						
							|  |  |  |             # only happens in telecinco.es videos | 
					
						
							|  |  |  |             domain = 'http://' + domain | 
					
						
							| 
									
										
										
										
											2014-10-23 21:26:48 +02:00
										 |  |  |         info_url = compat_urlparse.urljoin( | 
					
						
							| 
									
										
										
										
											2014-10-23 20:08:55 +02:00
										 |  |  |             domain, | 
					
						
							|  |  |  |             compat_urllib_parse.unquote(embed_data['flashvars']['host']) | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2014-08-18 22:43:35 +02:00
										 |  |  |         info_el = self._download_xml(info_url, episode).find('./video/info') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_link = info_el.find('videoUrl/link').text | 
					
						
							|  |  |  |         token_query = compat_urllib_parse.urlencode({'id': video_link}) | 
					
						
							|  |  |  |         token_info = self._download_json( | 
					
						
							| 
									
										
										
										
											2014-10-23 20:08:55 +02:00
										 |  |  |             embed_data['flashvars']['ov_tk'] + '?' + token_query, | 
					
						
							|  |  |  |             episode, | 
					
						
							| 
									
										
										
										
											2014-08-18 22:43:35 +02:00
										 |  |  |             transform_source=strip_jsonp | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': embed_data['videoId'], | 
					
						
							|  |  |  |             'display_id': episode, | 
					
						
							|  |  |  |             'title': info_el.find('title').text, | 
					
						
							|  |  |  |             'url': token_info['tokenizedUrl'], | 
					
						
							|  |  |  |             'description': get_element_by_attribute('class', 'text', webpage), | 
					
						
							|  |  |  |             'thumbnail': info_el.find('thumb').text, | 
					
						
							|  |  |  |             'duration': parse_duration(info_el.find('duration').text), | 
					
						
							|  |  |  |         } |