| 
									
										
										
										
											2014-10-27 02:26:05 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-18 23:32:37 +02:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2013-09-16 14:45:14 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2013-09-18 23:32:37 +02:00
										 |  |  |     clean_html, | 
					
						
							| 
									
										
										
										
											2015-09-09 10:42:47 +01:00
										 |  |  |     determine_ext, | 
					
						
							| 
									
										
										
										
											2015-09-25 17:48:51 +08:00
										 |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2013-09-16 14:45:14 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-18 23:32:37 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-16 14:45:14 +02:00
										 |  |  | class FKTVIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2014-10-27 02:26:05 +01:00
										 |  |  |     IE_NAME = 'fernsehkritik.tv' | 
					
						
							| 
									
										
										
										
											2015-01-08 16:03:13 +01:00
										 |  |  |     _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?' | 
					
						
							| 
									
										
										
										
											2013-09-16 14:45:14 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-18 23:32:37 +02:00
										 |  |  |     _TEST = { | 
					
						
							| 
									
										
										
										
											2014-10-27 02:26:05 +01:00
										 |  |  |         'url': 'http://fernsehkritik.tv/folge-1', | 
					
						
							| 
									
										
										
										
											2015-09-09 10:42:47 +01:00
										 |  |  |         'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79', | 
					
						
							| 
									
										
										
										
											2014-10-27 02:26:05 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-09-09 10:42:47 +01:00
										 |  |  |             'id': '1', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2014-10-27 02:26:05 +01:00
										 |  |  |             'title': 'Folge 1 vom 10. April 2007', | 
					
						
							| 
									
										
										
										
											2015-09-25 17:58:44 +08:00
										 |  |  |             'thumbnail': 're:^https?://.*\.jpg$', | 
					
						
							| 
									
										
										
										
											2013-09-18 23:32:37 +02:00
										 |  |  |         }, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2015-09-09 10:42:47 +01:00
										 |  |  |         episode = self._match_id(url) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-25 18:01:08 +08:00
										 |  |  |         webpage = self._download_webpage( | 
					
						
							|  |  |  |             'http://fernsehkritik.tv/folge-%s/play' % episode, episode) | 
					
						
							|  |  |  |         title = clean_html(self._html_search_regex( | 
					
						
							|  |  |  |             '<h3>([^<]+)</h3>', webpage, 'title')) | 
					
						
							|  |  |  |         matches = re.search( | 
					
						
							| 
									
										
										
										
											2015-09-25 18:17:48 +08:00
										 |  |  |             r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>', | 
					
						
							|  |  |  |             webpage) | 
					
						
							| 
									
										
										
										
											2015-09-25 17:48:51 +08:00
										 |  |  |         if matches is None: | 
					
						
							|  |  |  |             raise ExtractorError('Unable to extract the video') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         poster, sources = matches.groups() | 
					
						
							| 
									
										
										
										
											2015-09-25 17:58:44 +08:00
										 |  |  |         if poster is None: | 
					
						
							|  |  |  |             self.report_warning('unable to extract thumbnail') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-25 17:51:48 +08:00
										 |  |  |         urls = re.findall(r'<source[^>]+src="([^"]+)"', sources) | 
					
						
							| 
									
										
										
										
											2015-09-25 18:01:08 +08:00
										 |  |  |         formats = [{ | 
					
						
							| 
									
										
										
										
											2015-09-25 21:58:45 +02:00
										 |  |  |             'url': furl, | 
					
						
							| 
									
										
										
										
											2015-09-26 07:51:11 +01:00
										 |  |  |             'format_id': determine_ext(furl), | 
					
						
							| 
									
										
										
										
											2015-09-25 21:58:45 +02:00
										 |  |  |         } for furl in urls] | 
					
						
							| 
									
										
										
										
											2015-09-25 17:48:51 +08:00
										 |  |  |         return { | 
					
						
							|  |  |  |             'id': episode, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'thumbnail': poster, | 
					
						
							|  |  |  |         } |