78 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			78 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								from __future__ import unicode_literals
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import re
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from .common import InfoExtractor
							 | 
						||
| 
								 | 
							
								from ..utils import (
							 | 
						||
| 
								 | 
							
								    clean_html,
							 | 
						||
| 
								 | 
							
								    determine_ext,
							 | 
						||
| 
								 | 
							
								    int_or_none,
							 | 
						||
| 
								 | 
							
								    js_to_json,
							 | 
						||
| 
								 | 
							
								    mimetype2ext,
							 | 
						||
| 
								 | 
							
								    parse_filesize,
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class MassengeschmackTVIE(InfoExtractor):
								    """Extractor for episode pages under massengeschmack.tv/play/<id>."""

								    IE_NAME = 'massengeschmack.tv'
								    # The episode id is everything after /play/ up to a query or fragment.
								    _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'

								    _TEST = {
								        'url': 'https://massengeschmack.tv/play/fktv202',
								        'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
								        'info_dict': {
								            'id': 'fktv202',
								            'ext': 'mp4',
								            'title': 'Fernsehkritik-TV - Folge 202',
								        },
								    }

								    def _real_extract(self, url):
								        """Build the info dict for the episode page at *url*.

								        Formats are collected from two places in the downloaded page:

								        * the JavaScript ``MEDIA = [...]`` array (player sources), and
								        * the HTML download links, each an ``<a href>`` followed by a
								          ``<strong>`` label and a ``<small>`` element holding an optional
								          ``WIDTHxHEIGHT`` and a file size in MiB/GiB.

								        Returns a dict with the keys ``id``, ``title``, ``formats`` and
								        ``thumbnail``.
								        """
								        episode = self._match_id(url)

								        webpage = self._download_webpage(url, episode)
								        title = clean_html(self._html_search_regex(
								            '<h3>([^<]+)</h3>', webpage, 'title'))
								        # The poster lookup is optional (fatal=False); the media list is not.
								        thumbnail = self._search_regex(
								            r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
								        sources = self._parse_json(
								            self._search_regex(
								                r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'),
								            episode, js_to_json)

								        formats = []
								        for source in sources:
								            furl = source.get('src')
								            if not furl:
								                continue
								            furl = self._proto_relative_url(furl)
								            # Prefer the extension from the URL, else derive it from the
								            # source's MIME type.
								            ext = determine_ext(furl) or mimetype2ext(source.get('type'))
								            if ext == 'm3u8':
								                formats.extend(self._extract_m3u8_formats(
								                    furl, episode, 'mp4', 'm3u8_native',
								                    m3u8_id='hls', fatal=False))
								            else:
								                formats.append({
								                    'url': furl,
								                    # Reuse the resolved extension so the MIME-type
								                    # fallback also labels the format.
								                    'format_id': ext,
								                })

								        for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
								                                   <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
								                                   <strong>(?P<format_id>.+?)</strong>.*?
								                                   <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
								                                ''', webpage):
								            formats.append({
								                # The pattern above also accepts scheme-less '//host/...'
								                # hrefs, so normalize them the same way as the MEDIA
								                # sources instead of emitting a URL without a scheme.
								                'url': self._proto_relative_url(durl),
								                'format_id': format_id,
								                'width': int_or_none(width),
								                'height': int_or_none(height),
								                'filesize': parse_filesize(filesize),
								                # Labels starting with "Audio" are marked as having no video.
								                'vcodec': 'none' if format_id.startswith('Audio') else None,
								            })

								        self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))

								        return {
								            'id': episode,
								            'title': title,
								            'formats': formats,
								            'thumbnail': thumbnail,
								        }
							 |