| 
									
										
										
										
											2014-06-08 22:45:12 +07:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							| 
									
										
										
										
											2013-07-08 01:13:55 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							|  |  |  |     unified_strdate, | 
					
						
							|  |  |  |     xpath_text, | 
					
						
							|  |  |  |     determine_ext, | 
					
						
							|  |  |  |     float_or_none, | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2013-07-08 01:13:55 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  | class DreiSatIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2013-07-08 01:13:55 +02:00
										 |  |  |     IE_NAME = '3sat' | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |     _GEO_COUNTRIES = ['DE'] | 
					
						
							|  |  |  |     _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)' | 
					
						
							| 
									
										
										
										
											2015-04-30 21:26:55 +03:00
										 |  |  |     _TESTS = [ | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', | 
					
						
							|  |  |  |             'md5': 'be37228896d30a88f315b638900a026e', | 
					
						
							|  |  |  |             'info_dict': { | 
					
						
							|  |  |  |                 'id': '45918', | 
					
						
							|  |  |  |                 'ext': 'mp4', | 
					
						
							|  |  |  |                 'title': 'Waidmannsheil', | 
					
						
							|  |  |  |                 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', | 
					
						
							| 
									
										
										
										
											2016-07-07 16:39:39 -05:00
										 |  |  |                 'uploader': 'SCHWEIZWEIT', | 
					
						
							|  |  |  |                 'uploader_id': '100000210', | 
					
						
							| 
									
										
										
										
											2015-04-30 21:26:55 +03:00
										 |  |  |                 'upload_date': '20140913' | 
					
						
							| 
									
										
										
										
											2016-07-07 16:39:39 -05:00
										 |  |  |             }, | 
					
						
							|  |  |  |             'params': { | 
					
						
							|  |  |  |                 'skip_download': True,  # m3u8 downloads | 
					
						
							| 
									
										
										
										
											2015-04-30 21:26:55 +03:00
										 |  |  |             } | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066', | 
					
						
							|  |  |  |             'only_matching': True, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     ] | 
					
						
							| 
									
										
										
										
											2013-07-08 01:13:55 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |     def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): | 
					
						
							|  |  |  |         param_groups = {} | 
					
						
							|  |  |  |         for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)): | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |             group_id = param_group.get(self._xpath_ns( | 
					
						
							|  |  |  |                 'id', 'http://www.w3.org/XML/1998/namespace')) | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |             params = {} | 
					
						
							|  |  |  |             for param in param_group: | 
					
						
							|  |  |  |                 params[param.get('name')] = param.get('value') | 
					
						
							|  |  |  |             param_groups[group_id] = params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         formats = [] | 
					
						
							|  |  |  |         for video in smil.findall(self._xpath_ns('.//video', namespace)): | 
					
						
							|  |  |  |             src = video.get('src') | 
					
						
							|  |  |  |             if not src: | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |             bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |             group_id = video.get('paramGroup') | 
					
						
							|  |  |  |             param_group = param_groups[group_id] | 
					
						
							|  |  |  |             for proto in param_group['protocols'].split(','): | 
					
						
							|  |  |  |                 formats.append({ | 
					
						
							|  |  |  |                     'url': '%s://%s' % (proto, param_group['host']), | 
					
						
							|  |  |  |                     'app': param_group['app'], | 
					
						
							|  |  |  |                     'play_path': src, | 
					
						
							|  |  |  |                     'ext': 'flv', | 
					
						
							|  |  |  |                     'format_id': '%s-%d' % (proto, bitrate), | 
					
						
							|  |  |  |                     'tbr': bitrate, | 
					
						
							|  |  |  |                 }) | 
					
						
							|  |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  |         return formats | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def extract_from_xml_url(self, video_id, xml_url): | 
					
						
							|  |  |  |         doc = self._download_xml( | 
					
						
							|  |  |  |             xml_url, video_id, | 
					
						
							|  |  |  |             note='Downloading video info', | 
					
						
							|  |  |  |             errnote='Failed to download video info') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |         status_code = xpath_text(doc, './status/statuscode') | 
					
						
							|  |  |  |         if status_code and status_code != 'ok': | 
					
						
							|  |  |  |             if status_code == 'notVisibleAnymore': | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |                 message = 'Video %s is not available' % video_id | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |                 message = '%s returned error: %s' % (self.IE_NAME, status_code) | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |             raise ExtractorError(message, expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |         title = xpath_text(doc, './/information/title', 'title', True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         urls = [] | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |         formats = [] | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |         for fnode in doc.findall('.//formitaeten/formitaet'): | 
					
						
							|  |  |  |             video_url = xpath_text(fnode, 'url') | 
					
						
							|  |  |  |             if not video_url or video_url in urls: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             urls.append(video_url) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |             is_available = 'http://www.metafilegenerator' not in video_url | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |             geoloced = 'static_geoloced_online' in video_url | 
					
						
							|  |  |  |             if not is_available or geoloced: | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |             format_id = fnode.attrib['basetype'] | 
					
						
							|  |  |  |             format_m = re.match(r'''(?x)
 | 
					
						
							|  |  |  |                 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ | 
					
						
							|  |  |  |                 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) | 
					
						
							|  |  |  |             ''', format_id)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             ext = determine_ext(video_url, None) or format_m.group('container') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if ext == 'meta': | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             elif ext == 'smil': | 
					
						
							|  |  |  |                 formats.extend(self._extract_smil_formats( | 
					
						
							|  |  |  |                     video_url, video_id, fatal=False)) | 
					
						
							|  |  |  |             elif ext == 'm3u8': | 
					
						
							|  |  |  |                 # the certificates are misconfigured (see | 
					
						
							|  |  |  |                 # https://github.com/rg3/youtube-dl/issues/8665) | 
					
						
							|  |  |  |                 if video_url.startswith('https://'): | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 formats.extend(self._extract_m3u8_formats( | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |                     video_url, video_id, 'mp4', 'm3u8_native', | 
					
						
							|  |  |  |                     m3u8_id=format_id, fatal=False)) | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |             elif ext == 'f4m': | 
					
						
							|  |  |  |                 formats.extend(self._extract_f4m_formats( | 
					
						
							|  |  |  |                     video_url, video_id, f4m_id=format_id, fatal=False)) | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |                 quality = xpath_text(fnode, './quality') | 
					
						
							|  |  |  |                 if quality: | 
					
						
							|  |  |  |                     format_id += '-' + quality | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |                 abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000) | 
					
						
							|  |  |  |                 vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000) | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |                 tbr = int_or_none(self._search_regex( | 
					
						
							|  |  |  |                     r'_(\d+)k', video_url, 'bitrate', None)) | 
					
						
							|  |  |  |                 if tbr and vbr and not abr: | 
					
						
							|  |  |  |                     abr = tbr - vbr | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 formats.append({ | 
					
						
							|  |  |  |                     'format_id': format_id, | 
					
						
							|  |  |  |                     'url': video_url, | 
					
						
							|  |  |  |                     'ext': ext, | 
					
						
							|  |  |  |                     'acodec': format_m.group('acodec'), | 
					
						
							|  |  |  |                     'vcodec': format_m.group('vcodec'), | 
					
						
							|  |  |  |                     'abr': abr, | 
					
						
							|  |  |  |                     'vbr': vbr, | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |                     'tbr': tbr, | 
					
						
							|  |  |  |                     'width': int_or_none(xpath_text(fnode, './width')), | 
					
						
							|  |  |  |                     'height': int_or_none(xpath_text(fnode, './height')), | 
					
						
							|  |  |  |                     'filesize': int_or_none(xpath_text(fnode, './filesize')), | 
					
						
							|  |  |  |                     'protocol': format_m.group('proto').lower(), | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |                 }) | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         geolocation = xpath_text(doc, './/details/geolocation') | 
					
						
							|  |  |  |         if not formats and geolocation and geolocation != 'none': | 
					
						
							|  |  |  |             self.raise_geo_restricted(countries=self._GEO_COUNTRIES) | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |         thumbnails = [] | 
					
						
							|  |  |  |         for node in doc.findall('.//teaserimages/teaserimage'): | 
					
						
							|  |  |  |             thumbnail_url = node.text | 
					
						
							|  |  |  |             if not thumbnail_url: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             thumbnail = { | 
					
						
							|  |  |  |                 'url': thumbnail_url, | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             thumbnail_key = node.get('key') | 
					
						
							|  |  |  |             if thumbnail_key: | 
					
						
							|  |  |  |                 m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) | 
					
						
							|  |  |  |                 if m: | 
					
						
							|  |  |  |                     thumbnail['width'] = int(m.group(1)) | 
					
						
							|  |  |  |                     thumbnail['height'] = int(m.group(2)) | 
					
						
							|  |  |  |             thumbnails.append(thumbnail) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         upload_date = unified_strdate(xpath_text(doc, './/details/airtime')) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |             'description': xpath_text(doc, './/information/detail'), | 
					
						
							|  |  |  |             'duration': int_or_none(xpath_text(doc, './/details/lengthSec')), | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |             'thumbnails': thumbnails, | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |             'uploader': xpath_text(doc, './/details/originChannelTitle'), | 
					
						
							|  |  |  |             'uploader_id': xpath_text(doc, './/details/originChannelId'), | 
					
						
							| 
									
										
										
										
											2017-01-06 17:13:53 +07:00
										 |  |  |             'upload_date': upload_date, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-07-08 01:13:55 +02:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2018-05-16 11:24:44 +01:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							|  |  |  |         details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id | 
					
						
							| 
									
										
										
										
											2016-01-02 21:29:10 +01:00
										 |  |  |         return self.extract_from_xml_url(video_id, details_url) |