| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-14 06:28:40 +07:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  | from .theplatform import ThePlatformBaseIE | 
					
						
							| 
									
										
										
										
											2018-09-26 05:38:41 +07:00
										 |  |  | from ..compat import ( | 
					
						
							|  |  |  |     compat_parse_qs, | 
					
						
							|  |  |  |     compat_str, | 
					
						
							|  |  |  |     compat_urllib_parse_urlparse, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |     ExtractorError, | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							|  |  |  |     update_url_query, | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  | class MediasetIE(ThePlatformBaseIE): | 
					
						
							|  |  |  |     _TP_TLD = 'eu' | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |     _VALID_URL = r'''(?x)
 | 
					
						
							| 
									
										
										
										
											2017-05-14 06:28:40 +07:00
										 |  |  |                     (?: | 
					
						
							|  |  |  |                         mediaset:| | 
					
						
							|  |  |  |                         https?:// | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |                             (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/ | 
					
						
							| 
									
										
										
										
											2017-05-14 06:28:40 +07:00
										 |  |  |                             (?: | 
					
						
							|  |  |  |                                 (?:video|on-demand)/(?:[^/]+/)+[^/]+_| | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |                                 player/index\.html\?.*?\bprogramGuid= | 
					
						
							| 
									
										
										
										
											2017-05-14 06:28:40 +07:00
										 |  |  |                             ) | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |                     )(?P<id>[0-9A-Z]{16}) | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |                     '''
 | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         # full episode | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824', | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  |         'md5': '9b75534d42c44ecef7bf1ffeacb7f85d', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |             'id': 'FAFU000000661824', | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Quarta puntata', | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg$', | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |             'duration': 1414.26, | 
					
						
							| 
									
										
										
										
											2017-05-14 06:39:47 +07:00
										 |  |  |             'upload_date': '20161107', | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |             'series': 'Hello Goodbye', | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |             'timestamp': 1478532900, | 
					
						
							|  |  |  |             'uploader': 'Rete 4', | 
					
						
							|  |  |  |             'uploader_id': 'R4', | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2018-06-18 11:50:06 +02:00
										 |  |  |     }, { | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501', | 
					
						
							|  |  |  |         'md5': '288532f0ad18307705b01e581304cd7b', | 
					
						
							| 
									
										
										
										
											2018-06-18 11:50:06 +02:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |             'id': 'F309013801000501', | 
					
						
							| 
									
										
										
										
											2018-06-18 11:50:06 +02:00
										 |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Puntata del 25 maggio', | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | 
					
						
							| 
									
										
										
										
											2018-06-18 11:50:06 +02:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg$', | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |             'duration': 6565.007, | 
					
						
							|  |  |  |             'upload_date': '20180526', | 
					
						
							| 
									
										
										
										
											2018-06-18 11:50:06 +02:00
										 |  |  |             'series': 'Matrix', | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |             'timestamp': 1527326245, | 
					
						
							|  |  |  |             'uploader': 'Canale 5', | 
					
						
							|  |  |  |             'uploader_id': 'C5', | 
					
						
							| 
									
										
										
										
											2018-06-18 11:50:06 +02:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2018-06-30 02:16:44 +07:00
										 |  |  |         'expected_warnings': ['HTTP Error 403: Forbidden'], | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         # clip | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680', | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         # iframe simple | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924', | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/) | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104', | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2017-05-14 06:28:40 +07:00
										 |  |  |     }, { | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         'url': 'mediaset:FAFU000000665924', | 
					
						
							| 
									
										
										
										
											2017-05-14 06:28:40 +07:00
										 |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-14 06:29:16 +07:00
										 |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2018-09-26 05:38:41 +07:00
										 |  |  |     def _extract_urls(ie, webpage): | 
					
						
							|  |  |  |         def _qs(url): | 
					
						
							|  |  |  |             return compat_parse_qs(compat_urllib_parse_urlparse(url).query) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def _program_guid(qs): | 
					
						
							|  |  |  |             return qs.get('programGuid', [None])[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         entries = [] | 
					
						
							|  |  |  |         for mobj in re.finditer( | 
					
						
							|  |  |  |                 r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1', | 
					
						
							|  |  |  |                 webpage): | 
					
						
							|  |  |  |             embed_url = mobj.group('url') | 
					
						
							|  |  |  |             embed_qs = _qs(embed_url) | 
					
						
							|  |  |  |             program_guid = _program_guid(embed_qs) | 
					
						
							|  |  |  |             if program_guid: | 
					
						
							|  |  |  |                 entries.append(embed_url) | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             video_id = embed_qs.get('id', [None])[0] | 
					
						
							|  |  |  |             if not video_id: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             urlh = ie._request_webpage( | 
					
						
							|  |  |  |                 embed_url, video_id, note='Following embed URL redirect') | 
					
						
							|  |  |  |             embed_url = compat_str(urlh.geturl()) | 
					
						
							|  |  |  |             program_guid = _program_guid(_qs(embed_url)) | 
					
						
							|  |  |  |             if program_guid: | 
					
						
							|  |  |  |                 entries.append(embed_url) | 
					
						
							|  |  |  |         return entries | 
					
						
							| 
									
										
										
										
											2017-05-14 06:29:16 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         guid = self._match_id(url) | 
					
						
							|  |  |  |         tp_path = 'PR1GhC/media/guid/2702976343/' + guid | 
					
						
							|  |  |  |         info = self._extract_theplatform_metadata(tp_path, guid) | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |         formats = [] | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         subtitles = {} | 
					
						
							|  |  |  |         first_e = None | 
					
						
							|  |  |  |         for asset_type in ('SD', 'HD'): | 
					
						
							|  |  |  |             for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'): | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     tp_formats, tp_subtitles = self._extract_theplatform_smil( | 
					
						
							|  |  |  |                         update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), { | 
					
						
							|  |  |  |                             'mbr': 'true', | 
					
						
							|  |  |  |                             'formats': f, | 
					
						
							|  |  |  |                             'assetTypes': asset_type, | 
					
						
							|  |  |  |                         }), guid, 'Downloading %s %s SMIL data' % (f, asset_type)) | 
					
						
							|  |  |  |                 except ExtractorError as e: | 
					
						
							|  |  |  |                     if not first_e: | 
					
						
							|  |  |  |                         first_e = e | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  |                 for tp_f in tp_formats: | 
					
						
							|  |  |  |                     tp_f['quality'] = 1 if asset_type == 'HD' else 0 | 
					
						
							|  |  |  |                 formats.extend(tp_formats) | 
					
						
							|  |  |  |                 subtitles = self._merge_subtitles(subtitles, tp_subtitles) | 
					
						
							|  |  |  |         if first_e and not formats: | 
					
						
							|  |  |  |             raise first_e | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         fields = [] | 
					
						
							|  |  |  |         for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))): | 
					
						
							|  |  |  |             fields.extend(templ % repl for repl in repls) | 
					
						
							|  |  |  |         feed_data = self._download_json( | 
					
						
							|  |  |  |             'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid, | 
					
						
							|  |  |  |             guid, fatal=False, query={'fields': ','.join(fields)}) | 
					
						
							|  |  |  |         if feed_data: | 
					
						
							|  |  |  |             publish_info = feed_data.get('mediasetprogram$publishInfo') or {} | 
					
						
							|  |  |  |             info.update({ | 
					
						
							|  |  |  |                 'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')), | 
					
						
							|  |  |  |                 'season_number': int_or_none(feed_data.get('tvSeasonNumber')), | 
					
						
							|  |  |  |                 'series': feed_data.get('mediasetprogram$brandTitle'), | 
					
						
							|  |  |  |                 'uploader': publish_info.get('description'), | 
					
						
							|  |  |  |                 'uploader_id': publish_info.get('channel'), | 
					
						
							|  |  |  |                 'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')), | 
					
						
							|  |  |  |             }) | 
					
						
							| 
									
										
										
										
											2017-05-02 16:17:15 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |         info.update({ | 
					
						
							|  |  |  |             'id': guid, | 
					
						
							| 
									
										
										
										
											2017-05-14 05:27:51 +07:00
										 |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2018-07-18 18:33:33 +01:00
										 |  |  |             'subtitles': subtitles, | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  |         return info |