| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2016-11-11 03:26:29 +07:00
										 |  |  | from ..compat import ( | 
					
						
							|  |  |  |     compat_HTTPError, | 
					
						
							|  |  |  |     compat_str, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							| 
									
										
										
										
											2017-07-09 15:57:33 +07:00
										 |  |  |     unsmuggle_url, | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class EaglePlatformIE(InfoExtractor): | 
					
						
							|  |  |  |     _VALID_URL = r'''(?x)
 | 
					
						
							|  |  |  |                     (?: | 
					
						
							|  |  |  |                         eagleplatform:(?P<custom_host>[^/]+):| | 
					
						
							|  |  |  |                         https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id= | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                     (?P<id>\d+) | 
					
						
							|  |  |  |                 '''
 | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         # http://lenta.ru/news/2015/03/06/navalny/ | 
					
						
							|  |  |  |         'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', | 
					
						
							| 
									
										
										
										
											2016-04-25 22:48:17 +08:00
										 |  |  |         # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '227304', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Навальный вышел на свободу', | 
					
						
							|  |  |  |             'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg$', | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |             'duration': 87, | 
					
						
							|  |  |  |             'view_count': int, | 
					
						
							|  |  |  |             'age_limit': 0, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         # http://muz-tv.ru/play/7129/ | 
					
						
							|  |  |  |         # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true | 
					
						
							|  |  |  |         'url': 'eagleplatform:media.clipyou.ru:12820', | 
					
						
							| 
									
										
										
										
											2016-04-22 14:32:38 +01:00
										 |  |  |         'md5': '358597369cf8ba56675c1df15e7af624', | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '12820', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': "'O Sole Mio", | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg$', | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |             'duration': 216, | 
					
						
							|  |  |  |             'view_count': int, | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-04-04 23:36:45 +06:00
										 |  |  |         'skip': 'Georestricted', | 
					
						
							| 
									
										
										
										
											2017-07-09 15:57:33 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) | 
					
						
							| 
									
										
										
										
											2017-07-10 00:14:41 +07:00
										 |  |  |         'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', | 
					
						
							| 
									
										
										
										
											2017-07-09 15:57:33 +07:00
										 |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-29 23:01:34 +07:00
										 |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _extract_url(webpage): | 
					
						
							| 
									
										
										
										
											2016-08-23 07:22:14 +07:00
										 |  |  |         # Regular iframe embedding | 
					
						
							| 
									
										
										
										
											2016-06-29 23:01:34 +07:00
										 |  |  |         mobj = re.search( | 
					
						
							|  |  |  |             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1', | 
					
						
							|  |  |  |             webpage) | 
					
						
							|  |  |  |         if mobj is not None: | 
					
						
							|  |  |  |             return mobj.group('url') | 
					
						
							| 
									
										
										
										
											2017-07-09 15:55:04 +07:00
										 |  |  |         PLAYER_JS_RE = r'''
 | 
					
						
							|  |  |  |                         <script[^>]+ | 
					
						
							|  |  |  |                             src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) | 
					
						
							|  |  |  |                         .+? | 
					
						
							|  |  |  |                     '''
 | 
					
						
							|  |  |  |         # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) | 
					
						
							| 
									
										
										
										
											2016-08-23 07:22:14 +07:00
										 |  |  |         mobj = re.search( | 
					
						
							|  |  |  |             r'''(?xs)
 | 
					
						
							| 
									
										
										
										
											2017-07-09 15:55:04 +07:00
										 |  |  |                     %s | 
					
						
							| 
									
										
										
										
											2016-08-23 07:22:14 +07:00
										 |  |  |                     <div[^>]+ | 
					
						
							| 
									
										
										
										
											2017-07-09 15:55:04 +07:00
										 |  |  |                         class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+ | 
					
						
							| 
									
										
										
										
											2016-08-23 07:22:14 +07:00
										 |  |  |                         data-id=["\'](?P<id>\d+) | 
					
						
							| 
									
										
										
										
											2017-07-09 15:55:04 +07:00
										 |  |  |             ''' % PLAYER_JS_RE, webpage)
 | 
					
						
							|  |  |  |         if mobj is not None: | 
					
						
							|  |  |  |             return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() | 
					
						
							|  |  |  |         # Generalization of "Javascript code usage", "Combined usage" and | 
					
						
							|  |  |  |         # "Usage without attaching to DOM" embeddings (see | 
					
						
							|  |  |  |         # http://dultonmedia.github.io/eplayer/) | 
					
						
							|  |  |  |         mobj = re.search( | 
					
						
							|  |  |  |             r'''(?xs)
 | 
					
						
							|  |  |  |                     %s | 
					
						
							|  |  |  |                     <script> | 
					
						
							|  |  |  |                     .+? | 
					
						
							|  |  |  |                     new\s+EaglePlayer\( | 
					
						
							|  |  |  |                         (?:[^,]+\s*,\s*)? | 
					
						
							|  |  |  |                         { | 
					
						
							|  |  |  |                             .+? | 
					
						
							|  |  |  |                             \bid\s*:\s*["\']?(?P<id>\d+) | 
					
						
							|  |  |  |                             .+? | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  |                     \s*\) | 
					
						
							|  |  |  |                     .+? | 
					
						
							|  |  |  |                     </script> | 
					
						
							|  |  |  |             ''' % PLAYER_JS_RE, webpage)
 | 
					
						
							| 
									
										
										
										
											2016-08-23 07:22:14 +07:00
										 |  |  |         if mobj is not None: | 
					
						
							|  |  |  |             return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() | 
					
						
							| 
									
										
										
										
											2016-06-29 23:01:34 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-27 01:12:46 +06:00
										 |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _handle_error(response): | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |         status = int_or_none(response.get('status', 200)) | 
					
						
							|  |  |  |         if status != 200: | 
					
						
							|  |  |  |             raise ExtractorError(' '.join(response['errors']), expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-09 15:57:33 +07:00
										 |  |  |     def _download_json(self, url_or_request, video_id, *args, **kwargs): | 
					
						
							| 
									
										
										
										
											2016-04-16 16:47:16 +08:00
										 |  |  |         try: | 
					
						
							| 
									
										
										
										
											2017-07-09 15:57:33 +07:00
										 |  |  |             response = super(EaglePlatformIE, self)._download_json( | 
					
						
							|  |  |  |                 url_or_request, video_id, *args, **kwargs) | 
					
						
							| 
									
										
										
										
											2016-04-16 16:47:16 +08:00
										 |  |  |         except ExtractorError as ee: | 
					
						
							|  |  |  |             if isinstance(ee.cause, compat_HTTPError): | 
					
						
							|  |  |  |                 response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) | 
					
						
							|  |  |  |                 self._handle_error(response) | 
					
						
							|  |  |  |             raise | 
					
						
							| 
									
										
										
										
											2015-09-26 17:37:30 +01:00
										 |  |  |         return response | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): | 
					
						
							|  |  |  |         return self._download_json(url_or_request, video_id, note)['data'][0] | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2017-07-09 15:57:33 +07:00
										 |  |  |         url, smuggled_data = unsmuggle_url(url, {}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-09 15:57:33 +07:00
										 |  |  |         headers = {} | 
					
						
							|  |  |  |         query = { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         referrer = smuggled_data.get('referrer') | 
					
						
							|  |  |  |         if referrer: | 
					
						
							|  |  |  |             headers['Referer'] = referrer | 
					
						
							|  |  |  |             query['referrer'] = referrer | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |         player_data = self._download_json( | 
					
						
							| 
									
										
										
										
											2017-07-09 15:57:33 +07:00
										 |  |  |             'http://%s/api/player_data' % host, video_id, | 
					
						
							|  |  |  |             headers=headers, query=query) | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |         media = player_data['data']['playlist']['viewports'][0]['medialist'][0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         title = media['title'] | 
					
						
							|  |  |  |         description = media.get('description') | 
					
						
							| 
									
										
										
										
											2015-09-27 01:17:44 +06:00
										 |  |  |         thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:') | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |         duration = int_or_none(media.get('duration')) | 
					
						
							|  |  |  |         view_count = int_or_none(media.get('views')) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         age_restriction = media.get('age_restriction') | 
					
						
							|  |  |  |         age_limit = None | 
					
						
							|  |  |  |         if age_restriction: | 
					
						
							|  |  |  |             age_limit = 0 if age_restriction == 'allow_all' else 18 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-26 18:53:57 +01:00
										 |  |  |         secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:') | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-22 14:32:38 +01:00
										 |  |  |         formats = [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-26 15:30:02 +01:00
										 |  |  |         m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') | 
					
						
							| 
									
										
										
										
											2016-04-22 14:32:38 +01:00
										 |  |  |         m3u8_formats = self._extract_m3u8_formats( | 
					
						
							| 
									
										
										
										
											2016-11-11 03:26:29 +07:00
										 |  |  |             m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', | 
					
						
							|  |  |  |             m3u8_id='hls', fatal=False) | 
					
						
							| 
									
										
										
										
											2016-04-22 14:32:38 +01:00
										 |  |  |         formats.extend(m3u8_formats) | 
					
						
							| 
									
										
										
										
											2015-09-26 15:30:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-11 03:26:29 +07:00
										 |  |  |         m3u8_formats_dict = {} | 
					
						
							|  |  |  |         for f in m3u8_formats: | 
					
						
							|  |  |  |             if f.get('height') is not None: | 
					
						
							|  |  |  |                 m3u8_formats_dict[f['height']] = f | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         mp4_data = self._download_json( | 
					
						
							| 
									
										
										
										
											2015-09-27 01:10:39 +06:00
										 |  |  |             # Secure mp4 URL is constructed according to Player.prototype.mp4 from | 
					
						
							|  |  |  |             # http://lentaru.media.eagleplatform.com/player/player.js | 
					
						
							| 
									
										
										
										
											2016-11-11 03:26:29 +07:00
										 |  |  |             re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8), | 
					
						
							|  |  |  |             video_id, 'Downloading mp4 JSON', fatal=False) | 
					
						
							|  |  |  |         if mp4_data: | 
					
						
							|  |  |  |             for format_id, format_url in mp4_data.get('data', {}).items(): | 
					
						
							|  |  |  |                 if not isinstance(format_url, compat_str): | 
					
						
							| 
									
										
										
										
											2016-04-25 22:48:17 +08:00
										 |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2016-11-11 03:26:29 +07:00
										 |  |  |                 height = int_or_none(format_id) | 
					
						
							|  |  |  |                 if height is not None and m3u8_formats_dict.get(height): | 
					
						
							|  |  |  |                     f = m3u8_formats_dict[height].copy() | 
					
						
							|  |  |  |                     f.update({ | 
					
						
							|  |  |  |                         'format_id': f['format_id'].replace('hls', 'http'), | 
					
						
							|  |  |  |                         'protocol': 'http', | 
					
						
							|  |  |  |                     }) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     f = { | 
					
						
							|  |  |  |                         'format_id': 'http-%s' % format_id, | 
					
						
							|  |  |  |                         'height': int_or_none(format_id), | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 f['url'] = format_url | 
					
						
							|  |  |  |                 formats.append(f) | 
					
						
							| 
									
										
										
										
											2015-09-26 15:30:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-07 22:16:23 +06:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |             'duration': duration, | 
					
						
							|  |  |  |             'view_count': view_count, | 
					
						
							|  |  |  |             'age_limit': age_limit, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |         } |