| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  | import base64 | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  | from ..compat import ( | 
					
						
							|  |  |  |     compat_urlparse, | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |     compat_parse_qs, | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2015-11-21 01:39:29 +06:00
										 |  |  |     clean_html, | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |     ExtractorError, | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  |     unsmuggle_url, | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
										 |  |  |     smuggle_url, | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class KalturaIE(InfoExtractor): | 
					
						
							|  |  |  |     _VALID_URL = r'''(?x)
 | 
					
						
							| 
									
										
										
										
											2015-08-29 20:21:59 +06:00
										 |  |  |                 (?: | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |                     kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)| | 
					
						
							| 
									
										
										
										
											2015-08-29 20:21:59 +06:00
										 |  |  |                     https?:// | 
					
						
							| 
									
										
										
										
											2017-02-10 01:24:14 +07:00
										 |  |  |                         (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ | 
					
						
							| 
									
										
										
										
											2015-08-29 20:21:59 +06:00
										 |  |  |                         (?: | 
					
						
							|  |  |  |                             (?: | 
					
						
							|  |  |  |                                 # flash player | 
					
						
							| 
									
										
										
										
											2017-02-09 16:24:54 +01:00
										 |  |  |                                 index\.php/(?:kwidget|extwidget/preview)| | 
					
						
							| 
									
										
										
										
											2015-08-29 20:23:06 +06:00
										 |  |  |                                 # html5 player | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |                                 html5/html5lib/[^/]+/mwEmbedFrame\.php | 
					
						
							| 
									
										
										
										
											2015-08-29 20:26:51 +06:00
										 |  |  |                             ) | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |                         )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))? | 
					
						
							| 
									
										
										
										
											2015-08-29 20:21:59 +06:00
										 |  |  |                 ) | 
					
						
							|  |  |  |                 '''
 | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
    # Default API host; _kaltura_api_call falls back to it when the caller
    # does not supply an explicit service_url.
    _SERVICE_URL = 'http://cdnapi.kaltura.com'
    # Path of the API endpoint, appended to the service URL on every API call.
    _SERVICE_BASE = '/api_v3/index.php'
    # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php
    # Numeric caption type -> short format name (presumably used as the
    # subtitle extension; the consumer is outside this part of the file).
    _CAPTION_TYPES = {
        1: 'srt',
        2: 'ttml',
        3: 'vtt',
    }
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
    # Test cases for this extractor. Entries carrying only 'only_matching'
    # list URLs that must be recognised by _VALID_URL; the others also state
    # the metadata expected for the entry (presumably checked by the project's
    # test harness, which is not part of this file).
    _TESTS = [
        {
            'url': 'kaltura:269692:1_1jc2y3e4',
            'md5': '3adcbdb3dcc02d647539e53f284ba171',
            'info_dict': {
                'id': '1_1jc2y3e4',
                'ext': 'mp4',
                'title': 'Straight from the Heart',
                'upload_date': '20131219',
                'uploader_id': 'mlundberg@wolfgangsvault.com',
                'description': 'The Allman Brothers Band, 12/16/1981',
                'thumbnail': 're:^https?://.*/thumbnail/.*',
                'timestamp': int,
            },
        },
        {
            'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
            'only_matching': True,
        },
        {
            'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
            'only_matching': True,
        },
        {
            'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
            'only_matching': True,
        },
        {
            # video with subtitles
            'url': 'kaltura:111032:1_cw786r8q',
            'only_matching': True,
        },
        {
            # video with ttml subtitles (no fileExt)
            'url': 'kaltura:1926081:0_l5ye1133',
            'info_dict': {
                'id': '0_l5ye1133',
                'ext': 'mp4',
                'title': 'What Can You Do With Python?',
                'upload_date': '20160221',
                'uploader_id': 'stork',
                'thumbnail': 're:^https?://.*/thumbnail/.*',
                'timestamp': int,
                'subtitles': {
                    'en': [{
                        'ext': 'ttml',
                    }],
                },
            },
            'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
            'only_matching': True,
        },
        {
            'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
            'only_matching': True,
        }
    ]
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-27 22:44:17 +07:00
										 |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _extract_url(webpage): | 
					
						
							| 
									
										
										
										
											2017-04-08 16:48:27 +07:00
										 |  |  |         # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site | 
					
						
							| 
									
										
										
										
											2016-06-27 22:44:17 +07:00
										 |  |  |         mobj = ( | 
					
						
							|  |  |  |             re.search( | 
					
						
							|  |  |  |                 r"""(?xs)
 | 
					
						
							|  |  |  |                     kWidget\.(?:thumb)?[Ee]mbed\( | 
					
						
							|  |  |  |                     \{.*? | 
					
						
							| 
									
										
										
										
											2017-04-08 16:48:27 +07:00
										 |  |  |                         (?P<q1>['"])wid(?P=q1)\s*:\s* | 
					
						
							|  |  |  |                         (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? | 
					
						
							|  |  |  |                         (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* | 
					
						
							|  |  |  |                         (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) | 
					
						
							| 
									
										
										
										
											2016-06-27 22:44:17 +07:00
										 |  |  |                 """, webpage) or
 | 
					
						
							|  |  |  |             re.search( | 
					
						
							|  |  |  |                 r'''(?xs)
 | 
					
						
							| 
									
										
										
										
											2017-04-08 16:48:27 +07:00
										 |  |  |                     (?P<q1>["']) | 
					
						
							| 
									
										
										
										
											2017-02-10 01:24:14 +07:00
										 |  |  |                         (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* | 
					
						
							| 
									
										
										
										
											2016-06-27 22:44:17 +07:00
										 |  |  |                     (?P=q1).*? | 
					
						
							|  |  |  |                     (?: | 
					
						
							| 
									
										
										
										
											2017-12-23 21:16:32 +07:00
										 |  |  |                         (?: | 
					
						
							|  |  |  |                             entry_?[Ii]d| | 
					
						
							|  |  |  |                             (?P<q2>["'])entry_?[Ii]d(?P=q2) | 
					
						
							|  |  |  |                         )\s*:\s*| | 
					
						
							| 
									
										
										
										
											2017-12-23 21:22:41 +07:00
										 |  |  |                         \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s* | 
					
						
							| 
									
										
										
										
											2017-12-23 21:16:32 +07:00
										 |  |  |                     ) | 
					
						
							| 
									
										
										
										
											2017-04-08 16:48:27 +07:00
										 |  |  |                     (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) | 
					
						
							| 
									
										
										
										
											2017-04-08 05:40:57 -04:00
										 |  |  |                 ''', webpage) or
 | 
					
						
							|  |  |  |             re.search( | 
					
						
							|  |  |  |                 r'''(?xs)
 | 
					
						
							| 
									
										
										
										
											2017-04-08 16:48:27 +07:00
										 |  |  |                     <iframe[^>]+src=(?P<q1>["']) | 
					
						
							|  |  |  |                       (?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) | 
					
						
							| 
									
										
										
										
											2017-04-08 05:40:57 -04:00
										 |  |  |                       (?:(?!(?P=q1)).)* | 
					
						
							| 
									
										
										
										
											2017-04-08 16:48:27 +07:00
										 |  |  |                       [?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) | 
					
						
							| 
									
										
										
										
											2017-04-08 05:40:57 -04:00
										 |  |  |                     (?P=q1) | 
					
						
							|  |  |  |                 ''', webpage)
 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2016-06-27 22:44:17 +07:00
										 |  |  |         if mobj: | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
										 |  |  |             embed_info = mobj.groupdict() | 
					
						
							|  |  |  |             url = 'kaltura:%(partner_id)s:%(id)s' % embed_info | 
					
						
							| 
									
										
										
										
											2016-07-04 21:34:27 +01:00
										 |  |  |             escaped_pid = re.escape(embed_info['partner_id']) | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
										 |  |  |             service_url = re.search( | 
					
						
							| 
									
										
										
										
											2016-07-04 21:34:27 +01:00
										 |  |  |                 r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
										 |  |  |                 webpage) | 
					
						
							|  |  |  |             if service_url: | 
					
						
							|  |  |  |                 url = smuggle_url(url, {'service_url': service_url.group(1)}) | 
					
						
							|  |  |  |             return url | 
					
						
							| 
									
										
										
										
											2016-06-27 22:44:17 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-04 21:34:27 +01:00
										 |  |  |     def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |         params = actions[0] | 
					
						
							|  |  |  |         if len(actions) > 1: | 
					
						
							|  |  |  |             for i, a in enumerate(actions[1:], start=1): | 
					
						
							|  |  |  |                 for k, v in a.items(): | 
					
						
							|  |  |  |                     params['%d:%s' % (i, k)] = v | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
										 |  |  |         data = self._download_json( | 
					
						
							| 
									
										
										
										
											2016-07-04 21:34:27 +01:00
										 |  |  |             (service_url or self._SERVICE_URL) + self._SERVICE_BASE, | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
										 |  |  |             video_id, query=params, *args, **kwargs) | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         status = data if len(actions) == 1 else data[0] | 
					
						
							|  |  |  |         if status.get('objectType') == 'KalturaAPIException': | 
					
						
							|  |  |  |             raise ExtractorError( | 
					
						
							|  |  |  |                 '%s said: %s' % (self.IE_NAME, status['message'])) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return data | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-04 21:34:27 +01:00
										 |  |  |     def _get_video_info(self, video_id, partner_id, service_url=None): | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |         actions = [ | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 'action': 'null', | 
					
						
							|  |  |  |                 'apiVersion': '3.1.5', | 
					
						
							|  |  |  |                 'clientTag': 'kdp:v3.8.5', | 
					
						
							|  |  |  |                 'format': 1,  # JSON, 2 = XML, 3 = PHP | 
					
						
							|  |  |  |                 'service': 'multirequest', | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 'expiry': 86400, | 
					
						
							|  |  |  |                 'service': 'session', | 
					
						
							|  |  |  |                 'action': 'startWidgetSession', | 
					
						
							|  |  |  |                 'widgetId': '_%s' % partner_id, | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 'action': 'get', | 
					
						
							|  |  |  |                 'entryId': video_id, | 
					
						
							|  |  |  |                 'service': 'baseentry', | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |                 'ks': '{1:result:ks}', | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							| 
									
										
										
										
											2015-12-11 23:40:12 +01:00
										 |  |  |                 'action': 'getbyentryid', | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |                 'entryId': video_id, | 
					
						
							| 
									
										
										
										
											2015-12-11 23:40:12 +01:00
										 |  |  |                 'service': 'flavorAsset', | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |                 'ks': '{1:result:ks}', | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 'action': 'list', | 
					
						
							|  |  |  |                 'filter:entryIdEqual': video_id, | 
					
						
							|  |  |  |                 'service': 'caption_captionasset', | 
					
						
							|  |  |  |                 'ks': '{1:result:ks}', | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |             }, | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |         return self._kaltura_api_call( | 
					
						
							| 
									
										
										
										
											2016-07-04 21:34:27 +01:00
										 |  |  |             video_id, actions, service_url, note='Downloading video info JSON') | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  |         url, smuggled_data = unsmuggle_url(url, {}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |         partner_id, entry_id = mobj.group('partner_id', 'id') | 
					
						
							| 
									
										
										
										
											2016-03-13 02:01:10 +06:00
										 |  |  |         ks = None | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |         captions = None | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |         if partner_id and entry_id: | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |             _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url')) | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |         else: | 
					
						
							|  |  |  |             path, query = mobj.group('path', 'query') | 
					
						
							|  |  |  |             if not path and not query: | 
					
						
							|  |  |  |                 raise ExtractorError('Invalid URL', expected=True) | 
					
						
							|  |  |  |             params = {} | 
					
						
							|  |  |  |             if query: | 
					
						
							|  |  |  |                 params = compat_parse_qs(query) | 
					
						
							|  |  |  |             if path: | 
					
						
							|  |  |  |                 splitted_path = path.split('/') | 
					
						
							| 
									
										
										
										
											2016-03-12 18:43:45 +01:00
										 |  |  |                 params.update(dict((zip(splitted_path[::2], [[v] for v in splitted_path[1::2]])))) | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |             if 'wid' in params: | 
					
						
							|  |  |  |                 partner_id = params['wid'][0][1:] | 
					
						
							|  |  |  |             elif 'p' in params: | 
					
						
							|  |  |  |                 partner_id = params['p'][0] | 
					
						
							| 
									
										
										
										
											2017-02-09 16:24:54 +01:00
										 |  |  |             elif 'partner_id' in params: | 
					
						
							|  |  |  |                 partner_id = params['partner_id'][0] | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 raise ExtractorError('Invalid URL', expected=True) | 
					
						
							|  |  |  |             if 'entry_id' in params: | 
					
						
							|  |  |  |                 entry_id = params['entry_id'][0] | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |                 _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id) | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |             elif 'uiconf_id' in params and 'flashvars[referenceId]' in params: | 
					
						
							|  |  |  |                 reference_id = params['flashvars[referenceId]'][0] | 
					
						
							|  |  |  |                 webpage = self._download_webpage(url, reference_id) | 
					
						
							|  |  |  |                 entry_data = self._parse_json(self._search_regex( | 
					
						
							|  |  |  |                     r'window\.kalturaIframePackageData\s*=\s*({.*});', | 
					
						
							|  |  |  |                     webpage, 'kalturaIframePackageData'), | 
					
						
							|  |  |  |                     reference_id)['entryResult'] | 
					
						
							|  |  |  |                 info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets'] | 
					
						
							|  |  |  |                 entry_id = info['id'] | 
					
						
							| 
									
										
										
										
											2016-08-21 08:26:45 +07:00
										 |  |  |                 # Unfortunately, data returned in kalturaIframePackageData lacks | 
					
						
							|  |  |  |                 # captions so we will try requesting the complete data using | 
					
						
							|  |  |  |                 # regular approach since we now know the entry_id | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     _, info, flavor_assets, captions = self._get_video_info( | 
					
						
							|  |  |  |                         entry_id, partner_id) | 
					
						
							|  |  |  |                 except ExtractorError: | 
					
						
							|  |  |  |                     # Regular scenario failed but we already have everything | 
					
						
							|  |  |  |                     # extracted apart from captions and can process at least | 
					
						
							|  |  |  |                     # with this | 
					
						
							|  |  |  |                     pass | 
					
						
							| 
									
										
										
										
											2016-03-11 16:52:07 +01:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 raise ExtractorError('Invalid URL', expected=True) | 
					
						
							| 
									
										
										
										
											2016-03-13 02:01:10 +06:00
										 |  |  |             ks = params.get('flashvars[ks]', [None])[0] | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  |         source_url = smuggled_data.get('source_url') | 
					
						
							|  |  |  |         if source_url: | 
					
						
							|  |  |  |             referrer = base64.b64encode( | 
					
						
							|  |  |  |                 '://'.join(compat_urlparse.urlparse(source_url)[:2]) | 
					
						
							|  |  |  |                 .encode('utf-8')).decode('utf-8') | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             referrer = None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-13 02:01:10 +06:00
										 |  |  |         def sign_url(unsigned_url): | 
					
						
							|  |  |  |             if ks: | 
					
						
							|  |  |  |                 unsigned_url += '/ks/%s' % ks | 
					
						
							|  |  |  |             if referrer: | 
					
						
							|  |  |  |                 unsigned_url += '?referrer=%s' % referrer | 
					
						
							|  |  |  |             return unsigned_url | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
										 |  |  |         data_url = info['dataUrl'] | 
					
						
							|  |  |  |         if '/flvclipper/' in data_url: | 
					
						
							|  |  |  |             data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  |         formats = [] | 
					
						
							| 
									
										
										
										
											2015-12-11 23:40:12 +01:00
										 |  |  |         for f in flavor_assets: | 
					
						
							|  |  |  |             # Continue if asset is not ready | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |             if f.get('status') != 2: | 
					
						
							| 
									
										
										
										
											2015-12-11 23:40:12 +01:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2016-09-12 22:33:00 +07:00
										 |  |  |             # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g) | 
					
						
							|  |  |  |             # skip for now. | 
					
						
							|  |  |  |             if f.get('fileExt') == 'chun': | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2017-10-11 22:24:17 +02:00
										 |  |  |             # DRM-protected video, cannot be decrypted | 
					
						
							|  |  |  |             if f.get('fileExt') == 'wvm': | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2017-01-28 18:27:42 +07:00
										 |  |  |             if not f.get('fileExt'): | 
					
						
							| 
									
										
										
										
											2016-12-20 18:45:52 +08:00
										 |  |  |                 # QT indicates QuickTime; some videos have broken fileExt | 
					
						
							| 
									
										
										
										
											2017-01-28 18:27:42 +07:00
										 |  |  |                 if f.get('containerFormat') == 'qt': | 
					
						
							|  |  |  |                     f['fileExt'] = 'mov' | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     f['fileExt'] = 'mp4' | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
										 |  |  |             video_url = sign_url( | 
					
						
							|  |  |  |                 '%s/flavorId/%s' % (data_url, f['id'])) | 
					
						
							| 
									
										
										
										
											2016-09-12 22:43:45 +07:00
										 |  |  |             # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g | 
					
						
							|  |  |  |             # -f mp4-56) | 
					
						
							|  |  |  |             vcodec = 'none' if 'videoCodecId' not in f and f.get( | 
					
						
							|  |  |  |                 'frameRate') == 0 else f.get('videoCodecId') | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  |             formats.append({ | 
					
						
							|  |  |  |                 'format_id': '%(fileExt)s-%(bitrate)s' % f, | 
					
						
							| 
									
										
										
										
											2015-11-21 01:38:08 +06:00
										 |  |  |                 'ext': f.get('fileExt'), | 
					
						
							|  |  |  |                 'tbr': int_or_none(f['bitrate']), | 
					
						
							|  |  |  |                 'fps': int_or_none(f.get('frameRate')), | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  |                 'filesize_approx': int_or_none(f.get('size'), invscale=1024), | 
					
						
							|  |  |  |                 'container': f.get('containerFormat'), | 
					
						
							| 
									
										
										
										
											2016-09-12 22:43:45 +07:00
										 |  |  |                 'vcodec': vcodec, | 
					
						
							| 
									
										
										
										
											2015-11-21 01:38:08 +06:00
										 |  |  |                 'height': int_or_none(f.get('height')), | 
					
						
							|  |  |  |                 'width': int_or_none(f.get('width')), | 
					
						
							| 
									
										
										
										
											2015-11-21 01:34:02 +06:00
										 |  |  |                 'url': video_url, | 
					
						
							|  |  |  |             }) | 
					
						
							| 
									
										
										
										
											2016-07-04 17:57:44 +01:00
										 |  |  |         if '/playManifest/' in data_url: | 
					
						
							|  |  |  |             m3u8_url = sign_url(data_url.replace( | 
					
						
							|  |  |  |                 'format/url', 'format/applehttp')) | 
					
						
							|  |  |  |             formats.extend(self._extract_m3u8_formats( | 
					
						
							|  |  |  |                 m3u8_url, entry_id, 'mp4', 'm3u8_native', | 
					
						
							|  |  |  |                 m3u8_id='hls', fatal=False)) | 
					
						
							| 
									
										
										
										
											2015-12-11 23:40:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |         subtitles = {} | 
					
						
							|  |  |  |         if captions: | 
					
						
							|  |  |  |             for caption in captions.get('objects', []): | 
					
						
							|  |  |  |                 # Continue if caption is not ready | 
					
						
							| 
									
										
										
										
											2017-07-05 23:20:50 +07:00
										 |  |  |                 if caption.get('status') != 2: | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2016-08-21 21:01:01 +07:00
										 |  |  |                 if not caption.get('id'): | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 caption_format = int_or_none(caption.get('format')) | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |                 subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({ | 
					
						
							|  |  |  |                     'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']), | 
					
						
							| 
									
										
										
										
											2016-08-21 21:01:01 +07:00
										 |  |  |                     'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml', | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |                 }) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |         return { | 
					
						
							| 
									
										
										
										
											2015-08-29 20:21:59 +06:00
										 |  |  |             'id': entry_id, | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |             'title': info['name'], | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2016-08-04 09:38:37 +01:00
										 |  |  |             'subtitles': subtitles, | 
					
						
							| 
									
										
										
										
											2015-11-21 01:39:29 +06:00
										 |  |  |             'description': clean_html(info.get('description')), | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |             'thumbnail': info.get('thumbnailUrl'), | 
					
						
							|  |  |  |             'duration': info.get('duration'), | 
					
						
							|  |  |  |             'timestamp': info.get('createdAt'), | 
					
						
							| 
									
										
										
										
											2017-01-28 17:50:56 +07:00
										 |  |  |             'uploader_id': info.get('userId') if info.get('userId') != 'None' else None, | 
					
						
							| 
									
										
										
										
											2015-02-26 23:45:54 +02:00
										 |  |  |             'view_count': info.get('plays'), | 
					
						
							|  |  |  |         } |