| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import json | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  | from ..compat import compat_HTTPError | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |     clean_html, | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							|  |  |  |     parse_age_limit, | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  |     sanitized_Request, | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |     try_get, | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  | class HRTiBaseIE(InfoExtractor): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |         Base Information Extractor for Croatian Radiotelevision | 
					
						
							|  |  |  |         video on demand site https://hrti.hrt.hr | 
					
						
							|  |  |  |         Reverse engineered from the JavaScript app in app.min.js | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  |     _NETRC_MACHINE = 'hrti' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |     _APP_LANGUAGE = 'hr' | 
					
						
							|  |  |  |     _APP_VERSION = '1.1' | 
					
						
							|  |  |  |     _APP_PUBLICATION_ID = 'all_in_one' | 
					
						
							|  |  |  |     _API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json' | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _initialize_api(self): | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         init_data = { | 
					
						
							|  |  |  |             'application_publication_id': self._APP_PUBLICATION_ID | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         uuid = self._download_json( | 
					
						
							|  |  |  |             self._API_URL, None, note='Downloading uuid', | 
					
						
							|  |  |  |             errnote='Unable to download uuid', | 
					
						
							|  |  |  |             data=json.dumps(init_data).encode('utf-8'))['uuid'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         app_data = { | 
					
						
							|  |  |  |             'uuid': uuid, | 
					
						
							|  |  |  |             'application_publication_id': self._APP_PUBLICATION_ID, | 
					
						
							|  |  |  |             'application_version': self._APP_VERSION | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8')) | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  |         req.get_method = lambda: 'PUT' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         resources = self._download_json( | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |             req, None, note='Downloading session information', | 
					
						
							|  |  |  |             errnote='Unable to download session information') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self._session_id = resources['session_id'] | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         modules = resources['modules'] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         self._search_url = modules['vod_catalog']['resources']['search']['uri'].format( | 
					
						
							|  |  |  |             language=self._APP_LANGUAGE, | 
					
						
							|  |  |  |             application_id=self._APP_PUBLICATION_ID) | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         self._login_url = (modules['user']['resources']['login']['uri'] + | 
					
						
							|  |  |  |                            '/format/json').format(session_id=self._session_id) | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         self._logout_url = modules['user']['resources']['logout']['uri'] | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _login(self): | 
					
						
							|  |  |  |         (username, password) = self._get_login_info() | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         # TODO: figure out authentication with cookies | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  |         if username is None or password is None: | 
					
						
							|  |  |  |             self.raise_login_required() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         auth_data = { | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  |             'username': username, | 
					
						
							|  |  |  |             'password': password, | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  |         try: | 
					
						
							|  |  |  |             auth_info = self._download_json( | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |                 self._login_url, None, note='Logging in', errnote='Unable to log in', | 
					
						
							|  |  |  |                 data=json.dumps(auth_data).encode('utf-8')) | 
					
						
							|  |  |  |         except ExtractorError as e: | 
					
						
							|  |  |  |             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406: | 
					
						
							|  |  |  |                 auth_info = self._parse_json(e.cause.read().encode('utf-8'), None) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         error_message = auth_info.get('error', {}).get('message') | 
					
						
							|  |  |  |         if error_message: | 
					
						
							|  |  |  |             raise ExtractorError( | 
					
						
							|  |  |  |                 '%s said: %s' % (self.IE_NAME, error_message), | 
					
						
							|  |  |  |                 expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self._token = auth_info['secure_streaming_token'] | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_initialize(self): | 
					
						
							|  |  |  |         self._initialize_api() | 
					
						
							|  |  |  |         self._login() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  | class HRTiIE(HRTiBaseIE): | 
					
						
							|  |  |  |     _VALID_URL = r'''(?x)
 | 
					
						
							|  |  |  |                         (?: | 
					
						
							|  |  |  |                             hrti:(?P<short_id>[0-9]+)| | 
					
						
							|  |  |  |                             https?:// | 
					
						
							|  |  |  |                                 hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)? | 
					
						
							|  |  |  |                         ) | 
					
						
							|  |  |  |                     '''
 | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '2181385', | 
					
						
							|  |  |  |             'display_id': 'republika-dokumentarna-serija-16-hd', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)', | 
					
						
							|  |  |  |             'description': 'md5:48af85f620e8e0e1df4096270568544f', | 
					
						
							|  |  |  |             'duration': 2922, | 
					
						
							|  |  |  |             'view_count': int, | 
					
						
							|  |  |  |             'average_rating': int, | 
					
						
							|  |  |  |             'episode_number': int, | 
					
						
							|  |  |  |             'season_number': int, | 
					
						
							|  |  |  |             'age_limit': 12, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'skip': 'Requires account credentials', | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://hrti.hrt.hr/#/video/show/2181385/', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'hrti:2181385', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         video_id = mobj.group('short_id') or mobj.group('id') | 
					
						
							|  |  |  |         display_id = mobj.group('display_id') or video_id | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         video = self._download_json( | 
					
						
							|  |  |  |             '%s/video_id/%s/format/json' % (self._search_url, video_id), | 
					
						
							|  |  |  |             display_id, 'Downloading video metadata JSON')['video'][0] | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         title_info = video['title'] | 
					
						
							|  |  |  |         title = title_info['title_long'] | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         movie = video['video_assets']['movie'][0] | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         m3u8_url = movie['url'].format(TOKEN=self._token) | 
					
						
							|  |  |  |         formats = self._extract_m3u8_formats( | 
					
						
							|  |  |  |             m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native', | 
					
						
							|  |  |  |             m3u8_id='hls') | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |         description = clean_html(title_info.get('summary_long')) | 
					
						
							|  |  |  |         age_limit = parse_age_limit(video.get('parental_control', {}).get('rating')) | 
					
						
							|  |  |  |         view_count = int_or_none(video.get('views')) | 
					
						
							|  |  |  |         average_rating = int_or_none(video.get('user_rating')) | 
					
						
							|  |  |  |         duration = int_or_none(movie.get('duration')) | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |             'display_id': display_id, | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  |             'title': title, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  |             'duration': duration, | 
					
						
							|  |  |  |             'view_count': view_count, | 
					
						
							|  |  |  |             'average_rating': average_rating, | 
					
						
							|  |  |  |             'age_limit': age_limit, | 
					
						
							| 
									
										
										
										
											2016-05-13 15:52:52 +01:00
										 |  |  |             'formats': formats, | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class HRTiPlaylistIE(HRTiBaseIE): | 
					
						
							|  |  |  |     _VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '212', | 
					
						
							|  |  |  |             'title': 'ekumena', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_mincount': 8, | 
					
						
							|  |  |  |         'skip': 'Requires account credentials', | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://hrti.hrt.hr/#/video/list/category/212/', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         category_id = mobj.group('id') | 
					
						
							|  |  |  |         display_id = mobj.group('display_id') or category_id | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         response = self._download_json( | 
					
						
							|  |  |  |             '%s/category_id/%s/format/json' % (self._search_url, category_id), | 
					
						
							|  |  |  |             display_id, 'Downloading video metadata JSON') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_ids = try_get( | 
					
						
							|  |  |  |             response, lambda x: x['video_listings'][0]['alternatives'][0]['list'], | 
					
						
							|  |  |  |             list) or [video['id'] for video in response.get('videos', []) if video.get('id')] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-02 23:02:14 +02:00
										 |  |  |         entries = [self.url_result('hrti:%s' % video_id) for video_id in video_ids] | 
					
						
							| 
									
										
										
										
											2016-07-03 02:22:14 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return self.playlist_result(entries, category_id, display_id) |