| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import json | 
					
						
							| 
									
										
										
										
											2016-06-09 04:00:47 +07:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							|  |  |  |     parse_iso8601, | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |     sanitized_Request, | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class VesselIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2016-10-30 04:14:51 -07:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)' | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |     _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' | 
					
						
							|  |  |  |     _LOGIN_URL = 'https://www.vessel.com/api/account/login' | 
					
						
							|  |  |  |     _NETRC_MACHINE = 'vessel' | 
					
						
							| 
									
										
										
										
											2016-06-09 04:00:47 +07:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |         'url': 'https://www.vessel.com/videos/HDN7G5UMs', | 
					
						
							|  |  |  |         'md5': '455cdf8beb71c6dd797fd2f3818d05c4', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'HDN7G5UMs', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?', | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg$', | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |             'upload_date': '20150317', | 
					
						
							|  |  |  |             'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', | 
					
						
							|  |  |  |             'timestamp': int, | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2016-06-09 04:00:47 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-10-30 18:17:15 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.vessel.com/videos/F01_dsLj1', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.vessel.com/videos/RRX-sir-J', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-06-09 04:00:47 +07:00
										 |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _extract_urls(webpage): | 
					
						
							|  |  |  |         return [url for _, url in re.findall( | 
					
						
							| 
									
										
										
										
											2016-10-30 04:14:51 -07:00
										 |  |  |             r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1', | 
					
						
							| 
									
										
										
										
											2016-06-09 04:00:47 +07:00
										 |  |  |             webpage)] | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def make_json_request(url, data): | 
					
						
							|  |  |  |         payload = json.dumps(data).encode('utf-8') | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |         req = sanitized_Request(url, payload) | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |         req.add_header('Content-Type', 'application/json; charset=utf-8') | 
					
						
							|  |  |  |         return req | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2015-05-07 21:58:03 +03:00
										 |  |  |     def find_assets(data, asset_type, asset_id=None): | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |         for asset in data.get('assets', []): | 
					
						
							| 
									
										
										
										
											2015-05-07 21:58:03 +03:00
										 |  |  |             if not asset.get('type') == asset_type: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             elif asset_id is not None and not asset.get('id') == asset_id: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |                 yield asset | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _check_access_rights(self, data): | 
					
						
							|  |  |  |         access_info = data.get('__view', {}) | 
					
						
							| 
									
										
										
										
											2015-03-26 19:51:40 +01:00
										 |  |  |         if not access_info.get('allow_access', True): | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |             err_code = access_info.get('error_code') or '' | 
					
						
							|  |  |  |             if err_code == 'ITEM_PAID_ONLY': | 
					
						
							|  |  |  |                 raise ExtractorError( | 
					
						
							|  |  |  |                     'This video requires subscription.', expected=True) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise ExtractorError( | 
					
						
							|  |  |  |                     'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _login(self): | 
					
						
							| 
									
										
										
										
											2018-05-26 16:12:44 +01:00
										 |  |  |         username, password = self._get_login_info() | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |         if username is None: | 
					
						
							|  |  |  |             return | 
					
						
							|  |  |  |         self.report_login() | 
					
						
							|  |  |  |         data = { | 
					
						
							|  |  |  |             'client_id': 'web', | 
					
						
							|  |  |  |             'type': 'password', | 
					
						
							|  |  |  |             'user_key': username, | 
					
						
							|  |  |  |             'password': password, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         login_request = VesselIE.make_json_request(self._LOGIN_URL, data) | 
					
						
							|  |  |  |         self._download_webpage(login_request, None, False, 'Wrong login info') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_initialize(self): | 
					
						
							|  |  |  |         self._login() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         video_id = self._match_id(url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  |         data = self._parse_json(self._search_regex( | 
					
						
							|  |  |  |             r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id) | 
					
						
							|  |  |  |         asset_id = data['model']['data']['id'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         req = VesselIE.make_json_request( | 
					
						
							|  |  |  |             self._API_URL_TEMPLATE % asset_id, {'client': 'web'}) | 
					
						
							|  |  |  |         data = self._download_json(req, video_id) | 
					
						
							| 
									
										
										
										
											2015-05-07 21:58:03 +03:00
										 |  |  |         video_asset_id = data.get('main_video_asset') | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self._check_access_rights(data) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         try: | 
					
						
							| 
									
										
										
										
											2015-05-07 21:58:03 +03:00
										 |  |  |             video_asset = next( | 
					
						
							|  |  |  |                 VesselIE.find_assets(data, 'video', asset_id=video_asset_id)) | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |         except StopIteration: | 
					
						
							|  |  |  |             raise ExtractorError('No video assets found') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         formats = [] | 
					
						
							|  |  |  |         for f in video_asset.get('sources', []): | 
					
						
							| 
									
										
										
										
											2016-06-09 04:00:47 +07:00
										 |  |  |             location = f.get('location') | 
					
						
							|  |  |  |             if not location: | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2016-06-09 04:12:48 +07:00
										 |  |  |             name = f.get('name') | 
					
						
							|  |  |  |             if name == 'hls-index': | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |                 formats.extend(self._extract_m3u8_formats( | 
					
						
							| 
									
										
										
										
											2016-06-09 04:09:32 +07:00
										 |  |  |                     location, video_id, ext='mp4', | 
					
						
							| 
									
										
										
										
											2016-06-09 04:13:38 +07:00
										 |  |  |                     entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False)) | 
					
						
							| 
									
										
										
										
											2016-06-09 04:12:48 +07:00
										 |  |  |             elif name == 'dash-index': | 
					
						
							|  |  |  |                 formats.extend(self._extract_mpd_formats( | 
					
						
							|  |  |  |                     location, video_id, mpd_id='dash', fatal=False)) | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 formats.append({ | 
					
						
							| 
									
										
										
										
											2016-06-09 04:12:48 +07:00
										 |  |  |                     'format_id': name, | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |                     'tbr': f.get('bitrate'), | 
					
						
							|  |  |  |                     'height': f.get('height'), | 
					
						
							|  |  |  |                     'width': f.get('width'), | 
					
						
							| 
									
										
										
										
											2016-06-09 04:00:47 +07:00
										 |  |  |                     'url': location, | 
					
						
							| 
									
										
										
										
											2015-03-26 19:48:22 +02:00
										 |  |  |                 }) | 
					
						
							|  |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         thumbnails = [] | 
					
						
							|  |  |  |         for im_asset in VesselIE.find_assets(data, 'image'): | 
					
						
							|  |  |  |             thumbnails.append({ | 
					
						
							|  |  |  |                 'url': im_asset['location'], | 
					
						
							|  |  |  |                 'width': im_asset.get('width', 0), | 
					
						
							|  |  |  |                 'height': im_asset.get('height', 0), | 
					
						
							|  |  |  |             }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': data['title'], | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'thumbnails': thumbnails, | 
					
						
							|  |  |  |             'description': data.get('short_description'), | 
					
						
							|  |  |  |             'duration': data.get('duration'), | 
					
						
							|  |  |  |             'comment_count': data.get('comment_count'), | 
					
						
							|  |  |  |             'like_count': data.get('like_count'), | 
					
						
							|  |  |  |             'view_count': data.get('view_count'), | 
					
						
							|  |  |  |             'timestamp': parse_iso8601(data.get('released_at')), | 
					
						
							|  |  |  |         } |