| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | # encoding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |     sanitized_Request, | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |     std_headers, | 
					
						
							| 
									
										
										
										
											2015-12-11 23:59:25 +08:00
										 |  |  |     urlencode_postdata, | 
					
						
							| 
									
										
										
										
											2016-03-11 16:57:06 +01:00
										 |  |  |     update_url_query, | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SafariBaseIE(InfoExtractor): | 
					
						
							|  |  |  |     _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/' | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |     _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>' | 
					
						
							|  |  |  |     _NETRC_MACHINE = 'safari' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-13 02:03:07 +06:00
										 |  |  |     _API_BASE = 'https://www.safaribooksonline.com/api/v1' | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |     _API_FORMAT = 'json' | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     LOGGED_IN = False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_initialize(self): | 
					
						
							| 
									
										
										
										
											2016-03-13 02:08:36 +06:00
										 |  |  |         self._login() | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _login(self): | 
					
						
							| 
									
										
										
										
											2016-03-13 02:08:36 +06:00
										 |  |  |         # We only need to log in once for courses or individual videos | 
					
						
							|  |  |  |         if self.LOGGED_IN: | 
					
						
							|  |  |  |             return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |         (username, password) = self._get_login_info() | 
					
						
							|  |  |  |         if username is None: | 
					
						
							| 
									
										
										
										
											2016-03-13 02:03:07 +06:00
										 |  |  |             return | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-06 17:38:39 +08:00
										 |  |  |         headers = std_headers.copy() | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |         if 'Referer' not in headers: | 
					
						
							|  |  |  |             headers['Referer'] = self._LOGIN_URL | 
					
						
							| 
									
										
										
										
											2016-03-06 17:38:39 +08:00
										 |  |  |         login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers) | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         login_page = self._download_webpage( | 
					
						
							| 
									
										
										
										
											2016-03-06 17:38:39 +08:00
										 |  |  |             login_page_request, None, | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |             'Downloading login form') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         csrf = self._html_search_regex( | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             r"name='csrfmiddlewaretoken'\s+value='([^']+)'", | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |             login_page, 'csrf token') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         login_form = { | 
					
						
							|  |  |  |             'csrfmiddlewaretoken': csrf, | 
					
						
							|  |  |  |             'email': username, | 
					
						
							|  |  |  |             'password1': password, | 
					
						
							|  |  |  |             'login': 'Sign In', | 
					
						
							|  |  |  |             'next': '', | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |         request = sanitized_Request( | 
					
						
							| 
									
										
										
										
											2015-12-11 23:59:25 +08:00
										 |  |  |             self._LOGIN_URL, urlencode_postdata(login_form), headers=headers) | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |         login_page = self._download_webpage( | 
					
						
							|  |  |  |             request, None, 'Logging in as %s' % username) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             raise ExtractorError( | 
					
						
							|  |  |  |                 'Login failed; make sure your credentials are correct and try again.', | 
					
						
							|  |  |  |                 expected=True) | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-13 02:08:36 +06:00
										 |  |  |         SafariBaseIE.LOGGED_IN = True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |         self.to_screen('Login successful') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SafariIE(SafariBaseIE): | 
					
						
							|  |  |  |     IE_NAME = 'safari' | 
					
						
							|  |  |  |     IE_DESC = 'safaribooksonline.com online video' | 
					
						
							| 
									
										
										
										
											2016-08-01 21:48:48 +07:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html' | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', | 
					
						
							| 
									
										
										
										
											2016-03-11 16:57:06 +01:00
										 |  |  |         'md5': 'dcc5a425e79f2564148652616af1f2a3', | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2016-03-11 16:57:06 +01:00
										 |  |  |             'id': '0_qbqx90ic', | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2016-03-11 16:57:06 +01:00
										 |  |  |             'title': 'Introduction to Hadoop Fundamentals LiveLessons', | 
					
						
							|  |  |  |             'timestamp': 1437758058, | 
					
						
							|  |  |  |             'upload_date': '20150724', | 
					
						
							|  |  |  |             'uploader_id': 'stork', | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-06-15 21:36:30 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         # non-digits in course id | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-08-01 21:48:48 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							| 
									
										
										
										
											2016-03-19 22:30:48 +06:00
										 |  |  |         video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part')) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  |         reference_id = self._search_regex( | 
					
						
							|  |  |  |             r'data-reference-id=(["\'])(?P<id>.+?)\1', | 
					
						
							|  |  |  |             webpage, 'kaltura reference id', group='id') | 
					
						
							|  |  |  |         partner_id = self._search_regex( | 
					
						
							|  |  |  |             r'data-partner-id=(["\'])(?P<id>.+?)\1', | 
					
						
							|  |  |  |             webpage, 'kaltura widget id', group='id') | 
					
						
							|  |  |  |         ui_id = self._search_regex( | 
					
						
							|  |  |  |             r'data-ui-id=(["\'])(?P<id>.+?)\1', | 
					
						
							|  |  |  |             webpage, 'kaltura uiconf id', group='id') | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-13 02:03:07 +06:00
										 |  |  |         query = { | 
					
						
							| 
									
										
										
										
											2016-03-11 16:57:06 +01:00
										 |  |  |             'wid': '_%s' % partner_id, | 
					
						
							|  |  |  |             'uiconf_id': ui_id, | 
					
						
							|  |  |  |             'flashvars[referenceId]': reference_id, | 
					
						
							| 
									
										
										
										
											2016-03-13 02:03:07 +06:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if self.LOGGED_IN: | 
					
						
							|  |  |  |             kaltura_session = self._download_json( | 
					
						
							|  |  |  |                 '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), | 
					
						
							| 
									
										
										
										
											2016-03-19 22:30:48 +06:00
										 |  |  |                 video_id, 'Downloading kaltura session JSON', | 
					
						
							| 
									
										
										
										
											2016-03-13 02:03:07 +06:00
										 |  |  |                 'Unable to download kaltura session JSON', fatal=False) | 
					
						
							|  |  |  |             if kaltura_session: | 
					
						
							|  |  |  |                 session = kaltura_session.get('session') | 
					
						
							|  |  |  |                 if session: | 
					
						
							|  |  |  |                     query['flashvars[ks]'] = session | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return self.url_result(update_url_query( | 
					
						
							|  |  |  |             'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query), | 
					
						
							|  |  |  |             'Kaltura') | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-19 22:30:48 +06:00
										 |  |  | class SafariApiIE(SafariBaseIE): | 
					
						
							|  |  |  |     IE_NAME = 'safari:api' | 
					
						
							| 
									
										
										
										
											2016-08-01 21:48:48 +07:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' | 
					
						
							| 
									
										
										
										
											2016-03-19 22:30:48 +06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-08-01 21:48:48 +07:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2016-03-19 22:30:48 +06:00
										 |  |  |         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-08-01 21:48:48 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2016-03-19 22:30:48 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         part = self._download_json( | 
					
						
							|  |  |  |             url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), | 
					
						
							|  |  |  |             'Downloading part JSON') | 
					
						
							|  |  |  |         return self.url_result(part['web_url'], SafariIE.ie_key()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | class SafariCourseIE(SafariBaseIE): | 
					
						
							|  |  |  |     IE_NAME = 'safari:course' | 
					
						
							|  |  |  |     IE_DESC = 'safaribooksonline.com online courses' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-15 00:27:43 +01:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)' | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '9780133392838', | 
					
						
							|  |  |  |             'title': 'Hadoop Fundamentals LiveLessons', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_count': 22, | 
					
						
							|  |  |  |         'skip': 'Requires safaribooksonline account credentials', | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |         course_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |         course_json = self._download_json( | 
					
						
							| 
									
										
										
										
											2016-03-13 02:03:07 +06:00
										 |  |  |             '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             course_id, 'Downloading course JSON') | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if 'chapters' not in course_json: | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             raise ExtractorError( | 
					
						
							|  |  |  |                 'No chapters found for course %s' % course_id, expected=True) | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         entries = [ | 
					
						
							| 
									
										
										
										
											2016-03-19 22:30:48 +06:00
										 |  |  |             self.url_result(chapter, SafariApiIE.ie_key()) | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             for chapter in course_json['chapters']] | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         course_title = course_json['title'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return self.playlist_result(entries, course_id, course_title) |