| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | # encoding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2015-11-14 05:54:16 +06:00
										 |  |  | from .brightcove import BrightcoveLegacyIE | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  | from ..compat import compat_urllib_parse | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |     sanitized_Request, | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |     smuggle_url, | 
					
						
							|  |  |  |     std_headers, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SafariBaseIE(InfoExtractor): | 
					
						
							|  |  |  |     _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/' | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |     _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>' | 
					
						
							|  |  |  |     _NETRC_MACHINE = 'safari' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _API_BASE = 'https://www.safaribooksonline.com/api/v1/book' | 
					
						
							|  |  |  |     _API_FORMAT = 'json' | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     LOGGED_IN = False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_initialize(self): | 
					
						
							|  |  |  |         # We only need to log in once for courses or individual videos | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |         if not self.LOGGED_IN: | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |             self._login() | 
					
						
							|  |  |  |             SafariBaseIE.LOGGED_IN = True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _login(self): | 
					
						
							|  |  |  |         (username, password) = self._get_login_info() | 
					
						
							|  |  |  |         if username is None: | 
					
						
							| 
									
										
										
										
											2015-08-26 21:26:24 +06:00
										 |  |  |             self.raise_login_required('safaribooksonline.com account is required') | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         headers = std_headers | 
					
						
							|  |  |  |         if 'Referer' not in headers: | 
					
						
							|  |  |  |             headers['Referer'] = self._LOGIN_URL | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         login_page = self._download_webpage( | 
					
						
							|  |  |  |             self._LOGIN_URL, None, | 
					
						
							|  |  |  |             'Downloading login form') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         csrf = self._html_search_regex( | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             r"name='csrfmiddlewaretoken'\s+value='([^']+)'", | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |             login_page, 'csrf token') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         login_form = { | 
					
						
							|  |  |  |             'csrfmiddlewaretoken': csrf, | 
					
						
							|  |  |  |             'email': username, | 
					
						
							|  |  |  |             'password1': password, | 
					
						
							|  |  |  |             'login': 'Sign In', | 
					
						
							|  |  |  |             'next': '', | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |         request = sanitized_Request( | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers) | 
					
						
							|  |  |  |         login_page = self._download_webpage( | 
					
						
							|  |  |  |             request, None, 'Logging in as %s' % username) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             raise ExtractorError( | 
					
						
							|  |  |  |                 'Login failed; make sure your credentials are correct and try again.', | 
					
						
							|  |  |  |                 expected=True) | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.to_screen('Login successful') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SafariIE(SafariBaseIE): | 
					
						
							|  |  |  |     IE_NAME = 'safari' | 
					
						
							|  |  |  |     IE_DESC = 'safaribooksonline.com online video' | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |     _VALID_URL = r'''(?x)https?://
 | 
					
						
							|  |  |  |                             (?:www\.)?safaribooksonline\.com/ | 
					
						
							|  |  |  |                                 (?: | 
					
						
							|  |  |  |                                     library/view/[^/]+| | 
					
						
							|  |  |  |                                     api/v1/book | 
					
						
							|  |  |  |                                 )/ | 
					
						
							| 
									
										
										
										
											2015-06-15 00:27:43 +01:00
										 |  |  |                                 (?P<course_id>[^/]+)/ | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |                                     (?:chapter(?:-content)?/)? | 
					
						
							|  |  |  |                                 (?P<part>part\d+)\.html | 
					
						
							|  |  |  |     '''
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |         'md5': '5b0c4cc1b3c1ba15dda7344085aa5592', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             'id': '2842601850001', | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Introduction', | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |         }, | 
					
						
							|  |  |  |         'skip': 'Requires safaribooksonline account credentials', | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2015-06-15 21:36:30 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         # non-digits in course id | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |         course_id = mobj.group('course_id') | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |         part = mobj.group('part') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |         webpage = self._download_webpage( | 
					
						
							|  |  |  |             '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part), | 
					
						
							|  |  |  |             part) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-14 05:54:16 +06:00
										 |  |  |         bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |         if not bc_url: | 
					
						
							|  |  |  |             raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-14 06:05:46 +06:00
										 |  |  |         return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy') | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SafariCourseIE(SafariBaseIE): | 
					
						
							|  |  |  |     IE_NAME = 'safari:course' | 
					
						
							|  |  |  |     IE_DESC = 'safaribooksonline.com online courses' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-15 00:27:43 +01:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)' | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '9780133392838', | 
					
						
							|  |  |  |             'title': 'Hadoop Fundamentals LiveLessons', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_count': 22, | 
					
						
							|  |  |  |         'skip': 'Requires safaribooksonline account credentials', | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |         course_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |         course_json = self._download_json( | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  |             '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             course_id, 'Downloading course JSON') | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if 'chapters' not in course_json: | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             raise ExtractorError( | 
					
						
							|  |  |  |                 'No chapters found for course %s' % course_id, expected=True) | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         entries = [ | 
					
						
							| 
									
										
										
										
											2015-03-26 23:57:46 +06:00
										 |  |  |             self.url_result(chapter, 'Safari') | 
					
						
							|  |  |  |             for chapter in course_json['chapters']] | 
					
						
							| 
									
										
										
										
											2015-03-22 18:03:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         course_title = course_json['title'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return self.playlist_result(entries, course_id, course_title) |