| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from .vimeo import VimeoIE | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  | from ..compat import compat_str | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |     int_or_none, | 
					
						
							|  |  |  |     merge_dicts, | 
					
						
							|  |  |  |     try_get, | 
					
						
							|  |  |  |     unescapeHTML, | 
					
						
							|  |  |  |     unified_timestamp, | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  |     urljoin, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class RayWenderlichIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |     _VALID_URL = r'''(?x)
 | 
					
						
							|  |  |  |                     https?:// | 
					
						
							|  |  |  |                         (?: | 
					
						
							|  |  |  |                             videos\.raywenderlich\.com/courses| | 
					
						
							|  |  |  |                             (?:www\.)?raywenderlich\.com | 
					
						
							|  |  |  |                         )/ | 
					
						
							|  |  |  |                         (?P<course_id>[^/]+)/lessons/(?P<id>\d+) | 
					
						
							|  |  |  |                     '''
 | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |         'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1', | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '248377018', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |             'title': 'Introduction', | 
					
						
							|  |  |  |             'description': 'md5:804d031b3efa9fcb49777d512d74f722', | 
					
						
							|  |  |  |             'timestamp': 1513906277, | 
					
						
							|  |  |  |             'upload_date': '20171222', | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  |             'duration': 133, | 
					
						
							|  |  |  |             'uploader': 'Ray Wenderlich', | 
					
						
							|  |  |  |             'uploader_id': 'user3304672', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             'noplaylist': True, | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'add_ie': [VimeoIE.ie_key()], | 
					
						
							|  |  |  |         'expected_warnings': ['HTTP Error 403: Forbidden'], | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _extract_video_id(data, lesson_id): | 
					
						
							|  |  |  |         if not data: | 
					
						
							|  |  |  |             return | 
					
						
							|  |  |  |         groups = try_get(data, lambda x: x['groups'], list) or [] | 
					
						
							|  |  |  |         if not groups: | 
					
						
							|  |  |  |             return | 
					
						
							|  |  |  |         for group in groups: | 
					
						
							|  |  |  |             if not isinstance(group, dict): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             contents = try_get(data, lambda x: x['contents'], list) or [] | 
					
						
							|  |  |  |             for content in contents: | 
					
						
							|  |  |  |                 if not isinstance(content, dict): | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 ordinal = int_or_none(content.get('ordinal')) | 
					
						
							|  |  |  |                 if ordinal != lesson_id: | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 video_id = content.get('identifier') | 
					
						
							|  |  |  |                 if video_id: | 
					
						
							|  |  |  |                     return compat_str(video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         course_id, lesson_id = mobj.group('course_id', 'id') | 
					
						
							|  |  |  |         display_id = '%s/%s' % (course_id, lesson_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, display_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         thumbnail = self._og_search_thumbnail( | 
					
						
							|  |  |  |             webpage, default=None) or self._html_search_meta( | 
					
						
							|  |  |  |             'twitter:image', webpage, 'thumbnail') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if '>Subscribe to unlock' in webpage: | 
					
						
							|  |  |  |             raise ExtractorError( | 
					
						
							|  |  |  |                 'This content is only available for subscribers', | 
					
						
							|  |  |  |                 expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         info = { | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         vimeo_id = self._search_regex( | 
					
						
							|  |  |  |             r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not vimeo_id: | 
					
						
							|  |  |  |             data = self._parse_json( | 
					
						
							|  |  |  |                 self._search_regex( | 
					
						
							|  |  |  |                     r'data-collection=(["\'])(?P<data>{.+?})\1', webpage, | 
					
						
							|  |  |  |                     'data collection', default='{}', group='data'), | 
					
						
							|  |  |  |                 display_id, transform_source=unescapeHTML, fatal=False) | 
					
						
							|  |  |  |             video_id = self._extract_video_id( | 
					
						
							|  |  |  |                 data, lesson_id) or self._search_regex( | 
					
						
							|  |  |  |                 r'/videos/(\d+)/', thumbnail, 'video id') | 
					
						
							|  |  |  |             headers = { | 
					
						
							|  |  |  |                 'Referer': url, | 
					
						
							|  |  |  |                 'X-Requested-With': 'XMLHttpRequest', | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             csrf_token = self._html_search_meta( | 
					
						
							|  |  |  |                 'csrf-token', webpage, 'csrf token', default=None) | 
					
						
							|  |  |  |             if csrf_token: | 
					
						
							|  |  |  |                 headers['X-CSRF-Token'] = csrf_token | 
					
						
							|  |  |  |             video = self._download_json( | 
					
						
							|  |  |  |                 'https://videos.raywenderlich.com/api/v1/videos/%s.json' | 
					
						
							|  |  |  |                 % video_id, display_id, headers=headers)['video'] | 
					
						
							|  |  |  |             vimeo_id = video['clips'][0]['provider_id'] | 
					
						
							|  |  |  |             info.update({ | 
					
						
							|  |  |  |                 '_type': 'url_transparent', | 
					
						
							|  |  |  |                 'title': video.get('name'), | 
					
						
							|  |  |  |                 'description': video.get('description') or video.get( | 
					
						
							|  |  |  |                     'meta_description'), | 
					
						
							|  |  |  |                 'duration': int_or_none(video.get('duration')), | 
					
						
							|  |  |  |                 'timestamp': unified_timestamp(video.get('created_at')), | 
					
						
							|  |  |  |             }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return merge_dicts(info, self.url_result( | 
					
						
							|  |  |  |             VimeoIE._smuggle_referrer( | 
					
						
							|  |  |  |                 'https://player.vimeo.com/video/%s' % vimeo_id, url), | 
					
						
							|  |  |  |             ie=VimeoIE.ie_key(), video_id=vimeo_id)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class RayWenderlichCourseIE(InfoExtractor): | 
					
						
							|  |  |  |     _VALID_URL = r'''(?x)
 | 
					
						
							|  |  |  |                     https?:// | 
					
						
							|  |  |  |                         (?: | 
					
						
							|  |  |  |                             videos\.raywenderlich\.com/courses| | 
					
						
							|  |  |  |                             (?:www\.)?raywenderlich\.com | 
					
						
							|  |  |  |                         )/ | 
					
						
							|  |  |  |                         (?P<id>[^/]+) | 
					
						
							|  |  |  |                     '''
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _TEST = { | 
					
						
							|  |  |  |         'url': 'https://www.raywenderlich.com/3530-testing-in-ios', | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'title': 'Testing in iOS', | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |             'id': '3530-testing-in-ios', | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             'noplaylist': False, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_count': 29, | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @classmethod | 
					
						
							|  |  |  |     def suitable(cls, url): | 
					
						
							|  |  |  |         return False if RayWenderlichIE.suitable(url) else super( | 
					
						
							|  |  |  |             RayWenderlichCourseIE, cls).suitable(url) | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |         course_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |         webpage = self._download_webpage(url, course_id) | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |         entries = [] | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |         lesson_urls = set() | 
					
						
							|  |  |  |         for lesson_url in re.findall( | 
					
						
							|  |  |  |                 r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage): | 
					
						
							|  |  |  |             if lesson_url in lesson_urls: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             lesson_urls.add(lesson_url) | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  |             entries.append(self.url_result( | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |                 urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key())) | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-13 00:15:59 +07:00
										 |  |  |         title = self._og_search_title( | 
					
						
							|  |  |  |             webpage, default=None) or self._html_search_meta( | 
					
						
							|  |  |  |             'twitter:title', webpage, 'title', default=None) | 
					
						
							| 
									
										
										
										
											2018-03-09 23:27:44 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return self.playlist_result(entries, course_id, title) |