| 
									
										
										
										
											2014-01-17 03:52:17 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  | import json | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-02 19:48:39 +01:00
										 |  |  | from .subtitles import SubtitlesInfoExtractor | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-05 12:00:13 +01:00
										 |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2014-03-05 13:22:10 +01:00
										 |  |  |     compat_str, | 
					
						
							| 
									
										
										
										
											2013-11-05 12:00:13 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-17 03:52:17 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-02 19:48:39 +01:00
										 |  |  | class TEDIE(SubtitlesInfoExtractor): | 
					
						
							| 
									
										
										
										
											2014-03-05 13:27:26 +01:00
										 |  |  |     _VALID_URL = r'''(?x)http://www\.ted\.com/
 | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist | 
					
						
							|  |  |  |             | | 
					
						
							|  |  |  |             ((?P<type_talk>talks)) # We have a simple talk | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         (/lang/(.*?))? # The url may contain the language | 
					
						
							|  |  |  |         /(?P<name>\w+) # Here goes the name and then ".html" | 
					
						
							|  |  |  |         '''
 | 
					
						
							| 
									
										
										
										
											2013-06-27 20:46:46 +02:00
										 |  |  |     _TEST = { | 
					
						
							| 
									
										
										
										
											2014-01-17 03:52:17 +01:00
										 |  |  |         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', | 
					
						
							| 
									
										
										
										
											2014-01-17 03:54:54 +01:00
										 |  |  |         'md5': '4ea1dada91e4174b53dac2bb8ace429d', | 
					
						
							| 
									
										
										
										
											2014-01-17 03:52:17 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2014-03-05 14:27:45 +01:00
										 |  |  |             'id': '102', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2014-03-04 21:47:01 +01:00
										 |  |  |             'title': 'The illusion of consciousness', | 
					
						
							| 
									
										
										
										
											2014-03-05 13:27:26 +01:00
										 |  |  |             'description': ('Philosopher Dan Dennett makes a compelling ' | 
					
						
							|  |  |  |                 'argument that not only don\'t we understand our own ' | 
					
						
							|  |  |  |                 'consciousness, but that half the time our brains are ' | 
					
						
							|  |  |  |                 'actively fooling us.'), | 
					
						
							| 
									
										
										
										
											2014-03-04 21:47:01 +01:00
										 |  |  |             'uploader': 'Dan Dennett', | 
					
						
							| 
									
										
										
										
											2013-06-27 20:46:46 +02:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-04 21:47:01 +01:00
										 |  |  |     _FORMATS_PREFERENCE = { | 
					
						
							|  |  |  |         'low': 1, | 
					
						
							|  |  |  |         'medium': 2, | 
					
						
							|  |  |  |         'high': 3, | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-05 13:22:10 +01:00
										 |  |  |     def _extract_info(self, webpage): | 
					
						
							| 
									
										
										
										
											2014-03-05 13:27:26 +01:00
										 |  |  |         info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', | 
					
						
							|  |  |  |             webpage, 'info json') | 
					
						
							| 
									
										
										
										
											2014-03-05 13:22:10 +01:00
										 |  |  |         return json.loads(info_json) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2014-03-05 13:27:26 +01:00
										 |  |  |         m = re.match(self._VALID_URL, url, re.VERBOSE) | 
					
						
							|  |  |  |         name = m.group('name') | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  |         if m.group('type_talk'): | 
					
						
							| 
									
										
										
										
											2014-03-05 13:27:26 +01:00
										 |  |  |             return self._talk_info(url, name) | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2014-03-05 13:22:10 +01:00
										 |  |  |             return self._playlist_videos_info(url, name) | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-05 13:22:10 +01:00
										 |  |  |     def _playlist_videos_info(self, url, name): | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  |         '''Returns the videos of the playlist''' | 
					
						
							| 
									
										
										
										
											2013-11-15 14:33:51 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-05 13:22:10 +01:00
										 |  |  |         webpage = self._download_webpage(url, name, | 
					
						
							|  |  |  |             'Downloading playlist webpage') | 
					
						
							|  |  |  |         info = self._extract_info(webpage) | 
					
						
							|  |  |  |         playlist_info = info['playlist'] | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-15 14:33:51 +01:00
										 |  |  |         playlist_entries = [ | 
					
						
							| 
									
										
										
										
											2014-03-05 13:22:10 +01:00
										 |  |  |             self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key()) | 
					
						
							|  |  |  |             for talk in info['talks'] | 
					
						
							| 
									
										
										
										
											2013-11-15 14:33:51 +01:00
										 |  |  |         ] | 
					
						
							|  |  |  |         return self.playlist_result( | 
					
						
							| 
									
										
										
										
											2014-03-05 13:22:10 +01:00
										 |  |  |             playlist_entries, | 
					
						
							|  |  |  |             playlist_id=compat_str(playlist_info['id']), | 
					
						
							|  |  |  |             playlist_title=playlist_info['title']) | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-05 13:27:26 +01:00
										 |  |  |     def _talk_info(self, url, video_name): | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, video_name) | 
					
						
							| 
									
										
										
										
											2013-06-23 21:55:53 +02:00
										 |  |  |         self.report_extraction(video_name) | 
					
						
							| 
									
										
										
										
											2013-11-02 19:48:39 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-05 13:22:10 +01:00
										 |  |  |         talk_info = self._extract_info(webpage)['talks'][0] | 
					
						
							| 
									
										
										
										
											2013-11-02 19:48:39 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-04 21:47:01 +01:00
										 |  |  |         formats = [{ | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'url': format_url, | 
					
						
							|  |  |  |             'format_id': format_id, | 
					
						
							|  |  |  |             'format': format_id, | 
					
						
							|  |  |  |             'preference': self._FORMATS_PREFERENCE.get(format_id, -1), | 
					
						
							|  |  |  |         } for (format_id, format_url) in talk_info['nativeDownloads'].items()] | 
					
						
							|  |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-05 14:27:45 +01:00
										 |  |  |         video_id = compat_str(talk_info['id']) | 
					
						
							| 
									
										
										
										
											2013-11-02 19:48:39 +01:00
										 |  |  |         # subtitles | 
					
						
							| 
									
										
										
										
											2014-03-04 21:47:01 +01:00
										 |  |  |         video_subtitles = self.extract_subtitles(video_id, talk_info) | 
					
						
							| 
									
										
										
										
											2013-11-02 19:48:39 +01:00
										 |  |  |         if self._downloader.params.get('listsubtitles', False): | 
					
						
							| 
									
										
										
										
											2014-03-04 21:47:01 +01:00
										 |  |  |             self._list_available_subtitles(video_id, talk_info) | 
					
						
							| 
									
										
										
										
											2013-11-02 19:48:39 +01:00
										 |  |  |             return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-15 14:06:38 +01:00
										 |  |  |         return { | 
					
						
							| 
									
										
										
										
											2013-11-02 19:48:39 +01:00
										 |  |  |             'id': video_id, | 
					
						
							| 
									
										
										
										
											2014-03-04 21:47:01 +01:00
										 |  |  |             'title': talk_info['title'], | 
					
						
							|  |  |  |             'uploader': talk_info['speaker'], | 
					
						
							|  |  |  |             'thumbnail': talk_info['thumb'], | 
					
						
							|  |  |  |             'description': self._og_search_description(webpage), | 
					
						
							| 
									
										
										
										
											2013-11-02 19:48:39 +01:00
										 |  |  |             'subtitles': video_subtitles, | 
					
						
							| 
									
										
										
										
											2013-10-04 10:32:34 +02:00
										 |  |  |             'formats': formats, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-04 21:47:01 +01:00
										 |  |  |     def _get_available_subtitles(self, video_id, talk_info): | 
					
						
							|  |  |  |         languages = [lang['languageCode'] for lang in talk_info.get('languages', [])] | 
					
						
							|  |  |  |         if languages: | 
					
						
							|  |  |  |             sub_lang_list = {} | 
					
						
							|  |  |  |             for l in languages: | 
					
						
							|  |  |  |                 url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) | 
					
						
							|  |  |  |                 sub_lang_list[l] = url | 
					
						
							|  |  |  |             return sub_lang_list | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2013-11-05 12:00:13 +01:00
										 |  |  |             self._downloader.report_warning(u'video doesn\'t have subtitles') | 
					
						
							| 
									
										
										
										
											2014-03-04 21:47:01 +01:00
										 |  |  |             return {} |