| 
									
										
										
										
											2014-09-04 19:48:29 +07:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-17 08:20:58 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     get_element_by_attribute, | 
					
						
							|  |  |  |     clean_html, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class TechTalksIE(InfoExtractor): | 
					
						
							|  |  |  |     _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _TEST = { | 
					
						
							| 
									
										
										
										
											2014-09-04 19:48:29 +07:00
										 |  |  |         'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '57758', | 
					
						
							|  |  |  |             'title': 'Learning Topic Models --- Going beyond SVD', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist': [ | 
					
						
							| 
									
										
										
										
											2013-10-17 08:20:58 +02:00
										 |  |  |             { | 
					
						
							| 
									
										
										
										
											2014-09-04 19:48:29 +07:00
										 |  |  |                 'info_dict': { | 
					
						
							|  |  |  |                     'id': '57758', | 
					
						
							|  |  |  |                     'ext': 'flv', | 
					
						
							|  |  |  |                     'title': 'Learning Topic Models --- Going beyond SVD', | 
					
						
							| 
									
										
										
										
											2013-10-17 08:20:58 +02:00
										 |  |  |                 }, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							| 
									
										
										
										
											2014-09-04 19:48:29 +07:00
										 |  |  |                 'info_dict': { | 
					
						
							|  |  |  |                     'id': '57758-slides', | 
					
						
							|  |  |  |                     'ext': 'flv', | 
					
						
							|  |  |  |                     'title': 'Learning Topic Models --- Going beyond SVD', | 
					
						
							| 
									
										
										
										
											2013-10-17 08:20:58 +02:00
										 |  |  |                 }, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ], | 
					
						
							| 
									
										
										
										
											2014-09-04 19:48:29 +07:00
										 |  |  |         'params': { | 
					
						
							| 
									
										
										
										
											2013-10-17 08:20:58 +02:00
										 |  |  |             # rtmp download | 
					
						
							| 
									
										
										
										
											2014-09-04 19:48:29 +07:00
										 |  |  |             'skip_download': True, | 
					
						
							| 
									
										
										
										
											2013-10-17 08:20:58 +02:00
										 |  |  |         }, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         talk_id = mobj.group('id') | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, talk_id) | 
					
						
							| 
									
										
										
										
											2014-09-04 19:48:29 +07:00
										 |  |  |         rtmp_url = self._search_regex( | 
					
						
							|  |  |  |             r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url') | 
					
						
							|  |  |  |         play_path = self._search_regex( | 
					
						
							|  |  |  |             r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', | 
					
						
							|  |  |  |             webpage, 'presenter play path') | 
					
						
							| 
									
										
										
										
											2013-10-17 08:20:58 +02:00
										 |  |  |         title = clean_html(get_element_by_attribute('class', 'title', webpage)) | 
					
						
							|  |  |  |         video_info = { | 
					
						
							| 
									
										
										
										
											2014-09-04 19:48:29 +07:00
										 |  |  |             'id': talk_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'url': rtmp_url, | 
					
						
							|  |  |  |             'play_path': play_path, | 
					
						
							|  |  |  |             'ext': 'flv', | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2013-10-17 08:20:58 +02:00
										 |  |  |         m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage) | 
					
						
							|  |  |  |         if m_slides is None: | 
					
						
							|  |  |  |             return video_info | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2014-09-04 19:48:29 +07:00
										 |  |  |             return { | 
					
						
							|  |  |  |                 '_type': 'playlist', | 
					
						
							|  |  |  |                 'id': talk_id, | 
					
						
							|  |  |  |                 'title': title, | 
					
						
							|  |  |  |                 'entries': [ | 
					
						
							|  |  |  |                     video_info, | 
					
						
							|  |  |  |                     # The slides video | 
					
						
							|  |  |  |                     { | 
					
						
							|  |  |  |                         'id': talk_id + '-slides', | 
					
						
							|  |  |  |                         'title': title, | 
					
						
							|  |  |  |                         'url': rtmp_url, | 
					
						
							|  |  |  |                         'play_path': m_slides.group(1), | 
					
						
							|  |  |  |                         'ext': 'flv', | 
					
						
							|  |  |  |                     }, | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |             } |