| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-01 22:39:38 +06:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2015-10-01 22:44:51 +06:00
										 |  |  | from ..compat import ( | 
					
						
							|  |  |  |     compat_HTTPError, | 
					
						
							|  |  |  |     compat_urlparse, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							|  |  |  |     parse_duration, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class VideoLecturesNetIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2015-08-09 08:51:37 +06:00
										 |  |  |     _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$' | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  |     IE_NAME = 'videolectures.net' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-01 22:45:10 +06:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  |         'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'promogram_igor_mekjavic_eng', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Automatics, robotics and biocybernetics', | 
					
						
							|  |  |  |             'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', | 
					
						
							|  |  |  |             'upload_date': '20130627', | 
					
						
							|  |  |  |             'duration': 565, | 
					
						
							|  |  |  |             'thumbnail': 're:http://.*\.jpg', | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-10-01 23:10:36 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         # video with invalid direct format links (HTTP 403) | 
					
						
							|  |  |  |         'url': 'http://videolectures.net/russir2010_filippova_nlp/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'russir2010_filippova_nlp', | 
					
						
							|  |  |  |             'ext': 'flv', | 
					
						
							|  |  |  |             'title': 'NLP at Google', | 
					
						
							|  |  |  |             'description': 'md5:fc7a6d9bf0302d7cc0e53f7ca23747b3', | 
					
						
							|  |  |  |             'duration': 5352, | 
					
						
							|  |  |  |             'thumbnail': 're:http://.*\.jpg', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             # rtmp download | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-10-01 22:45:10 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://videolectures.net/deeplearning2015_montreal/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'deeplearning2015_montreal', | 
					
						
							|  |  |  |             'title': 'Deep Learning Summer School, Montreal 2015', | 
					
						
							|  |  |  |             'description': 'md5:90121a40cc6926df1bf04dcd8563ed3b', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_count': 30, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2015-10-01 22:19:39 +06:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id | 
					
						
							| 
									
										
										
										
											2015-10-01 22:44:51 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             smil = self._download_smil(smil_url, video_id) | 
					
						
							|  |  |  |         except ExtractorError as e: | 
					
						
							|  |  |  |             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: | 
					
						
							|  |  |  |                 # Probably a playlist | 
					
						
							|  |  |  |                 webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  |                 entries = [ | 
					
						
							|  |  |  |                     self.url_result(compat_urlparse.urljoin(url, video_url), 'VideoLecturesNet') | 
					
						
							|  |  |  |                     for _, video_url in re.findall(r'<a[^>]+href=(["\'])(.+?)\1[^>]+id=["\']lec=\d+', webpage)] | 
					
						
							|  |  |  |                 playlist_title = self._html_search_meta('title', webpage, 'title', fatal=True) | 
					
						
							|  |  |  |                 playlist_description = self._html_search_meta('description', webpage, 'description') | 
					
						
							|  |  |  |                 return self.playlist_result(entries, video_id, playlist_title, playlist_description) | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-01 22:19:39 +06:00
										 |  |  |         info = self._parse_smil(smil, smil_url, video_id) | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-01 22:19:39 +06:00
										 |  |  |         info['id'] = video_id | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-01 22:19:39 +06:00
										 |  |  |         switch = smil.find('.//switch') | 
					
						
							|  |  |  |         if switch is not None: | 
					
						
							|  |  |  |             info['duration'] = parse_duration(switch.attrib.get('dur')) | 
					
						
							| 
									
										
										
										
											2014-03-21 14:38:37 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-01 22:19:39 +06:00
										 |  |  |         return info |