| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  | from ..utils import unified_strdate | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | class LibsynIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |     _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _TEST = { | 
					
						
							|  |  |  |         'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/', | 
					
						
							|  |  |  |         'md5': '443360ee1b58007bc3dcf09b41d093bb', | 
					
						
							| 
									
										
										
										
											2015-03-17 18:54:36 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |             'id': '3377616', | 
					
						
							|  |  |  |             'ext': 'mp3', | 
					
						
							|  |  |  |             'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart", | 
					
						
							|  |  |  |             'description': 'md5:601cb790edd05908957dae8aaa866465', | 
					
						
							|  |  |  |             'upload_date': '20150220', | 
					
						
							| 
									
										
										
										
											2015-03-17 18:54:36 +01:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         formats = [{ | 
					
						
							|  |  |  |             'url': media_url, | 
					
						
							|  |  |  |         } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         podcast_title = self._search_regex( | 
					
						
							|  |  |  |             r'<h2>([^<]+)</h2>', webpage, 'title') | 
					
						
							|  |  |  |         episode_title = self._search_regex( | 
					
						
							|  |  |  |             r'<h3>([^<]+)</h3>', webpage, 'title', default=None) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-22 11:03:52 +01:00
										 |  |  |         title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |         description = self._html_search_regex( | 
					
						
							|  |  |  |             r'<div id="info_text_body">(.+?)</div>', webpage, | 
					
						
							|  |  |  |             'description', fatal=False) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         thumbnail = self._search_regex( | 
					
						
							|  |  |  |             r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"', | 
					
						
							|  |  |  |             webpage, 'thumbnail', fatal=False) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         release_date = unified_strdate(self._search_regex( | 
					
						
							|  |  |  |             r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False)) | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |             'upload_date': release_date, | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  |             'formats': formats, | 
					
						
							|  |  |  |         } |