| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  | import json | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     parse_duration, | 
					
						
							|  |  |  |     unified_strdate, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | class LibsynIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2015-08-18 13:02:41 +02:00
										 |  |  |     _VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))' | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-18 13:02:41 +02:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  |         'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/', | 
					
						
							|  |  |  |         'md5': '2a55e75496c790cdeb058e7e6c087746', | 
					
						
							| 
									
										
										
										
											2015-03-17 18:54:36 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  |             'id': '6385796', | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |             'ext': 'mp3', | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  |             'title': "Champion Minded - Developing a Growth Mindset", | 
					
						
							|  |  |  |             'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', | 
					
						
							|  |  |  |             'upload_date': '20180320', | 
					
						
							| 
									
										
										
										
											2015-08-18 13:02:41 +02:00
										 |  |  |             'thumbnail': 're:^https?://.*', | 
					
						
							| 
									
										
										
										
											2015-03-17 18:54:36 +01:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-08-18 13:02:41 +02:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/', | 
					
						
							|  |  |  |         'md5': '6c5cb21acd622d754d3b1a92b582ce42', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '3727166', | 
					
						
							|  |  |  |             'ext': 'mp3', | 
					
						
							|  |  |  |             'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career', | 
					
						
							|  |  |  |             'upload_date': '20150818', | 
					
						
							|  |  |  |             'thumbnail': 're:^https?://.*', | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2015-08-18 13:02:41 +02:00
										 |  |  |         m = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         video_id = m.group('id') | 
					
						
							|  |  |  |         url = m.group('mainurl') | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         podcast_title = self._search_regex( | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  |             r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None) | 
					
						
							|  |  |  |         if podcast_title: | 
					
						
							|  |  |  |             podcast_title = podcast_title.strip() | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |         episode_title = self._search_regex( | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  |             r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title') | 
					
						
							|  |  |  |         if episode_title: | 
					
						
							|  |  |  |             episode_title = episode_title.strip() | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-22 11:03:52 +01:00
										 |  |  |         title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |         description = self._html_search_regex( | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  |             r'<p\s+id="info_text_body">(.+?)</p>', webpage, | 
					
						
							| 
									
										
										
										
											2015-08-18 13:02:41 +02:00
										 |  |  |             'description', default=None) | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  |         if description: | 
					
						
							|  |  |  |             # Strip non-breaking and normal spaces | 
					
						
							|  |  |  |             description = description.replace('\u00A0', ' ').strip() | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |         release_date = unified_strdate(self._search_regex( | 
					
						
							|  |  |  |             r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False)) | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  |         data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block') | 
					
						
							|  |  |  |         data = json.loads(data_json) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         formats = [{ | 
					
						
							|  |  |  |             'url': data['media_url'], | 
					
						
							|  |  |  |             'format_id': 'main', | 
					
						
							|  |  |  |         }, { | 
					
						
							|  |  |  |             'url': data['media_url_libsyn'], | 
					
						
							|  |  |  |             'format_id': 'libsyn', | 
					
						
							|  |  |  |         }] | 
					
						
							|  |  |  |         thumbnail = data.get('thumbnail_url') | 
					
						
							|  |  |  |         duration = parse_duration(data.get('duration')) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  |         return { | 
					
						
							| 
									
										
										
										
											2015-03-22 08:11:10 +06:00
										 |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |             'upload_date': release_date, | 
					
						
							| 
									
										
										
										
											2018-03-20 23:07:11 +01:00
										 |  |  |             'duration': duration, | 
					
						
							| 
									
										
										
										
											2015-03-16 20:05:02 +01:00
										 |  |  |             'formats': formats, | 
					
						
							|  |  |  |         } |