| 
									
										
										
										
											2017-07-09 19:15:48 +07:00
										 |  |  |  | # coding: utf-8 | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  | import re | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  | from ..utils import ( | 
					
						
							|  |  |  |  |     determine_ext, | 
					
						
							|  |  |  |  |     unescapeHTML, | 
					
						
							|  |  |  |  | ) | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | class CJSWIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  |     _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)' | 
					
						
							| 
									
										
										
										
											2017-07-09 17:05:11 +07:00
										 |  |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  |         'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', | 
					
						
							|  |  |  |  |         'md5': 'cee14d40f1e9433632c56e3d14977120', | 
					
						
							|  |  |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  |             'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41', | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  |             'ext': 'mp3', | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  |             'title': 'Freshly Squeezed – Episode June 20, 2017', | 
					
						
							|  |  |  |  |             'description': 'md5:c967d63366c3898a80d0c7b0ff337202', | 
					
						
							|  |  |  |  |             'series': 'Freshly Squeezed', | 
					
						
							|  |  |  |  |             'episode_id': '20170620', | 
					
						
							|  |  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2017-07-09 17:05:11 +07:00
										 |  |  |  |     }, { | 
					
						
							|  |  |  |  |         # no description | 
					
						
							|  |  |  |  |         'url': 'http://cjsw.com/program/road-pops/episode/20170707/', | 
					
						
							|  |  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |  |         program, episode_id = mobj.group('program', 'id') | 
					
						
							|  |  |  |  |         audio_id = '%s/%s' % (program, episode_id) | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |         webpage = self._download_webpage(url, episode_id) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  |         title = unescapeHTML(self._search_regex( | 
					
						
							|  |  |  |  |             (r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)', | 
					
						
							|  |  |  |  |              r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'), | 
					
						
							|  |  |  |  |             webpage, 'title', group='title')) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         audio_url = self._search_regex( | 
					
						
							|  |  |  |  |             r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1', | 
					
						
							|  |  |  |  |             webpage, 'audio url', group='url') | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         audio_id = self._search_regex( | 
					
						
							|  |  |  |  |             r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3', | 
					
						
							|  |  |  |  |             audio_url, 'audio id', default=audio_id) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  |         formats = [{ | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  |             'url': audio_url, | 
					
						
							|  |  |  |  |             'ext': determine_ext(audio_url, 'mp3'), | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  |             'vcodec': 'none', | 
					
						
							|  |  |  |  |         }] | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |         description = self._html_search_regex( | 
					
						
							| 
									
										
										
										
											2017-07-09 17:05:11 +07:00
										 |  |  |  |             r'<p>(?P<description>.+?)</p>', webpage, 'description', | 
					
						
							|  |  |  |  |             default=None) | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  |         series = self._search_regex( | 
					
						
							|  |  |  |  |             r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage, | 
					
						
							|  |  |  |  |             'series', default=program, group='name') | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  |         return { | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  |             'id': audio_id, | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  |             'title': title, | 
					
						
							|  |  |  |  |             'description': description, | 
					
						
							|  |  |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2017-07-09 17:00:45 +07:00
										 |  |  |  |             'series': series, | 
					
						
							|  |  |  |  |             'episode_id': episode_id, | 
					
						
							| 
									
										
										
										
											2017-06-29 13:10:45 -06:00
										 |  |  |  |         } |