| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-10 16:03:09 +12:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2016-06-10 16:04:28 +08:00
										 |  |  | from ..compat import ( | 
					
						
							|  |  |  |     compat_str, | 
					
						
							|  |  |  |     compat_urlparse, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							|  |  |  |     determine_ext, | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |     sanitized_Request, | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class VoiceRepublicIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |     _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)' | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', | 
					
						
							| 
									
										
										
										
											2016-06-10 16:04:28 +08:00
										 |  |  |         'md5': 'b9174d651323f17783000876347116e3', | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '2296', | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |             'display_id': 'watching-the-watchers-building-a-sousveillance-state', | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  |             'ext': 'm4a', | 
					
						
							|  |  |  |             'title': 'Watching the Watchers: Building a Sousveillance State', | 
					
						
							| 
									
										
										
										
											2016-06-10 16:04:28 +08:00
										 |  |  |             'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.', | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.(?:png|jpg)$', | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |             'duration': 1800, | 
					
						
							|  |  |  |             'view_count': int, | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         display_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |         req = sanitized_Request( | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |             compat_urlparse.urljoin(url, '/talks/%s' % display_id)) | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  |         # Older versions of Firefox get redirected to an "upgrade browser" page | 
					
						
							|  |  |  |         req.add_header('User-Agent', 'youtube-dl') | 
					
						
							|  |  |  |         webpage = self._download_webpage(req, display_id) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |         if '>Queued for processing, please stand by...<' in webpage: | 
					
						
							|  |  |  |             raise ExtractorError( | 
					
						
							|  |  |  |                 'Audio is still queued for processing', expected=True) | 
					
						
							| 
									
										
										
										
											2015-05-10 15:12:29 +12:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-10 18:37:52 +06:00
										 |  |  |         config = self._search_regex( | 
					
						
							|  |  |  |             r'(?s)return ({.+?});\s*\n', webpage, | 
					
						
							|  |  |  |             'data', default=None) | 
					
						
							|  |  |  |         data = self._parse_json(config, display_id, fatal=False) if config else None | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |         if data: | 
					
						
							|  |  |  |             title = data['title'] | 
					
						
							|  |  |  |             description = data.get('teaser') | 
					
						
							| 
									
										
										
										
											2016-06-10 16:04:28 +08:00
										 |  |  |             talk_id = compat_str(data.get('talk_id') or display_id) | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |             talk = data['talk'] | 
					
						
							|  |  |  |             duration = int_or_none(talk.get('duration')) | 
					
						
							|  |  |  |             formats = [{ | 
					
						
							|  |  |  |                 'url': compat_urlparse.urljoin(url, talk_url), | 
					
						
							|  |  |  |                 'format_id': format_id, | 
					
						
							|  |  |  |                 'ext': determine_ext(talk_url) or format_id, | 
					
						
							|  |  |  |                 'vcodec': 'none', | 
					
						
							|  |  |  |             } for format_id, talk_url in talk['links'].items()] | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             title = self._og_search_title(webpage) | 
					
						
							|  |  |  |             description = self._html_search_regex( | 
					
						
							|  |  |  |                 r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>", | 
					
						
							|  |  |  |                 webpage, 'description', fatal=False) | 
					
						
							|  |  |  |             talk_id = self._search_regex( | 
					
						
							|  |  |  |                 [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"], | 
					
						
							|  |  |  |                 webpage, 'talk id', default=None) or display_id | 
					
						
							|  |  |  |             duration = None | 
					
						
							| 
									
										
										
										
											2015-05-10 18:37:52 +06:00
										 |  |  |             player = self._search_regex( | 
					
						
							|  |  |  |                 r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player') | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |             formats = [{ | 
					
						
							|  |  |  |                 'url': compat_urlparse.urljoin(url, talk_url), | 
					
						
							|  |  |  |                 'format_id': format_id, | 
					
						
							|  |  |  |                 'ext': determine_ext(talk_url) or format_id, | 
					
						
							|  |  |  |                 'vcodec': 'none', | 
					
						
							| 
									
										
										
										
											2015-05-10 18:37:52 +06:00
										 |  |  |             } for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", player)] | 
					
						
							| 
									
										
										
										
											2015-05-10 15:12:29 +12:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |         thumbnail = self._og_search_thumbnail(webpage) | 
					
						
							|  |  |  |         view_count = int_or_none(self._search_regex( | 
					
						
							|  |  |  |             r"class='play-count[^']*'>\s*(\d+) plays", | 
					
						
							|  |  |  |             webpage, 'play count', fatal=False)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  |         return { | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |             'id': talk_id, | 
					
						
							|  |  |  |             'display_id': display_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  |             'thumbnail': thumbnail, | 
					
						
							| 
									
										
										
										
											2015-05-10 18:29:15 +06:00
										 |  |  |             'duration': duration, | 
					
						
							|  |  |  |             'view_count': view_count, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2015-05-10 12:30:07 +12:00
										 |  |  |         } |