| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  | from ..utils import remove_start | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class PressTVIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?' | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     _TEST = { | 
					
						
							| 
									
										
										
										
											2016-04-09 16:14:05 +02:00
										 |  |  |         'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/', | 
					
						
							|  |  |  |         'md5': '5d7e3195a447cb13e9267e931d8dd5a5', | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2016-04-09 16:14:05 +02:00
										 |  |  |             'id': '459911', | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  |             'display_id': 'Australian-sewerage-treatment-facility-', | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2016-04-09 16:14:05 +02:00
										 |  |  |             'title': 'Organic mattresses used to clean waste water', | 
					
						
							|  |  |  |             'upload_date': '20160409', | 
					
						
							| 
									
										
										
										
											2016-04-10 16:36:44 +02:00
										 |  |  |             'thumbnail': 're:^https?://.*\.jpg', | 
					
						
							|  |  |  |             'description': 'md5:20002e654bbafb6908395a5c0cfcd125' | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         video_id = mobj.group('id') | 
					
						
							|  |  |  |         display_id = mobj.group('display_id') or video_id | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, display_id) | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # extract video URL from webpage | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  |         video_url = self._hidden_inputs(webpage)['inpPlayback'] | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # build list of available formats | 
					
						
							|  |  |  |         # specified in http://www.presstv.ir/Scripts/playback.js | 
					
						
							|  |  |  |         base_url = 'http://192.99.219.222:82/presstv' | 
					
						
							| 
									
										
										
										
											2016-04-10 16:36:44 +02:00
										 |  |  |         _formats = [ | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  |             (180, '_low200.mp4'), | 
					
						
							|  |  |  |             (360, '_low400.mp4'), | 
					
						
							|  |  |  |             (720, '_low800.mp4'), | 
					
						
							|  |  |  |             (1080, '.mp4') | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  |         ] | 
					
						
							| 
									
										
										
										
											2016-04-10 16:36:44 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  |         formats = [{ | 
					
						
							|  |  |  |             'url': base_url + video_url[:-4] + extension, | 
					
						
							|  |  |  |             'format_id': '%dp' % height, | 
					
						
							|  |  |  |             'height': height, | 
					
						
							|  |  |  |         } for height, extension in _formats] | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # extract video metadata | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  |         title = remove_start( | 
					
						
							|  |  |  |             self._html_search_meta('title', webpage, fatal=True), 'PressTV-') | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-10 16:36:44 +02:00
										 |  |  |         thumbnail = self._og_search_thumbnail(webpage) | 
					
						
							|  |  |  |         description = self._og_search_description(webpage) | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-10 16:36:44 +02:00
										 |  |  |         upload_date = '%04d%02d%02d' % ( | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  |             int(mobj.group('y')), | 
					
						
							|  |  |  |             int(mobj.group('m')), | 
					
						
							|  |  |  |             int(mobj.group('d')), | 
					
						
							| 
									
										
										
										
											2016-04-10 16:36:44 +02:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							| 
									
										
										
										
											2016-04-11 16:14:07 +08:00
										 |  |  |             'display_id': display_id, | 
					
						
							| 
									
										
										
										
											2015-10-09 18:38:11 +02:00
										 |  |  |             'title': title, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |             'upload_date': upload_date, | 
					
						
							|  |  |  |             'description': description | 
					
						
							|  |  |  |         } |