| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  | # encoding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class AftonbladetIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2014-03-10 10:27:40 +01:00
										 |  |  |     _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])' | 
					
						
							| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  |     _TEST = { | 
					
						
							|  |  |  |         'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'article36015', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', | 
					
						
							|  |  |  |             'description': 'Jupiters måne mest aktiv av alla himlakroppar', | 
					
						
							| 
									
										
										
										
											2014-05-17 15:21:46 +10:00
										 |  |  |             'timestamp': 1394142732, | 
					
						
							| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  |             'upload_date': '20140306', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.search(self._VALID_URL, url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_id = mobj.group('video_id') | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # find internal video meta data | 
					
						
							| 
									
										
										
										
											2014-05-17 15:21:46 +10:00
										 |  |  |         meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' | 
					
						
							| 
									
										
										
										
											2014-03-10 10:27:40 +01:00
										 |  |  |         internal_meta_id = self._html_search_regex( | 
					
						
							|  |  |  |             r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id') | 
					
						
							| 
									
										
										
										
											2014-05-17 15:21:46 +10:00
										 |  |  |         internal_meta_url = meta_url % internal_meta_id | 
					
						
							| 
									
										
										
										
											2014-03-10 10:27:40 +01:00
										 |  |  |         internal_meta_json = self._download_json( | 
					
						
							|  |  |  |             internal_meta_url, video_id, 'Downloading video meta data') | 
					
						
							| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # find internal video formats | 
					
						
							| 
									
										
										
										
											2014-05-17 15:21:46 +10:00
										 |  |  |         format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' | 
					
						
							| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  |         internal_video_id = internal_meta_json['videoId'] | 
					
						
							| 
									
										
										
										
											2014-05-17 15:21:46 +10:00
										 |  |  |         internal_formats_url = format_url % internal_video_id | 
					
						
							| 
									
										
										
										
											2014-03-10 10:27:40 +01:00
										 |  |  |         internal_formats_json = self._download_json( | 
					
						
							|  |  |  |             internal_formats_url, video_id, 'Downloading video formats') | 
					
						
							| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         formats = [] | 
					
						
							| 
									
										
										
										
											2014-03-10 10:27:40 +01:00
										 |  |  |         for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']: | 
					
						
							| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  |             p = fmt['paths'][0] | 
					
						
							|  |  |  |             formats.append({ | 
					
						
							|  |  |  |                 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), | 
					
						
							|  |  |  |                 'ext': 'mp4', | 
					
						
							|  |  |  |                 'width': fmt['width'], | 
					
						
							|  |  |  |                 'height': fmt['height'], | 
					
						
							|  |  |  |                 'tbr': fmt['bitrate'], | 
					
						
							|  |  |  |                 'protocol': 'http', | 
					
						
							|  |  |  |             }) | 
					
						
							| 
									
										
										
										
											2014-03-10 10:27:40 +01:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-10 10:27:40 +01:00
										 |  |  |         return { | 
					
						
							| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': internal_meta_json['title'], | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'thumbnail': internal_meta_json['imageUrl'], | 
					
						
							|  |  |  |             'description': internal_meta_json['shortPreamble'], | 
					
						
							| 
									
										
										
										
											2014-05-17 15:21:46 +10:00
										 |  |  |             'timestamp': internal_meta_json['timePublished'], | 
					
						
							| 
									
										
										
										
											2014-03-09 16:59:18 +01:00
										 |  |  |             'duration': internal_meta_json['duration'], | 
					
						
							|  |  |  |             'view_count': internal_meta_json['views'], | 
					
						
							| 
									
										
										
										
											2014-03-10 10:27:40 +01:00
										 |  |  |         } |