| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  | # coding: utf-8 | 
					
						
							|  |  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | import re | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  |  | from ..compat import compat_str | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |     determine_ext, | 
					
						
							|  |  |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |     int_or_none, | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |     unescapeHTML, | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  | ) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  | class MSNIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |     _VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)' | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |  |         'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE', | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |         'md5': '8442f66c116cbab1ff7098f986983458', | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |         'info_dict': { | 
					
						
							|  |  |  |  |             'id': 'BBqQYNE', | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |             'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message', | 
					
						
							|  |  |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |             'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message', | 
					
						
							|  |  |  |  |             'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25', | 
					
						
							|  |  |  |  |             'duration': 104, | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |             'uploader': 'CBS Entertainment', | 
					
						
							|  |  |  |  |             'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v', | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |         }, | 
					
						
							|  |  |  |  |     }, { | 
					
						
							|  |  |  |  |         'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf', | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |  |     }, { | 
					
						
							|  |  |  |  |         'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH', | 
					
						
							|  |  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |  |     }, { | 
					
						
							|  |  |  |  |         # geo restricted | 
					
						
							|  |  |  |  |         'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU', | 
					
						
							|  |  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-06-26 22:02:46 +07:00
										 |  |  |  |     }, { | 
					
						
							|  |  |  |  |         'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6', | 
					
						
							|  |  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |     }] | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |  |         video_id, display_id = mobj.group('id', 'display_id') | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         webpage = self._download_webpage(url, display_id) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |         video = self._parse_json( | 
					
						
							|  |  |  |  |             self._search_regex( | 
					
						
							|  |  |  |  |                 r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1', | 
					
						
							|  |  |  |  |                 webpage, 'video data', default='{}', group='data'), | 
					
						
							|  |  |  |  |             display_id, transform_source=unescapeHTML) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         if not video: | 
					
						
							|  |  |  |  |             error = unescapeHTML(self._search_regex( | 
					
						
							|  |  |  |  |                 r'data-error=(["\'])(?P<error>.+?)\1', | 
					
						
							|  |  |  |  |                 webpage, 'error', group='error')) | 
					
						
							|  |  |  |  |             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         title = video['title'] | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |         formats = [] | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |         for file_ in video.get('videoFiles', []): | 
					
						
							|  |  |  |  |             format_url = file_.get('url') | 
					
						
							|  |  |  |  |             if not format_url: | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             ext = determine_ext(format_url) | 
					
						
							|  |  |  |  |             if ext == 'ism': | 
					
						
							| 
									
										
										
										
											2016-10-19 16:24:43 +01:00
										 |  |  |  |                 formats.extend(self._extract_ism_formats( | 
					
						
							|  |  |  |  |                     format_url + '/Manifest', display_id, 'mss', fatal=False)) | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |             if 'm3u8' in format_url: | 
					
						
							|  |  |  |  |                 # m3u8_native should not be used here until | 
					
						
							|  |  |  |  |                 # https://github.com/rg3/youtube-dl/issues/9913 is fixed | 
					
						
							|  |  |  |  |                 m3u8_formats = self._extract_m3u8_formats( | 
					
						
							|  |  |  |  |                     format_url, display_id, 'mp4', | 
					
						
							|  |  |  |  |                     m3u8_id='hls', fatal=False) | 
					
						
							|  |  |  |  |                 formats.extend(m3u8_formats) | 
					
						
							|  |  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |                 formats.append({ | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |                     'url': format_url, | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |                     'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |                     'format_id': 'http', | 
					
						
							|  |  |  |  |                     'width': int_or_none(file_.get('width')), | 
					
						
							|  |  |  |  |                     'height': int_or_none(file_.get('height')), | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |                 }) | 
					
						
							|  |  |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         subtitles = {} | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |         for file_ in video.get('files', []): | 
					
						
							|  |  |  |  |             format_url = file_.get('url') | 
					
						
							|  |  |  |  |             format_code = file_.get('formatCode') | 
					
						
							|  |  |  |  |             if not format_url or not format_code: | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             if compat_str(format_code) == '3100': | 
					
						
							|  |  |  |  |                 subtitles.setdefault(file_.get('culture', 'en'), []).append({ | 
					
						
							|  |  |  |  |                     'ext': determine_ext(format_url, 'ttml'), | 
					
						
							|  |  |  |  |                     'url': format_url, | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |                 }) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         return { | 
					
						
							|  |  |  |  |             'id': video_id, | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |             'display_id': display_id, | 
					
						
							|  |  |  |  |             'title': title, | 
					
						
							|  |  |  |  |             'description': video.get('description'), | 
					
						
							|  |  |  |  |             'thumbnail': video.get('headlineImage', {}).get('url'), | 
					
						
							|  |  |  |  |             'duration': int_or_none(video.get('durationSecs')), | 
					
						
							|  |  |  |  |             'uploader': video.get('sourceFriendly'), | 
					
						
							|  |  |  |  |             'uploader_id': video.get('providerId'), | 
					
						
							|  |  |  |  |             'creator': video.get('creator'), | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |             'subtitles': subtitles, | 
					
						
							| 
									
										
										
										
											2016-06-26 21:10:05 +07:00
										 |  |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2016-05-19 20:59:59 +02:00
										 |  |  |  |         } |