| 
									
										
										
										
											2016-05-17 15:38:57 +08:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import calendar | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | import time | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .amp import AMPIE | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..compat import compat_urlparse | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class AbcNewsVideoIE(AMPIE): | 
					
						
							|  |  |  |     IE_NAME = 'abcnews:video' | 
					
						
							| 
									
										
										
										
											2017-04-26 21:21:17 +02:00
										 |  |  |     _VALID_URL = r'''(?x)
 | 
					
						
							| 
									
										
										
										
											2017-05-28 07:05:50 +07:00
										 |  |  |                     https?:// | 
					
						
							|  |  |  |                         abcnews\.go\.com/ | 
					
						
							|  |  |  |                         (?: | 
					
						
							|  |  |  |                             [^/]+/video/(?P<display_id>[0-9a-z-]+)-| | 
					
						
							|  |  |  |                             video/embed\?.*?\bid= | 
					
						
							|  |  |  |                         ) | 
					
						
							|  |  |  |                         (?P<id>\d+) | 
					
						
							|  |  |  |                     '''
 | 
					
						
							| 
									
										
										
										
											2016-05-17 15:38:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '20411932', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'display_id': 'week-exclusive-irans-foreign-minister-zarif', | 
					
						
							|  |  |  |             'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif', | 
					
						
							|  |  |  |             'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.', | 
					
						
							|  |  |  |             'duration': 180, | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg$', | 
					
						
							| 
									
										
										
										
											2016-05-17 15:38:57 +08:00
										 |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             # m3u8 download | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2017-04-26 21:21:17 +02:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://abcnews.go.com/video/embed?id=46979033', | 
					
						
							| 
									
										
										
										
											2017-05-28 07:05:50 +07:00
										 |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-05-17 15:38:57 +08:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         display_id = mobj.group('display_id') | 
					
						
							|  |  |  |         video_id = mobj.group('id') | 
					
						
							|  |  |  |         info_dict = self._extract_feed_info( | 
					
						
							|  |  |  |             'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) | 
					
						
							|  |  |  |         info_dict.update({ | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'display_id': display_id, | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  |         return info_dict | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class AbcNewsIE(InfoExtractor): | 
					
						
							|  |  |  |     IE_NAME = 'abcnews' | 
					
						
							| 
									
										
										
										
											2016-09-08 17:04:57 +07:00
										 |  |  |     _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' | 
					
						
							| 
									
										
										
										
											2016-05-17 15:38:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '10498713', | 
					
						
							|  |  |  |             'ext': 'flv', | 
					
						
							|  |  |  |             'display_id': 'dramatic-video-rare-death-job-america', | 
					
						
							|  |  |  |             'title': 'Occupational Hazards', | 
					
						
							|  |  |  |             'description': 'Nightline investigates the dangers that lurk at various jobs.', | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg$', | 
					
						
							| 
									
										
										
										
											2016-05-17 15:38:57 +08:00
										 |  |  |             'upload_date': '20100428', | 
					
						
							|  |  |  |             'timestamp': 1272412800, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'add_ie': ['AbcNewsVideo'], | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '39125818', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016', | 
					
						
							|  |  |  |             'title': 'Justin Timberlake Drops Hints For Secret Single', | 
					
						
							|  |  |  |             'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.', | 
					
						
							|  |  |  |             'upload_date': '20160515', | 
					
						
							|  |  |  |             'timestamp': 1463329500, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             # m3u8 download | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |             # The embedded YouTube video is blocked due to copyright issues | 
					
						
							|  |  |  |             'playlist_items': '1', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'add_ie': ['AbcNewsVideo'], | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         display_id = mobj.group('display_id') | 
					
						
							|  |  |  |         video_id = mobj.group('id') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  |         video_url = self._search_regex( | 
					
						
							|  |  |  |             r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL') | 
					
						
							|  |  |  |         full_video_url = compat_urlparse.urljoin(url, video_url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         youtube_url = self._html_search_regex( | 
					
						
							|  |  |  |             r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"', | 
					
						
							|  |  |  |             webpage, 'YouTube URL', default=None) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         timestamp = None | 
					
						
							|  |  |  |         date_str = self._html_search_regex( | 
					
						
							|  |  |  |             r'<span[^>]+class="timestamp">([^<]+)</span>', | 
					
						
							|  |  |  |             webpage, 'timestamp', fatal=False) | 
					
						
							|  |  |  |         if date_str: | 
					
						
							|  |  |  |             tz_offset = 0 | 
					
						
							|  |  |  |             if date_str.endswith(' ET'):  # Eastern Time | 
					
						
							|  |  |  |                 tz_offset = -5 | 
					
						
							|  |  |  |                 date_str = date_str[:-3] | 
					
						
							|  |  |  |             date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p'] | 
					
						
							|  |  |  |             for date_format in date_formats: | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format)) | 
					
						
							|  |  |  |                 except ValueError: | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |             if timestamp is not None: | 
					
						
							|  |  |  |                 timestamp -= tz_offset * 3600 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         entry = { | 
					
						
							|  |  |  |             '_type': 'url_transparent', | 
					
						
							|  |  |  |             'ie_key': AbcNewsVideoIE.ie_key(), | 
					
						
							|  |  |  |             'url': full_video_url, | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'display_id': display_id, | 
					
						
							|  |  |  |             'timestamp': timestamp, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if youtube_url: | 
					
						
							|  |  |  |             entries = [entry, self.url_result(youtube_url, 'Youtube')] | 
					
						
							|  |  |  |             return self.playlist_result(entries) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return entry |