| 
									
										
										
										
											2014-01-07 10:04:48 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  | import json | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | from ..compat import compat_urlparse | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2014-08-10 13:04:45 +02:00
										 |  |  |     int_or_none, | 
					
						
							| 
									
										
										
										
											2016-06-23 15:49:42 +01:00
										 |  |  |     parse_duration, | 
					
						
							|  |  |  |     unified_strdate, | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class AppleTrailersIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2015-12-23 10:40:45 +01:00
										 |  |  |     IE_NAME = 'appletrailers' | 
					
						
							| 
									
										
										
										
											2016-02-20 15:54:00 +08:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' | 
					
						
							| 
									
										
										
										
											2015-02-22 19:58:39 +06:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2015-10-05 20:21:53 +06:00
										 |  |  |         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', | 
					
						
							| 
									
										
										
										
											2015-02-18 00:49:10 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2016-06-23 15:49:42 +01:00
										 |  |  |             'id': '5111', | 
					
						
							|  |  |  |             'title': 'Man of Steel', | 
					
						
							| 
									
										
										
										
											2015-02-18 00:49:10 +01:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-10-05 20:21:53 +06:00
										 |  |  |         'playlist': [ | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |             { | 
					
						
							| 
									
										
										
										
											2015-10-05 20:21:53 +06:00
										 |  |  |                 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8', | 
					
						
							|  |  |  |                 'info_dict': { | 
					
						
							|  |  |  |                     'id': 'manofsteel-trailer4', | 
					
						
							|  |  |  |                     'ext': 'mov', | 
					
						
							|  |  |  |                     'duration': 111, | 
					
						
							|  |  |  |                     'title': 'Trailer 4', | 
					
						
							|  |  |  |                     'upload_date': '20130523', | 
					
						
							|  |  |  |                     'uploader_id': 'wb', | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |                 }, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							| 
									
										
										
										
											2015-10-05 20:21:53 +06:00
										 |  |  |                 'md5': 'b8017b7131b721fb4e8d6f49e1df908c', | 
					
						
							|  |  |  |                 'info_dict': { | 
					
						
							|  |  |  |                     'id': 'manofsteel-trailer3', | 
					
						
							|  |  |  |                     'ext': 'mov', | 
					
						
							|  |  |  |                     'duration': 182, | 
					
						
							|  |  |  |                     'title': 'Trailer 3', | 
					
						
							|  |  |  |                     'upload_date': '20130417', | 
					
						
							|  |  |  |                     'uploader_id': 'wb', | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |                 }, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							| 
									
										
										
										
											2015-10-05 20:21:53 +06:00
										 |  |  |                 'md5': 'd0f1e1150989b9924679b441f3404d48', | 
					
						
							|  |  |  |                 'info_dict': { | 
					
						
							|  |  |  |                     'id': 'manofsteel-trailer', | 
					
						
							|  |  |  |                     'ext': 'mov', | 
					
						
							|  |  |  |                     'duration': 148, | 
					
						
							|  |  |  |                     'title': 'Trailer', | 
					
						
							|  |  |  |                     'upload_date': '20121212', | 
					
						
							|  |  |  |                     'uploader_id': 'wb', | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |                 }, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							| 
									
										
										
										
											2015-10-05 20:21:53 +06:00
										 |  |  |                 'md5': '5fe08795b943eb2e757fa95cb6def1cb', | 
					
						
							|  |  |  |                 'info_dict': { | 
					
						
							|  |  |  |                     'id': 'manofsteel-teaser', | 
					
						
							|  |  |  |                     'ext': 'mov', | 
					
						
							|  |  |  |                     'duration': 93, | 
					
						
							|  |  |  |                     'title': 'Teaser', | 
					
						
							|  |  |  |                     'upload_date': '20120721', | 
					
						
							|  |  |  |                     'uploader_id': 'wb', | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |                 }, | 
					
						
							| 
									
										
										
										
											2014-03-27 21:10:51 +01:00
										 |  |  |             }, | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |         ] | 
					
						
							| 
									
										
										
										
											2015-12-23 17:48:37 +01:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'blackthorn', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_mincount': 2, | 
					
						
							| 
									
										
										
										
											2016-06-23 15:49:42 +01:00
										 |  |  |         'expected_warnings': ['Unable to download JSON metadata'], | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json | 
					
						
							|  |  |  |         'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '15881', | 
					
						
							|  |  |  |             'title': 'Kung Fu Panda 3', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_mincount': 4, | 
					
						
							| 
									
										
										
										
											2015-02-22 19:58:39 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://trailers.apple.com/ca/metropole/autrui/', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-02-20 15:54:00 +08:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2015-02-22 19:58:39 +06:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  |     _JSON_RE = r'iTunes.playURL\((.*?)\);' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         movie = mobj.group('movie') | 
					
						
							|  |  |  |         uploader_id = mobj.group('company') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-23 15:49:42 +01:00
										 |  |  |         webpage = self._download_webpage(url, movie) | 
					
						
							|  |  |  |         film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') | 
					
						
							|  |  |  |         film_data = self._download_json( | 
					
						
							|  |  |  |             'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, | 
					
						
							|  |  |  |             film_id, fatal=False) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if film_data: | 
					
						
							|  |  |  |             entries = [] | 
					
						
							|  |  |  |             for clip in film_data.get('clips', []): | 
					
						
							|  |  |  |                 clip_title = clip['title'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 formats = [] | 
					
						
							|  |  |  |                 for version, version_data in clip.get('versions', {}).items(): | 
					
						
							|  |  |  |                     for size, size_data in version_data.get('sizes', {}).items(): | 
					
						
							|  |  |  |                         src = size_data.get('src') | 
					
						
							|  |  |  |                         if not src: | 
					
						
							|  |  |  |                             continue | 
					
						
							|  |  |  |                         formats.append({ | 
					
						
							|  |  |  |                             'format_id': '%s-%s' % (version, size), | 
					
						
							|  |  |  |                             'url': re.sub(r'_(\d+p.mov)', r'_h\1', src), | 
					
						
							|  |  |  |                             'width': int_or_none(size_data.get('width')), | 
					
						
							|  |  |  |                             'height': int_or_none(size_data.get('height')), | 
					
						
							|  |  |  |                             'language': version[:2], | 
					
						
							|  |  |  |                         }) | 
					
						
							|  |  |  |                 self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 entries.append({ | 
					
						
							|  |  |  |                     'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(), | 
					
						
							|  |  |  |                     'formats': formats, | 
					
						
							|  |  |  |                     'title': clip_title, | 
					
						
							| 
									
										
										
										
											2016-06-23 19:03:34 +01:00
										 |  |  |                     'thumbnail': clip.get('screen') or clip.get('thumb'), | 
					
						
							| 
									
										
										
										
											2016-06-23 15:49:42 +01:00
										 |  |  |                     'duration': parse_duration(clip.get('runtime') or clip.get('faded')), | 
					
						
							|  |  |  |                     'upload_date': unified_strdate(clip.get('posted')), | 
					
						
							|  |  |  |                     'uploader_id': uploader_id, | 
					
						
							|  |  |  |                 }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             page_data = film_data.get('page', {}) | 
					
						
							|  |  |  |             return self.playlist_result(entries, film_id, page_data.get('movie_title')) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-27 21:10:51 +01:00
										 |  |  |         playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') | 
					
						
							| 
									
										
										
										
											2014-11-23 20:41:03 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-10 21:03:53 +01:00
										 |  |  |         def fix_html(s): | 
					
						
							| 
									
										
										
										
											2014-03-27 21:10:51 +01:00
										 |  |  |             s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s) | 
					
						
							| 
									
										
										
										
											2015-12-23 14:48:40 +01:00
										 |  |  |             s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s) | 
					
						
							| 
									
										
										
										
											2013-12-10 21:03:53 +01:00
										 |  |  |             # The ' in the onClick attributes are not escaped, it couldn't be parsed | 
					
						
							|  |  |  |             # like: http://trailers.apple.com/trailers/wb/gravity/ | 
					
						
							| 
									
										
										
										
											2014-11-23 20:41:03 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-10 21:03:53 +01:00
										 |  |  |             def _clean_json(m): | 
					
						
							| 
									
										
										
										
											2014-03-27 21:10:51 +01:00
										 |  |  |                 return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') | 
					
						
							| 
									
										
										
										
											2013-12-10 21:03:53 +01:00
										 |  |  |             s = re.sub(self._JSON_RE, _clean_json, s) | 
					
						
							| 
									
										
										
										
											2014-11-26 12:41:24 +01:00
										 |  |  |             s = '<html>%s</html>' % s | 
					
						
							| 
									
										
										
										
											2013-12-10 21:03:53 +01:00
										 |  |  |             return s | 
					
						
							|  |  |  |         doc = self._download_xml(playlist_url, movie, transform_source=fix_html) | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         playlist = [] | 
					
						
							|  |  |  |         for li in doc.findall('./div/ul/li'): | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  |             on_click = li.find('.//a').attrib['onClick'] | 
					
						
							|  |  |  |             trailer_info_json = self._search_regex(self._JSON_RE, | 
					
						
							| 
									
										
										
										
											2014-11-23 21:39:15 +01:00
										 |  |  |                                                    on_click, 'trailer info') | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  |             trailer_info = json.loads(trailer_info_json) | 
					
						
							| 
									
										
										
										
											2015-12-23 17:48:37 +01:00
										 |  |  |             first_url = trailer_info.get('url') | 
					
						
							|  |  |  |             if not first_url: | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  |             title = trailer_info['title'] | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |             video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() | 
					
						
							|  |  |  |             thumbnail = li.find('.//img').attrib['src'] | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  |             upload_date = trailer_info['posted'].replace('-', '') | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  |             runtime = trailer_info['runtime'] | 
					
						
							|  |  |  |             m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime) | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |             duration = None | 
					
						
							|  |  |  |             if m: | 
					
						
							|  |  |  |                 duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-29 20:59:19 +02:00
										 |  |  |             trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  |             settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) | 
					
						
							| 
									
										
										
										
											2014-03-27 21:10:51 +01:00
										 |  |  |             settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  |             formats = [] | 
					
						
							|  |  |  |             for format in settings['metadata']['sizes']: | 
					
						
							|  |  |  |                 # The src is a file pointing to the real video file | 
					
						
							|  |  |  |                 format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) | 
					
						
							|  |  |  |                 formats.append({ | 
					
						
							|  |  |  |                     'url': format_url, | 
					
						
							|  |  |  |                     'format': format['type'], | 
					
						
							| 
									
										
										
										
											2014-08-10 13:04:45 +02:00
										 |  |  |                     'width': int_or_none(format['width']), | 
					
						
							|  |  |  |                     'height': int_or_none(format['height']), | 
					
						
							| 
									
										
										
										
											2013-09-29 20:49:58 +02:00
										 |  |  |                 }) | 
					
						
							| 
									
										
										
										
											2013-12-25 15:24:41 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |             self._sort_formats(formats) | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-03 14:21:06 +01:00
										 |  |  |             playlist.append({ | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  |                 '_type': 'video', | 
					
						
							|  |  |  |                 'id': video_id, | 
					
						
							|  |  |  |                 'formats': formats, | 
					
						
							|  |  |  |                 'title': title, | 
					
						
							|  |  |  |                 'duration': duration, | 
					
						
							|  |  |  |                 'thumbnail': thumbnail, | 
					
						
							|  |  |  |                 'upload_date': upload_date, | 
					
						
							|  |  |  |                 'uploader_id': uploader_id, | 
					
						
							| 
									
										
										
										
											2015-01-24 18:19:58 +01:00
										 |  |  |                 'http_headers': { | 
					
						
							|  |  |  |                     'User-Agent': 'QuickTime compatible (youtube-dl)', | 
					
						
							|  |  |  |                 }, | 
					
						
							| 
									
										
										
										
											2013-12-03 14:21:06 +01:00
										 |  |  |             }) | 
					
						
							| 
									
										
										
										
											2013-08-28 02:18:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             '_type': 'playlist', | 
					
						
							|  |  |  |             'id': movie, | 
					
						
							|  |  |  |             'entries': playlist, | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2015-12-23 10:40:45 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class AppleTrailersSectionIE(InfoExtractor): | 
					
						
							|  |  |  |     IE_NAME = 'appletrailers:section' | 
					
						
							|  |  |  |     _SECTIONS = { | 
					
						
							|  |  |  |         'justadded': { | 
					
						
							|  |  |  |             'feed_path': 'just_added', | 
					
						
							|  |  |  |             'title': 'Just Added', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'exclusive': { | 
					
						
							|  |  |  |             'feed_path': 'exclusive', | 
					
						
							|  |  |  |             'title': 'Exclusive', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'justhd': { | 
					
						
							|  |  |  |             'feed_path': 'just_hd', | 
					
						
							|  |  |  |             'title': 'Just HD', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'mostpopular': { | 
					
						
							|  |  |  |             'feed_path': 'most_pop', | 
					
						
							|  |  |  |             'title': 'Most Popular', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'moviestudios': { | 
					
						
							|  |  |  |             'feed_path': 'studios', | 
					
						
							|  |  |  |             'title': 'Movie Studios', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS) | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'http://trailers.apple.com/#section=justadded', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'title': 'Just Added', | 
					
						
							|  |  |  |             'id': 'justadded', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_mincount': 80, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://trailers.apple.com/#section=exclusive', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'title': 'Exclusive', | 
					
						
							|  |  |  |             'id': 'exclusive', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_mincount': 80, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://trailers.apple.com/#section=justhd', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'title': 'Just HD', | 
					
						
							|  |  |  |             'id': 'justhd', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_mincount': 80, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://trailers.apple.com/#section=mostpopular', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'title': 'Most Popular', | 
					
						
							|  |  |  |             'id': 'mostpopular', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_mincount': 80, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://trailers.apple.com/#section=moviestudios', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'title': 'Movie Studios', | 
					
						
							|  |  |  |             'id': 'moviestudios', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist_mincount': 80, | 
					
						
							|  |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         section = self._match_id(url) | 
					
						
							|  |  |  |         section_data = self._download_json( | 
					
						
							|  |  |  |             'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], | 
					
						
							|  |  |  |             section) | 
					
						
							|  |  |  |         entries = [ | 
					
						
							|  |  |  |             self.url_result('http://trailers.apple.com' + e['location']) | 
					
						
							|  |  |  |             for e in section_data] | 
					
						
							|  |  |  |         return self.playlist_result(entries, section, self._SECTIONS[section]['title']) |