| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2016-03-26 02:19:24 +06:00
										 |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |     clean_html, | 
					
						
							|  |  |  |     determine_ext, | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							|  |  |  |     qualities, | 
					
						
							| 
									
										
										
										
											2016-03-26 02:19:24 +06:00
										 |  |  |     urlencode_postdata, | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |     xpath_text, | 
					
						
							| 
									
										
										
										
											2016-03-26 02:19:24 +06:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class NFBIE(InfoExtractor): | 
					
						
							|  |  |  |     IE_NAME = 'nfb' | 
					
						
							|  |  |  |     IE_DESC = 'National Film Board of Canada' | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     _TEST = { | 
					
						
							|  |  |  |         'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'qallunaat_why_white_people_are_funny', | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |             'ext': 'flv', | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  |             'title': 'Qallunaat! Why White People Are Funny ', | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |             'description': 'md5:6b8e32dde3abf91e58857b174916620c', | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  |             'duration': 3128, | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |             'creator': 'Mark Sandiford', | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  |             'uploader': 'Mark Sandiford', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             # rtmp download | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |         config = self._download_xml( | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |             'https://www.nfb.ca/film/%s/player_config' % video_id, | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |             video_id, 'Downloading player config XML', | 
					
						
							|  |  |  |             data=urlencode_postdata({'getConfig': 'true'}), | 
					
						
							|  |  |  |             headers={ | 
					
						
							|  |  |  |                 'Content-Type': 'application/x-www-form-urlencoded', | 
					
						
							|  |  |  |                 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf' | 
					
						
							|  |  |  |             }) | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |         title, description, thumbnail, duration, uploader, author = [None] * 6 | 
					
						
							|  |  |  |         thumbnails, formats = [[]] * 2 | 
					
						
							|  |  |  |         subtitles = {} | 
					
						
							| 
									
										
										
										
											2014-02-07 19:23:53 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |         for media in config.findall('./player/stream/media'): | 
					
						
							|  |  |  |             if media.get('type') == 'posterImage': | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |                 quality_key = qualities(('low', 'high')) | 
					
						
							|  |  |  |                 thumbnails = [] | 
					
						
							|  |  |  |                 for asset in media.findall('assets/asset'): | 
					
						
							|  |  |  |                     asset_url = xpath_text(asset, 'default/url', default=None) | 
					
						
							|  |  |  |                     if not asset_url: | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  |                     quality = asset.get('quality') | 
					
						
							|  |  |  |                     thumbnails.append({ | 
					
						
							|  |  |  |                         'url': asset_url, | 
					
						
							|  |  |  |                         'id': quality, | 
					
						
							|  |  |  |                         'preference': quality_key(quality), | 
					
						
							|  |  |  |                     }) | 
					
						
							| 
									
										
										
										
											2014-02-07 19:23:53 +07:00
										 |  |  |             elif media.get('type') == 'video': | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |                 title = xpath_text(media, 'title', fatal=True) | 
					
						
							| 
									
										
										
										
											2014-05-08 22:07:14 +07:00
										 |  |  |                 for asset in media.findall('assets/asset'): | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |                     quality = asset.get('quality') | 
					
						
							|  |  |  |                     height = int_or_none(self._search_regex( | 
					
						
							|  |  |  |                         r'^(\d+)[pP]$', quality or '', 'height', default=None)) | 
					
						
							|  |  |  |                     for node in asset: | 
					
						
							|  |  |  |                         streamer = xpath_text(node, 'streamerURI', default=None) | 
					
						
							|  |  |  |                         if not streamer: | 
					
						
							|  |  |  |                             continue | 
					
						
							|  |  |  |                         play_path = xpath_text(node, 'url', default=None) | 
					
						
							|  |  |  |                         if not play_path: | 
					
						
							|  |  |  |                             continue | 
					
						
							| 
									
										
										
										
											2014-05-08 22:07:14 +07:00
										 |  |  |                         formats.append({ | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |                             'url': streamer, | 
					
						
							|  |  |  |                             'app': streamer.split('/', 3)[3], | 
					
						
							|  |  |  |                             'play_path': play_path, | 
					
						
							| 
									
										
										
										
											2014-05-08 22:07:14 +07:00
										 |  |  |                             'rtmp_live': False, | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |                             'ext': 'flv', | 
					
						
							|  |  |  |                             'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag, | 
					
						
							|  |  |  |                             'height': height, | 
					
						
							| 
									
										
										
										
											2014-05-08 22:07:14 +07:00
										 |  |  |                         }) | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |                 self._sort_formats(formats) | 
					
						
							|  |  |  |                 description = clean_html(xpath_text(media, 'description')) | 
					
						
							|  |  |  |                 uploader = xpath_text(media, 'author') | 
					
						
							|  |  |  |                 duration = int_or_none(media.get('duration')) | 
					
						
							|  |  |  |                 for subtitle in media.findall('./subtitles/subtitle'): | 
					
						
							|  |  |  |                     subtitle_url = xpath_text(subtitle, 'url', default=None) | 
					
						
							|  |  |  |                     if not subtitle_url: | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  |                     lang = xpath_text(subtitle, 'lang', default='en') | 
					
						
							|  |  |  |                     subtitles.setdefault(lang, []).append({ | 
					
						
							|  |  |  |                         'url': subtitle_url, | 
					
						
							|  |  |  |                         'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(), | 
					
						
							|  |  |  |                     }) | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |             'thumbnails': thumbnails, | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  |             'duration': duration, | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |             'creator': uploader, | 
					
						
							| 
									
										
										
										
											2014-02-06 21:16:41 +07:00
										 |  |  |             'uploader': uploader, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2016-05-18 00:25:15 +06:00
										 |  |  |             'subtitles': subtitles, | 
					
						
							| 
									
										
										
										
											2014-11-23 20:41:03 +01:00
										 |  |  |         } |