| 
									
										
										
										
											2016-10-02 13:39:18 +02:00
										 |  |  | # coding: utf-8 | 
					
						
							| 
									
										
										
										
											2014-02-04 23:15:04 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-23 22:28:19 +02:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2019-03-27 18:49:29 +01:00
										 |  |  |     determine_ext, | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |     int_or_none, | 
					
						
							|  |  |  |     strip_or_none, | 
					
						
							|  |  |  |     xpath_attr, | 
					
						
							|  |  |  |     xpath_text, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2013-06-23 22:28:19 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class InaIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2019-03-27 18:49:29 +01:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)' | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2014-02-04 23:15:04 +01:00
										 |  |  |         'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', | 
					
						
							|  |  |  |         'md5': 'a667021bf2b41f8dc6049479d9bb38a3', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'I12055569', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'François Hollande "Je crois que c\'est clair"', | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |             'description': 'md5:3f09eb072a06cb286b8f7e4f77109663', | 
					
						
							| 
									
										
										
										
											2013-06-27 20:46:46 +02:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2019-03-27 18:49:29 +01:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.ina.fr/audio/P16173408', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.ina.fr/video/P16173408-video.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2013-06-23 22:28:19 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-02-04 23:15:04 +01:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							|  |  |  |         info_doc = self._download_xml( | 
					
						
							|  |  |  |             'http://player.ina.fr/notices/%s.mrss' % video_id, video_id) | 
					
						
							|  |  |  |         item = info_doc.find('channel/item') | 
					
						
							|  |  |  |         title = xpath_text(item, 'title', fatal=True) | 
					
						
							|  |  |  |         media_ns_xpath = lambda x: self._xpath_ns(x, 'http://search.yahoo.com/mrss/') | 
					
						
							|  |  |  |         content = item.find(media_ns_xpath('content')) | 
					
						
							| 
									
										
										
										
											2013-06-23 22:28:19 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |         get_furl = lambda x: xpath_attr(content, media_ns_xpath(x), 'url') | 
					
						
							|  |  |  |         formats = [] | 
					
						
							|  |  |  |         for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)): | 
					
						
							|  |  |  |             q_url = get_furl(q) | 
					
						
							|  |  |  |             if not q_url: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             formats.append({ | 
					
						
							|  |  |  |                 'format_id': q, | 
					
						
							|  |  |  |                 'url': q_url, | 
					
						
							|  |  |  |                 'width': w, | 
					
						
							|  |  |  |                 'height': h, | 
					
						
							|  |  |  |             }) | 
					
						
							|  |  |  |         if not formats: | 
					
						
							| 
									
										
										
										
											2019-03-27 18:49:29 +01:00
										 |  |  |             furl = get_furl('player') or content.attrib['url'] | 
					
						
							|  |  |  |             ext = determine_ext(furl) | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |             formats = [{ | 
					
						
							| 
									
										
										
										
											2019-03-27 18:49:29 +01:00
										 |  |  |                 'url': furl, | 
					
						
							|  |  |  |                 'vcodec': 'none' if ext == 'mp3' else None, | 
					
						
							|  |  |  |                 'ext': ext, | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |             }] | 
					
						
							| 
									
										
										
										
											2013-06-23 22:28:19 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |         thumbnails = [] | 
					
						
							|  |  |  |         for thumbnail in content.findall(media_ns_xpath('thumbnail')): | 
					
						
							|  |  |  |             thumbnail_url = thumbnail.get('url') | 
					
						
							|  |  |  |             if not thumbnail_url: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             thumbnails.append({ | 
					
						
							|  |  |  |                 'url': thumbnail_url, | 
					
						
							|  |  |  |                 'height': int_or_none(thumbnail.get('height')), | 
					
						
							|  |  |  |                 'width': int_or_none(thumbnail.get('width')), | 
					
						
							|  |  |  |             }) | 
					
						
							| 
									
										
										
										
											2013-06-23 22:28:19 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-02-04 23:15:04 +01:00
										 |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							| 
									
										
										
										
											2019-03-27 18:29:24 +01:00
										 |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'description': strip_or_none(xpath_text(item, 'description')), | 
					
						
							|  |  |  |             'thumbnails': thumbnails, | 
					
						
							| 
									
										
										
										
											2014-02-04 23:15:04 +01:00
										 |  |  |         } |