| 
									
										
										
										
											2014-01-07 10:04:48 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  | from ..utils import unified_strdate | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class ArchiveOrgIE(InfoExtractor): | 
					
						
							|  |  |  |     IE_NAME = 'archive.org' | 
					
						
							|  |  |  |     IE_DESC = 'archive.org videos' | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$' | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect', | 
					
						
							| 
									
										
										
										
											2014-01-07 10:04:48 +01:00
										 |  |  |         'md5': '8af1d4cf447933ed3c7f4871162602db', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |             'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect', | 
					
						
							|  |  |  |             'ext': 'ogv', | 
					
						
							|  |  |  |             'title': '1968 Demo - FJCC Conference Presentation Reel #1', | 
					
						
							|  |  |  |             'description': 'md5:1780b464abaca9991d8968c877bb53ed', | 
					
						
							|  |  |  |             'upload_date': '19681210', | 
					
						
							|  |  |  |             'uploader': 'SRI International' | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://archive.org/details/Cops1922', | 
					
						
							|  |  |  |         'md5': '18f2a19e6d89af8425671da1cf3d4e04', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'Cops1922', | 
					
						
							|  |  |  |             'ext': 'ogv', | 
					
						
							|  |  |  |             'title': 'Buster Keaton\'s "Cops" (1922)', | 
					
						
							|  |  |  |             'description': 'md5:70f72ee70882f713d4578725461ffcc3', | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2014-12-28 20:04:21 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-06 11:48:36 +01:00
										 |  |  |         json_url = url + ('&' if '?' in url else '?') + 'output=json' | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |         data = self._download_json(json_url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def get_optional(data_dict, field): | 
					
						
							|  |  |  |             return data_dict['metadata'].get(field, [None])[0] | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |         title = get_optional(data, 'title') | 
					
						
							|  |  |  |         description = get_optional(data, 'description') | 
					
						
							|  |  |  |         uploader = get_optional(data, 'creator') | 
					
						
							|  |  |  |         upload_date = unified_strdate(get_optional(data, 'date')) | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-07 10:16:22 +01:00
										 |  |  |         formats = [ | 
					
						
							|  |  |  |             { | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  |                 'format': fdata['format'], | 
					
						
							|  |  |  |                 'url': 'http://' + data['server'] + data['dir'] + fn, | 
					
						
							|  |  |  |                 'file_size': int(fdata['size']), | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2014-01-07 10:16:22 +01:00
										 |  |  |             for fn, fdata in data['files'].items() | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  |             if 'Video' in fdata['format']] | 
					
						
							| 
									
										
										
										
											2014-01-07 10:16:22 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self._sort_formats(formats) | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-03 14:21:06 +01:00
										 |  |  |         return { | 
					
						
							| 
									
										
										
										
											2013-07-11 12:12:23 +02:00
										 |  |  |             '_type': 'video', | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							|  |  |  |             'uploader': uploader, | 
					
						
							|  |  |  |             'upload_date': upload_date, | 
					
						
							| 
									
										
										
										
											2013-12-03 14:21:06 +01:00
										 |  |  |             'thumbnail': data.get('misc', {}).get('image'), | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  |         } |