| 
									
										
										
										
											2014-01-07 10:04:48 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-16 23:42:36 +08:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     unified_strdate, | 
					
						
							|  |  |  |     clean_html, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-16 23:42:36 +08:00
										 |  |  | class ArchiveOrgIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  |     IE_NAME = 'archive.org' | 
					
						
							|  |  |  |     IE_DESC = 'archive.org videos' | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$' | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect', | 
					
						
							| 
									
										
										
										
											2014-01-07 10:04:48 +01:00
										 |  |  |         'md5': '8af1d4cf447933ed3c7f4871162602db', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |             'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect', | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  |             'ext': 'ogg', | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |             'title': '1968 Demo - FJCC Conference Presentation Reel #1', | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  |             'description': 'md5:da45c349df039f1cc8075268eb1b5c25', | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |             'upload_date': '19681210', | 
					
						
							|  |  |  |             'uploader': 'SRI International' | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://archive.org/details/Cops1922', | 
					
						
							| 
									
										
										
										
											2017-04-27 22:48:32 +02:00
										 |  |  |         'md5': '0869000b4ce265e8ca62738b336b268a', | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'Cops1922', | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |             'title': 'Buster Keaton\'s "Cops" (1922)', | 
					
						
							| 
									
										
										
										
											2017-04-27 22:48:32 +02:00
										 |  |  |             'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6', | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2014-12-28 20:04:21 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2014-12-29 02:08:46 +06:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  |         webpage = self._download_webpage( | 
					
						
							|  |  |  |             'http://archive.org/embed/' + video_id, video_id) | 
					
						
							|  |  |  |         jwplayer_playlist = self._parse_json(self._search_regex( | 
					
						
							| 
									
										
										
										
											2018-03-05 22:30:32 +07:00
										 |  |  |             r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)", | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  |             webpage, 'jwplayer playlist'), video_id) | 
					
						
							|  |  |  |         info = self._parse_jwplayer_data( | 
					
						
							|  |  |  |             {'playlist': jwplayer_playlist}, video_id, base_url=url) | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  |         def get_optional(metadata, field): | 
					
						
							|  |  |  |             return metadata.get(field, [None])[0] | 
					
						
							| 
									
										
										
										
											2013-07-08 02:04:11 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-08-05 16:40:21 +01:00
										 |  |  |         metadata = self._download_json( | 
					
						
							|  |  |  |             'http://archive.org/details/' + video_id, video_id, query={ | 
					
						
							|  |  |  |                 'output': 'json', | 
					
						
							|  |  |  |             })['metadata'] | 
					
						
							|  |  |  |         info.update({ | 
					
						
							|  |  |  |             'title': get_optional(metadata, 'title') or info.get('title'), | 
					
						
							|  |  |  |             'description': clean_html(get_optional(metadata, 'description')), | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  |         if info.get('_type') != 'playlist': | 
					
						
							|  |  |  |             info.update({ | 
					
						
							|  |  |  |                 'uploader': get_optional(metadata, 'creator'), | 
					
						
							|  |  |  |                 'upload_date': unified_strdate(get_optional(metadata, 'date')), | 
					
						
							|  |  |  |             }) | 
					
						
							| 
									
										
										
										
											2016-08-05 23:16:19 +07:00
										 |  |  |         return info |