| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-29 07:53:21 +01:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-24 01:13:23 +01:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2015-07-18 23:31:14 +01:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |     int_or_none, | 
					
						
							| 
									
										
										
										
											2017-02-05 21:41:08 +08:00
										 |  |  |     lowercase_escape, | 
					
						
							| 
									
										
										
										
											2015-07-18 23:31:14 +01:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2015-06-24 01:13:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | class GoogleDriveIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2016-03-12 00:36:39 +06:00
										 |  |  |     _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})' | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', | 
					
						
							| 
									
										
										
										
											2017-02-05 21:41:08 +08:00
										 |  |  |         'md5': 'd109872761f7e7ecf353fa108c0dbe1e', | 
					
						
							| 
									
										
										
										
											2015-06-29 07:53:21 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |             'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ', | 
					
						
							| 
									
										
										
										
											2015-06-29 07:53:21 +01:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |             'title': 'Big Buck Bunny.mp4', | 
					
						
							| 
									
										
										
										
											2017-02-05 21:41:08 +08:00
										 |  |  |             'duration': 45, | 
					
						
							| 
									
										
										
										
											2015-06-29 07:53:21 +01:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2016-03-12 00:36:39 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         # video id is longer than 28 characters | 
					
						
							|  |  |  |         'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |     _FORMATS_EXT = { | 
					
						
							|  |  |  |         '5': 'flv', | 
					
						
							|  |  |  |         '6': 'flv', | 
					
						
							|  |  |  |         '13': '3gp', | 
					
						
							|  |  |  |         '17': '3gp', | 
					
						
							|  |  |  |         '18': 'mp4', | 
					
						
							|  |  |  |         '22': 'mp4', | 
					
						
							|  |  |  |         '34': 'flv', | 
					
						
							|  |  |  |         '35': 'flv', | 
					
						
							|  |  |  |         '36': '3gp', | 
					
						
							|  |  |  |         '37': 'mp4', | 
					
						
							|  |  |  |         '38': 'mp4', | 
					
						
							|  |  |  |         '43': 'webm', | 
					
						
							|  |  |  |         '44': 'webm', | 
					
						
							|  |  |  |         '45': 'webm', | 
					
						
							|  |  |  |         '46': 'webm', | 
					
						
							|  |  |  |         '59': 'mp4', | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2015-06-29 07:53:21 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _extract_url(webpage): | 
					
						
							|  |  |  |         mobj = re.search( | 
					
						
							| 
									
										
										
										
											2016-03-12 00:36:39 +06:00
										 |  |  |             r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})', | 
					
						
							| 
									
										
										
										
											2015-06-29 07:53:21 +01:00
										 |  |  |             webpage) | 
					
						
							|  |  |  |         if mobj: | 
					
						
							|  |  |  |             return 'https://drive.google.com/file/d/%s' % mobj.group('id') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         video_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |         webpage = self._download_webpage( | 
					
						
							| 
									
										
										
										
											2017-02-05 21:41:08 +08:00
										 |  |  |             'http://docs.google.com/file/d/%s' % video_id, video_id) | 
					
						
							| 
									
										
										
										
											2015-06-29 07:53:21 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |         reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) | 
					
						
							|  |  |  |         if reason: | 
					
						
							|  |  |  |             raise ExtractorError(reason) | 
					
						
							| 
									
										
										
										
											2015-06-24 01:13:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |         title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title') | 
					
						
							|  |  |  |         duration = int_or_none(self._search_regex( | 
					
						
							|  |  |  |             r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None)) | 
					
						
							|  |  |  |         fmt_stream_map = self._search_regex( | 
					
						
							|  |  |  |             r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',') | 
					
						
							|  |  |  |         fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') | 
					
						
							| 
									
										
										
										
											2015-06-24 01:13:23 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         formats = [] | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |         for fmt, fmt_stream in zip(fmt_list, fmt_stream_map): | 
					
						
							|  |  |  |             fmt_id, fmt_url = fmt_stream.split('|') | 
					
						
							|  |  |  |             resolution = fmt.split('/')[1] | 
					
						
							| 
									
										
										
										
											2015-06-24 01:13:23 +01:00
										 |  |  |             width, height = resolution.split('x') | 
					
						
							|  |  |  |             formats.append({ | 
					
						
							| 
									
										
										
										
											2017-02-05 21:41:08 +08:00
										 |  |  |                 'url': lowercase_escape(fmt_url), | 
					
						
							| 
									
										
										
										
											2015-06-24 01:13:23 +01:00
										 |  |  |                 'format_id': fmt_id, | 
					
						
							|  |  |  |                 'resolution': resolution, | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |                 'width': int_or_none(width), | 
					
						
							|  |  |  |                 'height': int_or_none(height), | 
					
						
							|  |  |  |                 'ext': self._FORMATS_EXT[fmt_id], | 
					
						
							| 
									
										
										
										
											2015-06-24 01:13:23 +01:00
										 |  |  |             }) | 
					
						
							|  |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							| 
									
										
										
										
											2016-02-22 03:13:18 +06:00
										 |  |  |             'thumbnail': self._og_search_thumbnail(webpage, default=None), | 
					
						
							| 
									
										
										
										
											2015-12-21 03:05:34 +01:00
										 |  |  |             'duration': duration, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2015-06-24 01:13:23 +01:00
										 |  |  |         } |