61 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			61 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|  | import re | ||
|  | import os | ||
|  | import socket | ||
|  | 
 | ||
|  | from .common import InfoExtractor | ||
|  | from ..utils import ( | ||
|  |     compat_http_client, | ||
|  |     compat_str, | ||
|  |     compat_urllib_error, | ||
|  |     compat_urllib_parse, | ||
|  |     compat_urllib_request, | ||
|  | 
 | ||
|  |     ExtractorError, | ||
|  | ) | ||
|  | 
 | ||
|  | 
 | ||
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Resolve a depositfiles.com file page to its direct download URL.

        The file id is taken from the last path component of ``url``; the
        page is then requested in the English locale with the
        'Free download' form field set, and the resulting HTML is scanned
        for the download form's target.

        Returns a single-element list holding the info dictionary
        (``id``, ``url``, ``uploader``, ``upload_date``, ``title``, ``ext``).

        Raises ExtractorError when the page cannot be retrieved, when the
        site answers with an 'Attention ...' restriction notice (the notice
        text becomes the error message), or when no download URL is found.
        """
        def to_text(value):
            # Only byte strings need decoding.  ``bytes`` is ``str`` on
            # Python 2, so this covers both interpreters, and text strings
            # pass through untouched instead of failing on ``.decode``.
            if isinstance(value, bytes):
                return value.decode('utf-8')
            return value

        file_id = url.split('/')[-1]
        # Rebuild url in english locale
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed
        free_download_indication = {'gateway_result': '1'}
        # The request body has to be bytes on Python 3; the urlencoded form
        # data is plain ASCII, so this is a no-op on Python 2.
        post_data = compat_urllib_parse.urlencode(free_download_indication).encode('ascii')
        request = compat_urllib_request.Request(url, post_data)
        try:
            self.report_download_webpage(file_id)
            raw_webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Decode once at the I/O boundary so every regex below works on text;
        # matching str patterns against bytes is a TypeError on Python 3.
        webpage = raw_webpage.decode('utf-8', 'replace')

        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if mobj is None:
            # Try to figure out reason of the error.
            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if mobj is not None:
                restriction_message = re.sub(r'\s+', ' ', mobj.group(1)).strip()
                raise ExtractorError(u'%s' % restriction_message)
            raise ExtractorError(u'Unable to extract download URL from: %s' % url)

        file_url = mobj.group(1)
        # Extension without the leading dot; empty when the URL has none.
        file_extension = os.path.splitext(file_url)[1][1:]

        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        return [{
            'id': to_text(file_id),
            'url': to_text(file_url),
            'uploader': None,
            'upload_date': None,
            'title': file_title,
            'ext': to_text(file_extension),
        }]