| 
									
										
										
										
											2013-06-23 21:06:20 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | import socket | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     compat_http_client, | 
					
						
							|  |  |  |     compat_str, | 
					
						
							|  |  |  |     compat_urllib_error, | 
					
						
							|  |  |  |     compat_urllib_parse, | 
					
						
							|  |  |  |     compat_urllib_request, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ExtractorError, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class DepositFilesIE(InfoExtractor): | 
					
						
							|  |  |  |     """Information extractor for depositfiles.com""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         file_id = url.split('/')[-1] | 
					
						
							|  |  |  |         # Rebuild url in english locale | 
					
						
							|  |  |  |         url = 'http://depositfiles.com/en/files/' + file_id | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Retrieve file webpage with 'Free download' button pressed | 
					
						
							| 
									
										
										
										
											2013-11-03 14:03:17 +01:00
										 |  |  |         free_download_indication = {'gateway_result' : '1'} | 
					
						
							| 
									
										
										
										
											2013-06-23 21:06:20 +02:00
										 |  |  |         request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication)) | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             self.report_download_webpage(file_id) | 
					
						
							|  |  |  |             webpage = compat_urllib_request.urlopen(request).read() | 
					
						
							|  |  |  |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | 
					
						
							|  |  |  |             raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Search for the real file URL | 
					
						
							|  |  |  |         mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage) | 
					
						
							|  |  |  |         if (mobj is None) or (mobj.group(1) is None): | 
					
						
							|  |  |  |             # Try to figure out reason of the error. | 
					
						
							|  |  |  |             mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) | 
					
						
							|  |  |  |             if (mobj is not None) and (mobj.group(1) is not None): | 
					
						
							|  |  |  |                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() | 
					
						
							|  |  |  |                 raise ExtractorError(u'%s' % restriction_message) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise ExtractorError(u'Unable to extract download URL from: %s' % url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         file_url = mobj.group(1) | 
					
						
							|  |  |  |         file_extension = os.path.splitext(file_url)[1][1:] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Search for file title | 
					
						
							|  |  |  |         file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return [{ | 
					
						
							|  |  |  |             'id':       file_id.decode('utf-8'), | 
					
						
							|  |  |  |             'url':      file_url.decode('utf-8'), | 
					
						
							|  |  |  |             'uploader': None, | 
					
						
							|  |  |  |             'upload_date':  None, | 
					
						
							|  |  |  |             'title':    file_title, | 
					
						
							|  |  |  |             'ext':      file_extension.decode('utf-8'), | 
					
						
							|  |  |  |         }] |