| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | import json | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | from ..compat import ( | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |     compat_urlparse, | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2015-01-11 15:42:53 +06:00
										 |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |     clean_html, | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  |     get_element_by_id, | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | class VeeHDIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2013-12-04 20:34:47 +07:00
										 |  |  |     _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)' | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     _TEST = { | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:39 +06:00
										 |  |  |         'url': 'http://veehd.com/video/4639434_Solar-Sinter', | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:39 +06:00
										 |  |  |             'id': '4639434', | 
					
						
							| 
									
										
										
										
											2014-08-28 01:37:57 +02:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:39 +06:00
										 |  |  |             'title': 'Solar Sinter', | 
					
						
							|  |  |  |             'uploader_id': 'VideoEyes', | 
					
						
							|  |  |  |             'description': 'md5:46a840e8692ddbaffb5f81d9885cb457', | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |         }, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  |         # VeeHD seems to send garbage on the first request. | 
					
						
							|  |  |  |         # See https://github.com/rg3/youtube-dl/issues/2102 | 
					
						
							|  |  |  |         self._download_webpage(url, video_id, 'Requesting webpage') | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							| 
									
										
										
										
											2015-01-11 15:42:53 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if 'This video has been removed<' in webpage: | 
					
						
							|  |  |  |             raise ExtractorError('Video %s has been removed' % video_id, expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  |         player_path = self._search_regex( | 
					
						
							|  |  |  |             r'\$\("#playeriframe"\).attr\({src : "(.+?)"', | 
					
						
							|  |  |  |             webpage, 'player path') | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |         player_url = compat_urlparse.urljoin(url, player_path) | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self._download_webpage(player_url, video_id, 'Requesting player page') | 
					
						
							|  |  |  |         player_page = self._download_webpage( | 
					
						
							|  |  |  |             player_url, video_id, 'Downloading player page') | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:16 +06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  |         config_json = self._search_regex( | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:16 +06:00
										 |  |  |             r'value=\'config=({.+?})\'', player_page, 'config json', default=None) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if config_json: | 
					
						
							|  |  |  |             config = json.loads(config_json) | 
					
						
							|  |  |  |             video_url = compat_urlparse.unquote(config['clip']['url']) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             iframe_src = self._search_regex( | 
					
						
							|  |  |  |                 r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url') | 
					
						
							|  |  |  |             iframe_url = 'http://veehd.com/%s' % iframe_src | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             self._download_webpage(iframe_url, video_id, 'Requesting iframe page') | 
					
						
							|  |  |  |             iframe_page = self._download_webpage( | 
					
						
							|  |  |  |                 iframe_url, video_id, 'Downloading iframe page') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             video_url = self._search_regex( | 
					
						
							|  |  |  |                 r"file\s*:\s*'([^']+)'", iframe_page, 'video url') | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0]) | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:16 +06:00
										 |  |  |         uploader_id = self._html_search_regex( | 
					
						
							|  |  |  |             r'<a href="/profile/\d+">(.+?)</a>', | 
					
						
							|  |  |  |             webpage, 'uploader') | 
					
						
							|  |  |  |         thumbnail = self._search_regex( | 
					
						
							|  |  |  |             r'<img id="veehdpreview" src="(.+?)"', | 
					
						
							|  |  |  |             webpage, 'thumbnail') | 
					
						
							|  |  |  |         description = self._html_search_regex( | 
					
						
							|  |  |  |             r'<td class="infodropdown".*?<div>(.*?)<ul', | 
					
						
							|  |  |  |             webpage, 'description', flags=re.DOTALL) | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             '_type': 'video', | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'url': video_url, | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'uploader_id': uploader_id, | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							|  |  |  |         } |