| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | import json | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | from ..compat import ( | 
					
						
							| 
									
										
										
										
											2015-07-18 00:04:25 +06:00
										 |  |  |     compat_urllib_parse_unquote, | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |     compat_urlparse, | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2015-01-11 15:42:53 +06:00
										 |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |     clean_html, | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  |     get_element_by_id, | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | class VeeHDIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2013-12-04 20:34:47 +07:00
										 |  |  |     _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)' | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-04-30 22:37:41 +08:00
										 |  |  |     # Seems VeeHD videos have multiple copies on several servers, all of | 
					
						
							|  |  |  |     # whom have different MD5 checksums, so omit md5 field in all tests | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:39 +06:00
										 |  |  |         'url': 'http://veehd.com/video/4639434_Solar-Sinter', | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:39 +06:00
										 |  |  |             'id': '4639434', | 
					
						
							| 
									
										
										
										
											2014-08-28 01:37:57 +02:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:39 +06:00
										 |  |  |             'title': 'Solar Sinter', | 
					
						
							|  |  |  |             'uploader_id': 'VideoEyes', | 
					
						
							|  |  |  |             'description': 'md5:46a840e8692ddbaffb5f81d9885cb457', | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-04-30 22:37:41 +08:00
										 |  |  |         'skip': 'Video deleted', | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '4905758', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Elysian Fields - Channeling', | 
					
						
							|  |  |  |             'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b', | 
					
						
							|  |  |  |             'uploader_id': 'spotted', | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     }, { | 
					
						
							| 
									
										
										
										
											2015-04-30 22:58:03 +08:00
										 |  |  |         'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer', | 
					
						
							| 
									
										
										
										
											2015-04-30 22:37:41 +08:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-04-30 22:58:03 +08:00
										 |  |  |             'id': '2046729', | 
					
						
							| 
									
										
										
										
											2015-04-30 22:37:41 +08:00
										 |  |  |             'ext': 'avi', | 
					
						
							| 
									
										
										
										
											2015-04-30 22:58:03 +08:00
										 |  |  |             'title': '2012 (2009) DivX Trailer', | 
					
						
							|  |  |  |             'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b', | 
					
						
							|  |  |  |             'uploader_id': 'Movie_Trailers', | 
					
						
							| 
									
										
										
										
											2015-04-30 22:37:41 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  |         # VeeHD seems to send garbage on the first request. | 
					
						
							|  |  |  |         # See https://github.com/rg3/youtube-dl/issues/2102 | 
					
						
							|  |  |  |         self._download_webpage(url, video_id, 'Requesting webpage') | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							| 
									
										
										
										
											2015-01-11 15:42:53 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if 'This video has been removed<' in webpage: | 
					
						
							|  |  |  |             raise ExtractorError('Video %s has been removed' % video_id, expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  |         player_path = self._search_regex( | 
					
						
							|  |  |  |             r'\$\("#playeriframe"\).attr\({src : "(.+?)"', | 
					
						
							|  |  |  |             webpage, 'player path') | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  |         player_url = compat_urlparse.urljoin(url, player_path) | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self._download_webpage(player_url, video_id, 'Requesting player page') | 
					
						
							|  |  |  |         player_page = self._download_webpage( | 
					
						
							|  |  |  |             player_url, video_id, 'Downloading player page') | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:16 +06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-04-30 22:37:41 +08:00
										 |  |  |         video_url = None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-06 12:54:01 +01:00
										 |  |  |         config_json = self._search_regex( | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:16 +06:00
										 |  |  |             r'value=\'config=({.+?})\'', player_page, 'config json', default=None) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if config_json: | 
					
						
							|  |  |  |             config = json.loads(config_json) | 
					
						
							| 
									
										
										
										
											2015-07-18 00:04:25 +06:00
										 |  |  |             video_url = compat_urllib_parse_unquote(config['clip']['url']) | 
					
						
							| 
									
										
										
										
											2015-04-30 22:37:41 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if not video_url: | 
					
						
							|  |  |  |             video_url = self._html_search_regex( | 
					
						
							|  |  |  |                 r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"', | 
					
						
							|  |  |  |                 player_page, 'video url', default=None) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not video_url: | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:16 +06:00
										 |  |  |             iframe_src = self._search_regex( | 
					
						
							|  |  |  |                 r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url') | 
					
						
							|  |  |  |             iframe_url = 'http://veehd.com/%s' % iframe_src | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             self._download_webpage(iframe_url, video_id, 'Requesting iframe page') | 
					
						
							|  |  |  |             iframe_page = self._download_webpage( | 
					
						
							|  |  |  |                 iframe_url, video_id, 'Downloading iframe page') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             video_url = self._search_regex( | 
					
						
							|  |  |  |                 r"file\s*:\s*'([^']+)'", iframe_page, 'video url') | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0]) | 
					
						
							| 
									
										
										
										
											2015-01-11 16:20:16 +06:00
										 |  |  |         uploader_id = self._html_search_regex( | 
					
						
							|  |  |  |             r'<a href="/profile/\d+">(.+?)</a>', | 
					
						
							|  |  |  |             webpage, 'uploader') | 
					
						
							|  |  |  |         thumbnail = self._search_regex( | 
					
						
							|  |  |  |             r'<img id="veehdpreview" src="(.+?)"', | 
					
						
							|  |  |  |             webpage, 'thumbnail') | 
					
						
							|  |  |  |         description = self._html_search_regex( | 
					
						
							|  |  |  |             r'<td class="infodropdown".*?<div>(.*?)<ul', | 
					
						
							|  |  |  |             webpage, 'description', flags=re.DOTALL) | 
					
						
							| 
									
										
										
										
											2013-09-02 11:54:09 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             '_type': 'video', | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'url': video_url, | 
					
						
							|  |  |  |             'uploader_id': uploader_id, | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							|  |  |  |         } |