| 
									
										
										
										
											2014-01-29 16:44:21 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-23 22:14:22 +02:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     determine_ext, | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							|  |  |  |     parse_duration, | 
					
						
							| 
									
										
										
										
											2018-07-21 19:08:28 +07:00
										 |  |  |     url_or_none, | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2013-06-23 22:14:22 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class YouJizzIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  |     _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))' | 
					
						
							| 
									
										
										
										
											2016-07-20 22:41:13 +07:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2014-01-29 16:44:21 +01:00
										 |  |  |         'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  |         'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4', | 
					
						
							| 
									
										
										
										
											2014-01-29 16:44:21 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2014-11-12 15:19:23 +01:00
										 |  |  |             'id': '2189178', | 
					
						
							| 
									
										
										
										
											2016-09-03 18:37:36 +08:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2016-02-14 15:37:17 +06:00
										 |  |  |             'title': 'Zeichentrick 1', | 
					
						
							|  |  |  |             'age_limit': 18, | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  |             'duration': 2874, | 
					
						
							| 
									
										
										
										
											2013-06-27 20:46:46 +02:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2016-07-20 22:41:13 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://www.youjizz.com/videos/-2189178.html', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'https://www.youjizz.com/videos/embed/31991001', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-07-20 22:41:13 +07:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2013-06-23 22:14:22 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         video_id = mobj.group('id') or mobj.group('embed_id') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-23 22:14:22 +02:00
										 |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							| 
									
										
										
										
											2014-11-12 15:19:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  |         title = self._html_search_regex( | 
					
						
							|  |  |  |             r'<title>(.+?)</title>', webpage, 'title') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         formats = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         encodings = self._parse_json( | 
					
						
							|  |  |  |             self._search_regex( | 
					
						
							|  |  |  |                 r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings', | 
					
						
							|  |  |  |                 default='[]'), | 
					
						
							|  |  |  |             video_id, fatal=False) | 
					
						
							|  |  |  |         for encoding in encodings: | 
					
						
							|  |  |  |             if not isinstance(encoding, dict): | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2018-07-21 19:08:28 +07:00
										 |  |  |             format_url = url_or_none(encoding.get('filename')) | 
					
						
							|  |  |  |             if not format_url: | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  |                 continue | 
					
						
							|  |  |  |             if determine_ext(format_url) == 'm3u8': | 
					
						
							|  |  |  |                 formats.extend(self._extract_m3u8_formats( | 
					
						
							|  |  |  |                     format_url, video_id, 'mp4', entry_protocol='m3u8_native', | 
					
						
							|  |  |  |                     m3u8_id='hls', fatal=False)) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 format_id = encoding.get('name') or encoding.get('quality') | 
					
						
							|  |  |  |                 height = int_or_none(self._search_regex( | 
					
						
							|  |  |  |                     r'^(\d+)[pP]', format_id, 'height', default=None)) | 
					
						
							|  |  |  |                 formats.append({ | 
					
						
							|  |  |  |                     'url': format_url, | 
					
						
							|  |  |  |                     'format_id': format_id, | 
					
						
							|  |  |  |                     'height': height, | 
					
						
							|  |  |  |                 }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if formats: | 
					
						
							|  |  |  |             info_dict = { | 
					
						
							|  |  |  |                 'formats': formats, | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # YouJizz's HTML5 player has invalid HTML | 
					
						
							|  |  |  |             webpage = webpage.replace('"controls', '" controls') | 
					
						
							|  |  |  |             info_dict = self._parse_html5_media_entries( | 
					
						
							|  |  |  |                 url, webpage, video_id)[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         duration = parse_duration(self._search_regex( | 
					
						
							|  |  |  |             r'<strong>Runtime:</strong>([^<]+)', webpage, 'duration', | 
					
						
							|  |  |  |             default=None)) | 
					
						
							|  |  |  |         uploader = self._search_regex( | 
					
						
							|  |  |  |             r'<strong>Uploaded By:.*?<a[^>]*>([^<]+)', webpage, 'uploader', | 
					
						
							|  |  |  |             default=None) | 
					
						
							| 
									
										
										
										
											2013-06-23 22:14:22 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-03 18:37:36 +08:00
										 |  |  |         info_dict.update({ | 
					
						
							| 
									
										
										
										
											2014-01-29 16:44:21 +01:00
										 |  |  |             'id': video_id, | 
					
						
							| 
									
										
										
										
											2017-07-30 15:48:22 +07:00
										 |  |  |             'title': title, | 
					
						
							|  |  |  |             'age_limit': self._rta_search(webpage), | 
					
						
							|  |  |  |             'duration': duration, | 
					
						
							|  |  |  |             'uploader': uploader, | 
					
						
							| 
									
										
										
										
											2016-09-03 18:37:36 +08:00
										 |  |  |         }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return info_dict |