| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-07 01:35:09 +08:00
										 |  |  | import hashlib | 
					
						
							|  |  |  | import math | 
					
						
							|  |  |  | import random | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | import time | 
					
						
							|  |  |  | import uuid | 
					
						
							| 
									
										
										
										
											2015-06-07 01:35:09 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..compat import compat_urllib_parse | 
					
						
							| 
									
										
										
										
											2015-07-19 02:28:43 +08:00
										 |  |  | from ..utils import ExtractorError | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-07 00:37:29 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | class IqiyiIE(InfoExtractor): | 
					
						
							|  |  |  |     IE_NAME = 'iqiyi' | 
					
						
							| 
									
										
										
										
											2015-07-11 00:51:03 +08:00
										 |  |  |     IE_DESC = '爱奇艺' | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-07 02:13:22 +08:00
										 |  |  |     _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html' | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-07 02:09:33 +08:00
    # Extraction test cases.
    _TESTS = [{
        # Expected to yield one downloadable file (checked through 'md5').
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        'md5': '2cb594dc2781e6c941a110d8f358118b',
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
            'ext': 'f4v',
        }
    }, {
        # Expected to yield a playlist of eight parts whose ids follow the
        # '<video id>_part<N>' scheme built in _real_extract().
        'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
        'info_dict': {
            'id': 'e3f585b550a280af23c98b6cb2be19fb',
            'title': '名侦探柯南第752集',
        },
        'playlist': [{
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }, {
            'info_dict': {
                'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
                'ext': 'f4v',
                'title': '名侦探柯南第752集',
            },
        }],
        # Only the extracted metadata is checked for this case.
        'params': {
            'skip_download': True,
        },
    }]
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-07 02:25:00 +08:00
    # (bid, format_id) pairs. get_format() and get_bid() look entries up in
    # either direction; _real_extract() also uses int(bid) as the format's
    # 'preference' value.
    _FORMATS_MAP = [
        ('1', 'h6'),
        ('2', 'h5'),
        ('3', 'h4'),
        ('4', 'h3'),
        ('5', 'h2'),
        ('10', 'h1'),
    ]
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-23 22:22:04 +08:00
										 |  |  |     @staticmethod | 
					
						
							|  |  |  |     def md5_text(text): | 
					
						
							|  |  |  |         return hashlib.md5(text.encode('utf-8')).hexdigest() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-07 00:44:54 +08:00
										 |  |  |     def construct_video_urls(self, data, video_id, _uuid): | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |         def do_xor(x, y): | 
					
						
							|  |  |  |             a = y % 3 | 
					
						
							|  |  |  |             if a == 1: | 
					
						
							|  |  |  |                 return x ^ 121 | 
					
						
							|  |  |  |             if a == 2: | 
					
						
							|  |  |  |                 return x ^ 72 | 
					
						
							|  |  |  |             return x ^ 103 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def get_encode_code(l): | 
					
						
							|  |  |  |             a = 0 | 
					
						
							|  |  |  |             b = l.split('-') | 
					
						
							|  |  |  |             c = len(b) | 
					
						
							|  |  |  |             s = '' | 
					
						
							|  |  |  |             for i in range(c - 1, -1, -1): | 
					
						
							| 
									
										
										
										
											2015-06-07 00:37:29 +08:00
										 |  |  |                 a = do_xor(int(b[c - i - 1], 16), i) | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |                 s += chr(a) | 
					
						
							|  |  |  |             return s[::-1] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-07 01:52:51 +08:00
										 |  |  |         def get_path_key(x, format_id, segment_index): | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |             mg = ')(*&^flash@#$%a' | 
					
						
							|  |  |  |             tm = self._download_json( | 
					
						
							| 
									
										
										
										
											2015-06-07 01:52:51 +08:00
										 |  |  |                 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id, | 
					
						
							|  |  |  |                 note='Download path key of segment %d for format %s' % (segment_index + 1, format_id) | 
					
						
							|  |  |  |             )['t'] | 
					
						
							| 
									
										
										
										
											2015-06-07 00:37:29 +08:00
										 |  |  |             t = str(int(math.floor(int(tm) / (600.0)))) | 
					
						
							| 
									
										
										
										
											2015-09-23 22:25:16 +08:00
										 |  |  |             return self.md5_text(t + mg + x) | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         video_urls_dict = {} | 
					
						
							| 
									
										
										
										
											2015-06-07 01:52:51 +08:00
										 |  |  |         for format_item in data['vp']['tkl'][0]['vs']: | 
					
						
							|  |  |  |             if 0 < int(format_item['bid']) <= 10: | 
					
						
							|  |  |  |                 format_id = self.get_format(format_item['bid']) | 
					
						
							| 
									
										
										
										
											2015-05-30 10:37:54 +08:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             video_urls = [] | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-07 01:52:51 +08:00
										 |  |  |             video_urls_info = format_item['fs'] | 
					
						
							|  |  |  |             if not format_item['fs'][0]['l'].startswith('/'): | 
					
						
							|  |  |  |                 t = get_encode_code(format_item['fs'][0]['l']) | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |                 if t.endswith('mp4'): | 
					
						
							| 
									
										
										
										
											2015-06-07 01:52:51 +08:00
										 |  |  |                     video_urls_info = format_item['flvs'] | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-07 01:52:51 +08:00
										 |  |  |             for segment_index, segment in enumerate(video_urls_info): | 
					
						
							|  |  |  |                 vl = segment['l'] | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |                 if not vl.startswith('/'): | 
					
						
							|  |  |  |                     vl = get_encode_code(vl) | 
					
						
							|  |  |  |                 key = get_path_key( | 
					
						
							| 
									
										
										
										
											2015-06-07 01:52:51 +08:00
										 |  |  |                     vl.split('/')[-1].split('.')[0], format_id, segment_index) | 
					
						
							|  |  |  |                 filesize = segment['b'] | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |                 base_url = data['vp']['du'].split('/') | 
					
						
							|  |  |  |                 base_url.insert(-1, key) | 
					
						
							|  |  |  |                 base_url = '/'.join(base_url) | 
					
						
							|  |  |  |                 param = { | 
					
						
							|  |  |  |                     'su': _uuid, | 
					
						
							|  |  |  |                     'qyid': uuid.uuid4().hex, | 
					
						
							|  |  |  |                     'client': '', | 
					
						
							|  |  |  |                     'z': '', | 
					
						
							|  |  |  |                     'bt': '', | 
					
						
							|  |  |  |                     'ct': '', | 
					
						
							|  |  |  |                     'tn': str(int(time.time())) | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 api_video_url = base_url + vl + '?' + \ | 
					
						
							|  |  |  |                     compat_urllib_parse.urlencode(param) | 
					
						
							| 
									
										
										
										
											2015-06-07 01:52:51 +08:00
										 |  |  |                 js = self._download_json( | 
					
						
							|  |  |  |                     api_video_url, video_id, | 
					
						
							|  |  |  |                     note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |                 video_url = js['l'] | 
					
						
							|  |  |  |                 video_urls.append( | 
					
						
							|  |  |  |                     (video_url, filesize)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             video_urls_dict[format_id] = video_urls | 
					
						
							|  |  |  |         return video_urls_dict | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_format(self, bid): | 
					
						
							| 
									
										
										
										
											2015-06-07 02:25:00 +08:00
										 |  |  |         matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)] | 
					
						
							|  |  |  |         return matched_format_ids[0] if len(matched_format_ids) else None | 
					
						
							| 
									
										
										
										
											2015-05-30 10:37:54 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def get_bid(self, format_id): | 
					
						
							| 
									
										
										
										
											2015-06-07 02:25:00 +08:00
										 |  |  |         matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id] | 
					
						
							|  |  |  |         return matched_bids[0] if len(matched_bids) else None | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def get_raw_data(self, tvid, video_id, enc_key, _uuid): | 
					
						
							|  |  |  |         tm = str(int(time.time())) | 
					
						
							| 
									
										
										
										
											2015-09-23 22:22:04 +08:00
										 |  |  |         tail = tm + tvid | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |         param = { | 
					
						
							|  |  |  |             'key': 'fvip', | 
					
						
							| 
									
										
										
										
											2015-09-23 22:25:16 +08:00
										 |  |  |             'src': self.md5_text('youtube-dl'), | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |             'tvId': tvid, | 
					
						
							|  |  |  |             'vid': video_id, | 
					
						
							|  |  |  |             'vinfo': 1, | 
					
						
							|  |  |  |             'tm': tm, | 
					
						
							| 
									
										
										
										
											2015-10-10 15:03:01 +08:00
										 |  |  |             'enc': self.md5_text(enc_key + tail), | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |             'qyid': _uuid, | 
					
						
							|  |  |  |             'tn': random.random(), | 
					
						
							|  |  |  |             'um': 0, | 
					
						
							| 
									
										
										
										
											2015-09-23 22:22:04 +08:00
										 |  |  |             'authkey': self.md5_text(self.md5_text('') + tail), | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ | 
					
						
							|  |  |  |             compat_urllib_parse.urlencode(param) | 
					
						
							|  |  |  |         raw_data = self._download_json(api_url, video_id) | 
					
						
							|  |  |  |         return raw_data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_enc_key(self, swf_url, video_id): | 
					
						
							| 
									
										
										
										
											2015-09-23 22:22:04 +08:00
										 |  |  |         # TODO: automatic key extraction | 
					
						
							| 
									
										
										
										
											2015-10-22 17:47:11 +08:00
										 |  |  |         # last update at 2015-10-22 for Zombie::bite | 
					
						
							|  |  |  |         # '7223c67061dbea1259d0ceb44f44b6d62288f4f80c972170de5201d2321060270e05'[2:66][0::2] | 
					
						
							|  |  |  |         enc_key = '2c76de15dcb44bd28ff0927d50d31620' | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |         return enc_key | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         webpage = self._download_webpage( | 
					
						
							|  |  |  |             url, 'temp_id', note='download video page') | 
					
						
							|  |  |  |         tvid = self._search_regex( | 
					
						
							| 
									
										
										
										
											2015-06-07 00:56:08 +08:00
										 |  |  |             r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |         video_id = self._search_regex( | 
					
						
							| 
									
										
										
										
											2015-06-07 00:56:08 +08:00
										 |  |  |             r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |         swf_url = self._search_regex( | 
					
						
							| 
									
										
										
										
											2015-06-07 02:39:03 +08:00
										 |  |  |             r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL') | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |         _uuid = uuid.uuid4().hex | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         enc_key = self.get_enc_key(swf_url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) | 
					
						
							| 
									
										
										
										
											2015-06-07 01:32:03 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if raw_data['code'] != 'A000000': | 
					
						
							|  |  |  |             raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |         if not raw_data['data']['vp']['tkl']: | 
					
						
							|  |  |  |             raise ExtractorError('No support iQiqy VIP video') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         data = raw_data['data'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         title = data['vi']['vn'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # generate video_urls_dict | 
					
						
							| 
									
										
										
										
											2015-05-30 10:37:54 +08:00
										 |  |  |         video_urls_dict = self.construct_video_urls( | 
					
						
							| 
									
										
										
										
											2015-06-07 00:44:54 +08:00
										 |  |  |             data, video_id, _uuid) | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # construct info | 
					
						
							|  |  |  |         entries = [] | 
					
						
							|  |  |  |         for format_id in video_urls_dict: | 
					
						
							|  |  |  |             video_urls = video_urls_dict[format_id] | 
					
						
							|  |  |  |             for i, video_url_info in enumerate(video_urls): | 
					
						
							| 
									
										
										
										
											2015-06-07 00:37:29 +08:00
										 |  |  |                 if len(entries) < i + 1: | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |                     entries.append({'formats': []}) | 
					
						
							|  |  |  |                 entries[i]['formats'].append( | 
					
						
							|  |  |  |                     { | 
					
						
							|  |  |  |                         'url': video_url_info[0], | 
					
						
							|  |  |  |                         'filesize': video_url_info[-1], | 
					
						
							|  |  |  |                         'format_id': format_id, | 
					
						
							| 
									
										
										
										
											2015-05-30 10:37:54 +08:00
										 |  |  |                         'preference': int(self.get_bid(format_id)) | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |                     } | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for i in range(len(entries)): | 
					
						
							| 
									
										
										
										
											2015-05-30 10:37:54 +08:00
										 |  |  |             self._sort_formats(entries[i]['formats']) | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |             entries[i].update( | 
					
						
							|  |  |  |                 { | 
					
						
							| 
									
										
										
										
											2015-06-07 01:57:05 +08:00
										 |  |  |                     'id': '%s_part%d' % (video_id, i + 1), | 
					
						
							| 
									
										
										
										
											2015-05-29 23:32:04 +08:00
										 |  |  |                     'title': title, | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if len(entries) > 1: | 
					
						
							|  |  |  |             info = { | 
					
						
							|  |  |  |                 '_type': 'multi_video', | 
					
						
							|  |  |  |                 'id': video_id, | 
					
						
							|  |  |  |                 'title': title, | 
					
						
							|  |  |  |                 'entries': entries, | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             info = entries[0] | 
					
						
							|  |  |  |             info['id'] = video_id | 
					
						
							|  |  |  |             info['title'] = title | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return info |