| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-04 23:08:47 +08:00
										 |  |  | import random | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..compat import compat_urllib_parse_unquote_plus | 
					
						
							| 
									
										
										
										
											2016-03-04 23:08:47 +08:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							|  |  |  |     float_or_none, | 
					
						
							|  |  |  |     timeconvert, | 
					
						
							|  |  |  |     update_url_query, | 
					
						
							|  |  |  |     xpath_text, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class KUSIIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2016-03-21 21:36:32 +06:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))' | 
					
						
							| 
									
										
										
										
											2016-03-04 14:32:01 +08:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2016-08-29 22:54:33 +07:00
										 |  |  |         'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right', | 
					
						
							|  |  |  |         'md5': '4e76ce8e53660ce9697d06c0ba6fc47d', | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2016-08-29 22:54:33 +07:00
										 |  |  |             'id': '12689020', | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2016-08-29 22:54:33 +07:00
										 |  |  |             'title': "Turko Files: Refused to Help, It Ain't Right!", | 
					
						
							|  |  |  |             'duration': 223.586, | 
					
						
							|  |  |  |             'upload_date': '20160826', | 
					
						
							|  |  |  |             'timestamp': 1472233118, | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg$' | 
					
						
							| 
									
										
										
										
											2016-03-04 14:32:01 +08:00
										 |  |  |         }, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://kusi.com/video?clipId=12203019', | 
					
						
							| 
									
										
										
										
											2016-08-29 22:54:33 +07:00
										 |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-03-04 14:32:01 +08:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							| 
									
										
										
										
											2016-03-04 23:08:47 +08:00
										 |  |  |         clip_id = mobj.group('clipId') | 
					
						
							|  |  |  |         video_id = clip_id or mobj.group('path') | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-04 23:08:47 +08:00
										 |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-04 23:08:47 +08:00
										 |  |  |         if clip_id is None: | 
					
						
							|  |  |  |             video_id = clip_id = self._html_search_regex( | 
					
						
							|  |  |  |                 r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id') | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-04 23:08:47 +08:00
										 |  |  |         affiliate_id = self._search_regex( | 
					
						
							|  |  |  |             r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf | 
					
						
							|  |  |  |         xml_url = update_url_query('http://www.kusi.com/build.asp', { | 
					
						
							|  |  |  |             'buildtype': 'buildfeaturexmlrequest', | 
					
						
							|  |  |  |             'featureType': 'Clip', | 
					
						
							|  |  |  |             'featureid': clip_id, | 
					
						
							|  |  |  |             'affiliateno': affiliate_id, | 
					
						
							|  |  |  |             'clientgroupid': '1', | 
					
						
							|  |  |  |             'rnd': int(round(random.random() * 1000000)), | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         doc = self._download_xml(xml_url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-04 23:57:05 +08:00
										 |  |  |         video_title = xpath_text(doc, 'HEADLINE', fatal=True) | 
					
						
							|  |  |  |         duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000) | 
					
						
							|  |  |  |         description = xpath_text(doc, 'ABSTRACT') | 
					
						
							|  |  |  |         thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME') | 
					
						
							|  |  |  |         createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate')) | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content') | 
					
						
							|  |  |  |         formats = [] | 
					
						
							|  |  |  |         for quality in quality_options: | 
					
						
							| 
									
										
										
										
											2016-03-04 23:08:47 +08:00
										 |  |  |             formats.append({ | 
					
						
							|  |  |  |                 'url': compat_urllib_parse_unquote_plus(quality.attrib['url']), | 
					
						
							|  |  |  |                 'height': int_or_none(quality.attrib.get('height')), | 
					
						
							|  |  |  |                 'width': int_or_none(quality.attrib.get('width')), | 
					
						
							| 
									
										
										
										
											2016-03-04 23:57:05 +08:00
										 |  |  |                 'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000), | 
					
						
							| 
									
										
										
										
											2016-03-04 23:08:47 +08:00
										 |  |  |             }) | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': video_title, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							|  |  |  |             'duration': duration, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2016-03-04 23:08:47 +08:00
										 |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |             'timestamp': createtion_time, | 
					
						
							| 
									
										
										
										
											2016-02-15 17:30:53 -08:00
										 |  |  |         } |