| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  | # coding: utf-8 | 
					
						
							| 
									
										
										
										
											2016-05-24 12:13:05 +03:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-26 17:30:24 +03:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2016-04-29 19:29:00 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |     dict_get, | 
					
						
							| 
									
										
										
										
											2016-04-29 19:29:00 +08:00
										 |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |     float_or_none, | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							| 
									
										
										
										
											2016-06-07 14:39:21 +03:00
										 |  |  |     remove_start, | 
					
						
							| 
									
										
										
										
											2016-12-14 21:05:50 +07:00
										 |  |  |     urlencode_postdata, | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2016-03-26 01:46:57 +06:00
										 |  |  | from ..compat import compat_urllib_parse_urlencode | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class VLiveIE(InfoExtractor): | 
					
						
							|  |  |  |     IE_NAME = 'vlive' | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)' | 
					
						
							| 
									
										
										
										
											2016-11-15 22:11:47 +07:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2016-02-06 23:37:55 +01:00
										 |  |  |         'url': 'http://www.vlive.tv/video/1326', | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  |         'md5': 'cc7314812855ce56de70a06a27314983', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '1326', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2016-05-24 12:13:05 +03:00
										 |  |  |             'title': "[V LIVE] Girl's Day's Broadcast", | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |             'creator': "Girl's Day", | 
					
						
							|  |  |  |             'view_count': int, | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2016-11-15 22:11:47 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://www.vlive.tv/video/16937', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '16937', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': '[V LIVE] 첸백시 걍방', | 
					
						
							|  |  |  |             'creator': 'EXO', | 
					
						
							|  |  |  |             'view_count': int, | 
					
						
							|  |  |  |             'subtitles': 'mincount:12', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         video_id = self._match_id(url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage( | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |             'http://www.vlive.tv/video/%s' % video_id, video_id) | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-14 21:05:50 +07:00
										 |  |  |         VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)' | 
					
						
							|  |  |  |         VIDEO_PARAMS_FIELD = 'video params' | 
					
						
							| 
									
										
										
										
											2016-12-05 21:16:12 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-14 21:05:50 +07:00
										 |  |  |         params = self._parse_json(self._search_regex( | 
					
						
							|  |  |  |             VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id, | 
					
						
							|  |  |  |             transform_source=lambda s: '[' + s + ']', fatal=False) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not params or len(params) < 7: | 
					
						
							|  |  |  |             params = self._search_regex( | 
					
						
							|  |  |  |                 VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD) | 
					
						
							|  |  |  |             params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         status, long_video_id, key = params[2], params[5], params[6] | 
					
						
							| 
									
										
										
										
											2016-06-07 14:39:21 +03:00
										 |  |  |         status = remove_start(status, 'PRODUCT_') | 
					
						
							| 
									
										
										
										
											2016-04-26 17:30:24 +03:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': | 
					
						
							| 
									
										
										
										
											2016-12-05 21:16:12 +00:00
										 |  |  |             return self._live(video_id, webpage) | 
					
						
							| 
									
										
										
										
											2016-04-26 17:30:24 +03:00
										 |  |  |         elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO': | 
					
						
							|  |  |  |             if long_video_id and key: | 
					
						
							|  |  |  |                 return self._replay(video_id, webpage, long_video_id, key) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 status = 'COMING_SOON' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if status == 'LIVE_END': | 
					
						
							|  |  |  |             raise ExtractorError('Uploading for replay. Please wait...', | 
					
						
							|  |  |  |                                  expected=True) | 
					
						
							|  |  |  |         elif status == 'COMING_SOON': | 
					
						
							| 
									
										
										
										
											2016-05-24 12:13:05 +03:00
										 |  |  |             raise ExtractorError('Coming soon!', expected=True) | 
					
						
							| 
									
										
										
										
											2016-04-26 17:30:24 +03:00
										 |  |  |         elif status == 'CANCELED': | 
					
						
							|  |  |  |             raise ExtractorError('We are sorry, ' | 
					
						
							|  |  |  |                                  'but the live broadcast has been canceled.', | 
					
						
							|  |  |  |                                  expected=True) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             raise ExtractorError('Unknown status %s' % status) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _get_common_fields(self, webpage): | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  |         title = self._og_search_title(webpage) | 
					
						
							| 
									
										
										
										
											2016-04-26 17:30:24 +03:00
										 |  |  |         creator = self._html_search_regex( | 
					
						
							|  |  |  |             r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)', | 
					
						
							|  |  |  |             webpage, 'creator', fatal=False) | 
					
						
							|  |  |  |         thumbnail = self._og_search_thumbnail(webpage) | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'creator': creator, | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2015-09-05 15:25:19 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-05 21:16:12 +00:00
										 |  |  |     def _live(self, video_id, webpage): | 
					
						
							|  |  |  |         init_page = self._download_webpage( | 
					
						
							|  |  |  |             'http://www.vlive.tv/video/init/view', | 
					
						
							| 
									
										
										
										
											2016-12-14 21:05:50 +07:00
										 |  |  |             video_id, note='Downloading live webpage', | 
					
						
							|  |  |  |             data=urlencode_postdata({'videoSeq': video_id}), | 
					
						
							|  |  |  |             headers={ | 
					
						
							| 
									
										
										
										
											2016-12-05 21:16:12 +00:00
										 |  |  |                 'Referer': 'http://www.vlive.tv/video/%s' % video_id, | 
					
						
							|  |  |  |                 'Content-Type': 'application/x-www-form-urlencoded' | 
					
						
							|  |  |  |             }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         live_params = self._search_regex( | 
					
						
							|  |  |  |             r'"liveStreamInfo"\s*:\s*(".*"),', | 
					
						
							| 
									
										
										
										
											2016-12-14 21:05:50 +07:00
										 |  |  |             init_page, 'live stream info') | 
					
						
							| 
									
										
										
										
											2016-12-05 21:16:12 +00:00
										 |  |  |         live_params = self._parse_json(live_params, video_id) | 
					
						
							|  |  |  |         live_params = self._parse_json(live_params, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-26 17:30:24 +03:00
										 |  |  |         formats = [] | 
					
						
							|  |  |  |         for vid in live_params.get('resolutions', []): | 
					
						
							|  |  |  |             formats.extend(self._extract_m3u8_formats( | 
					
						
							|  |  |  |                 vid['cdnUrl'], video_id, 'mp4', | 
					
						
							|  |  |  |                 m3u8_id=vid.get('name'), | 
					
						
							|  |  |  |                 fatal=False, live=True)) | 
					
						
							|  |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-14 21:30:33 +07:00
										 |  |  |         info = self._get_common_fields(webpage) | 
					
						
							|  |  |  |         info.update({ | 
					
						
							|  |  |  |             'title': self._live_title(info['title']), | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'is_live': True, | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  |         return info | 
					
						
							| 
									
										
										
										
											2016-04-26 17:30:24 +03:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _replay(self, video_id, webpage, long_video_id, key): | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |         playinfo = self._download_json( | 
					
						
							|  |  |  |             'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s' | 
					
						
							| 
									
										
										
										
											2016-03-26 01:46:57 +06:00
										 |  |  |             % compat_urllib_parse_urlencode({ | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |                 'videoId': long_video_id, | 
					
						
							|  |  |  |                 'key': key, | 
					
						
							|  |  |  |                 'ptc': 'http', | 
					
						
							|  |  |  |                 'doct': 'json',  # document type (xml or json) | 
					
						
							|  |  |  |                 'cpt': 'vtt',  # captions type (vtt or ttml) | 
					
						
							|  |  |  |             }), video_id) | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |         formats = [{ | 
					
						
							|  |  |  |             'url': vid['source'], | 
					
						
							|  |  |  |             'format_id': vid.get('encodingOption', {}).get('name'), | 
					
						
							|  |  |  |             'abr': float_or_none(vid.get('bitrate', {}).get('audio')), | 
					
						
							|  |  |  |             'vbr': float_or_none(vid.get('bitrate', {}).get('video')), | 
					
						
							|  |  |  |             'width': int_or_none(vid.get('encodingOption', {}).get('width')), | 
					
						
							|  |  |  |             'height': int_or_none(vid.get('encodingOption', {}).get('height')), | 
					
						
							|  |  |  |             'filesize': int_or_none(vid.get('size')), | 
					
						
							|  |  |  |         } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')] | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |         view_count = int_or_none(playinfo.get('meta', {}).get('count')) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  |         subtitles = {} | 
					
						
							| 
									
										
										
										
											2016-02-06 23:37:55 +01:00
										 |  |  |         for caption in playinfo.get('captions', {}).get('list', []): | 
					
						
							| 
									
										
										
										
											2016-11-15 23:07:17 +08:00
										 |  |  |             lang = dict_get(caption, ('locale', 'language', 'country', 'label')) | 
					
						
							| 
									
										
										
										
											2016-02-07 06:17:40 +06:00
										 |  |  |             if lang and caption.get('source'): | 
					
						
							|  |  |  |                 subtitles[lang] = [{ | 
					
						
							|  |  |  |                     'ext': 'vtt', | 
					
						
							|  |  |  |                     'url': caption['source']}] | 
					
						
							| 
									
										
										
										
											2015-08-20 12:56:11 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-14 21:30:33 +07:00
										 |  |  |         info = self._get_common_fields(webpage) | 
					
						
							|  |  |  |         info.update({ | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'view_count': view_count, | 
					
						
							|  |  |  |             'subtitles': subtitles, | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  |         return info |