[naver] extract subtitles and hls formats and reuse code in VLiveIE

This commit is contained in:
remitamine 2015-11-01 19:16:00 +01:00
parent 0a9fad8527
commit a0b06b344a
2 changed files with 92 additions and 81 deletions

View File

@ -84,11 +84,17 @@ class NaverIE(InfoExtractor):
}, },
}] }]
def _extract_video_formats(self, formats_list): def _extract_video_formats(self, formats_list, vid):
formats = [] formats = []
for format_el in formats_list: for format_el in formats_list:
url = format_el.get('source') url = format_el.get('source')
if url: if url:
if format_el.get('type') == 'HLS':
key = format_el.get('key')
if key:
url += '?%s=%s' % (key['name'], key['value'])
formats.extend(self._extract_m3u8_formats(url, vid, 'mp4', m3u8_id='hls'))
else:
encoding_option = format_el.get('encodingOption') encoding_option = format_el.get('encodingOption')
bitrate = format_el.get('bitrate') bitrate = format_el.get('bitrate')
formats.append({ formats.append({
@ -106,27 +112,25 @@ class NaverIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
return formats return formats
def _extract_video_info(self, vid, key): def _parse_video_info(self, play_data, vid):
play_data = self._download_json(
'http://global.apis.naver.com/linetv/rmcnmv/vod_play_videoInfo.json?' + compat_urllib_parse.urlencode({'videoId': vid, 'key': key}),
vid, 'Downloading video info')
meta = play_data.get('meta') meta = play_data.get('meta')
user = meta.get('user') user = meta.get('user', {})
thumbnails = [] thumbnails = []
for thumbnail in play_data['thumbnails']['list']: for thumbnail in play_data.get('thumbnails', {}).get('list', []):
thumbnails.append({'url': thumbnail['source']}) thumbnails.append({'url': thumbnail['source']})
formats = self._extract_video_formats(play_data['videos']['list']) subtitles = {}
if not formats: for caption in play_data.get('captions', {}).get('list', []):
video_info = self._download_json( subtitles[caption['language']] = [
'http://serviceapi.rmcnmv.naver.com/mobile/getVideoInfo.nhn?' + compat_urllib_parse.urlencode({'videoId': vid, 'inKey': key, 'protocol': 'http'}), {'ext': determine_ext(caption['source'], default_ext='vtt'),
vid, 'Downloading video info') 'url': caption['source']}]
formats = self._extract_video_formats(video_info['videos']['list'])
formats = self._extract_video_formats(play_data['videos']['list'] + play_data.get('streams', []), vid)
return { return {
'id': vid, 'id': vid,
'title': meta['subject'], 'title': meta.get('subject'),
'formats': formats, 'formats': formats,
'thumbnail': meta.get('cover', {}).get('source'), 'thumbnail': meta.get('cover', {}).get('source'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
@ -135,6 +139,18 @@ class NaverIE(InfoExtractor):
'uploader': user.get('name'), 'uploader': user.get('name'),
} }
def _extract_video_info(self, vid, key):
    """Download and parse the video info for ``vid``.

    Requests ``vod_play_videoInfo.json`` with ``videoId`` and ``key`` and
    hands the response to ``_parse_video_info``.  When the parsed result
    has no formats, ``getVideoInfo.nhn`` is requested instead (with
    ``videoId``, ``inKey`` and ``protocol=http``) and its ``videos.list``
    plus optional ``streams`` entries are used to fill ``formats``.

    Returns the info dict produced by ``_parse_video_info``.
    """
    primary_query = compat_urllib_parse.urlencode({'videoId': vid, 'key': key})
    info = self._parse_video_info(
        self._download_json(
            'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?' + primary_query,
            vid, 'Downloading video info'),
        vid)
    if info['formats']:
        return info

    # The primary endpoint yielded no formats: ask the mobile service API.
    fallback_query = compat_urllib_parse.urlencode(
        {'videoId': vid, 'inKey': key, 'protocol': 'http'})
    fallback_data = self._download_json(
        'http://serviceapi.rmcnmv.naver.com/mobile/getVideoInfo.nhn?' + fallback_query,
        vid, 'Downloading video info')
    candidates = fallback_data['videos']['list'] + fallback_data.get('streams', [])
    info['formats'] = self._extract_video_formats(candidates, vid)
    return info
def _extract_id_and_key(self, webpage): def _extract_id_and_key(self, webpage):
m_id = re.search(r'(?s)new\s+nhn.rmcnmv.RMCVideoPlayer\(\s*["\']([^"\']+)["\']\s*,\s*(?:{[^}]*?value[^:]*?:\s*?)?["\']([^"\']+)["\']', webpage) m_id = re.search(r'(?s)new\s+nhn.rmcnmv.RMCVideoPlayer\(\s*["\']([^"\']+)["\']\s*,\s*(?:{[^}]*?value[^:]*?:\s*?)?["\']([^"\']+)["\']', webpage)
if not m_id: if not m_id:

View File

@ -6,19 +6,19 @@ from hashlib import sha1
from base64 import b64encode from base64 import b64encode
from time import time from time import time
from .common import InfoExtractor from .naver import NaverIE
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext int_or_none,
) )
from ..compat import compat_urllib_parse from ..compat import compat_urllib_parse
class VLiveIE(InfoExtractor): class VLiveIE(NaverIE):
IE_NAME = 'vlive' IE_NAME = 'vlive'
# www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices # www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices
_VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://m.vlive.tv/video/1326', 'url': 'http://m.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983', 'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': { 'info_dict': {
@ -27,21 +27,31 @@ class VLiveIE(InfoExtractor):
'title': '[V] Girl\'s Day\'s Broadcast', 'title': '[V] Girl\'s Day\'s Broadcast',
'creator': 'Girl\'s Day', 'creator': 'Girl\'s Day',
}, },
} }]
_SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH' _SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
status = self._download_json(
'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
video_id, note='Download status metadata')
vid = status.get('vodId')
if vid:
key = status.get('vodInKey')
if not key:
key = self._download_webpage('http://www.vlive.tv/video/inkey?vodId=%s' % vid, video_id)
if key:
video_info = self._extract_video_info(vid, key)
elif status['status'] not in ('CANCELED', 'COMING_SOON', 'NOT_FOUND'):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://m.vlive.tv/video/%s' % video_id, 'http://m.vlive.tv/video/%s' % video_id,
video_id, note='Download video page') video_id, note='Download video page')
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
creator = self._html_search_regex( creator = self._html_search_regex(
r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator') r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator')
url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id
msgpad = '%.0f' % (time() * 1000) msgpad = '%.0f' % (time() * 1000)
md = b64encode( md = b64encode(
@ -50,37 +60,22 @@ class VLiveIE(InfoExtractor):
) )
url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md}) url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md})
playinfo = self._download_json(url, video_id, 'Downloading video json') playinfo = self._download_json(url, video_id, 'Downloading video json')
if playinfo.get('message', '') != 'success': if playinfo.get('message', '') != 'success':
raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful')) raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful'))
result = playinfo.get('result')
if not playinfo.get('result'): if not result:
raise ExtractorError('No videos found.') raise ExtractorError('No videos found.')
video_info = self._parse_video_info(result, video_id)
formats = [] video_info.update({
for vid in playinfo['result'].get('videos', {}).get('list', []):
formats.append({
'url': vid['source'],
'ext': 'mp4',
'abr': vid.get('bitrate', {}).get('audio'),
'vbr': vid.get('bitrate', {}).get('video'),
'format_id': vid['encodingOption']['name'],
'height': vid.get('height'),
'width': vid.get('width'),
})
self._sort_formats(formats)
subtitles = {}
for caption in playinfo['result'].get('captions', {}).get('list', []):
subtitles[caption['language']] = [
{'ext': determine_ext(caption['source'], default_ext='vtt'),
'url': caption['source']}]
return {
'id': video_id,
'title': title, 'title': title,
'creator': creator,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'formats': formats, 'creator': creator,
'subtitles': subtitles, })
} if video_info:
video_info.update({
'id': video_id,
'view_count': int_or_none(status.get('playCount')),
'likes': int_or_none(status.get('likeCount')),
})
return video_info
raise ExtractorError(status['status'])