[naver] extract subtitles and hls formats and reuse code in VLiveIE

2015-11-01 19:16:00 +01:00 · 2015-11-01 19:16:00 +01:00 · a0b06b344a
commit a0b06b344a
parent 0a9fad8527
2 changed files with 92 additions and 81 deletions
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@ -84,11 +84,17 @@ class NaverIE(InfoExtractor):
        },
    }]
-    def _extract_video_formats(self, formats_list):
+    def _extract_video_formats(self, formats_list, vid):
        formats = []
        for format_el in formats_list:
            url = format_el.get('source')
            if url:
                if format_el.get('type') == 'HLS':
                    key = format_el.get('key')
                    if key:
                        url += '?%s=%s' % (key['name'], key['value'])
                    formats.extend(self._extract_m3u8_formats(url, vid, 'mp4', m3u8_id='hls'))
                else:
                    encoding_option = format_el.get('encodingOption')
                    bitrate = format_el.get('bitrate')
                    formats.append({
@ -106,27 +112,25 @@ class NaverIE(InfoExtractor):
            self._sort_formats(formats)
        return formats
-    def _extract_video_info(self, vid, key):
+    def _parse_video_info(self, play_data, vid):
        play_data = self._download_json(
            'http://global.apis.naver.com/linetv/rmcnmv/vod_play_videoInfo.json?' + compat_urllib_parse.urlencode({'videoId': vid, 'key': key}),
            vid, 'Downloading video info')
        meta = play_data.get('meta')
-        user = meta.get('user')
+        user = meta.get('user', {})
        thumbnails = []
-        for thumbnail in play_data['thumbnails']['list']:
+        for thumbnail in play_data.get('thumbnails', {}).get('list', []):
            thumbnails.append({'url': thumbnail['source']})
-        formats = self._extract_video_formats(play_data['videos']['list'])
+        subtitles = {}
-        if not formats:
+        for caption in play_data.get('captions', {}).get('list', []):
-            video_info = self._download_json(
+            subtitles[caption['language']] = [
-                'http://serviceapi.rmcnmv.naver.com/mobile/getVideoInfo.nhn?' + compat_urllib_parse.urlencode({'videoId': vid, 'inKey': key, 'protocol': 'http'}),
+                {'ext': determine_ext(caption['source'], default_ext='vtt'),
-                vid, 'Downloading video info')
+                 'url': caption['source']}]
-            formats = self._extract_video_formats(video_info['videos']['list'])
+
        formats = self._extract_video_formats(play_data['videos']['list'] + play_data.get('streams', []), vid)
        return {
            'id': vid,
-            'title': meta['subject'],
+            'title': meta.get('subject'),
            'formats': formats,
            'thumbnail': meta.get('cover', {}).get('source'),
            'thumbnails': thumbnails,
@ -135,6 +139,18 @@ class NaverIE(InfoExtractor):
            'uploader': user.get('name'),
        }
    def _extract_video_info(self, vid, key):
        """Download play data for ``vid`` and return the parsed info dict.

        The ``vod_play_videoInfo.json`` endpoint is queried first and its
        JSON is passed to ``_parse_video_info``.  If the parsed result has
        no formats, the mobile ``getVideoInfo.nhn`` endpoint is queried
        with the same id/key and ``info['formats']`` is rebuilt from that
        response (its ``videos.list`` plus any ``streams``).
        """
        primary_query = compat_urllib_parse.urlencode({'videoId': vid, 'key': key})
        primary_data = self._download_json(
            'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?' + primary_query,
            vid, 'Downloading video info')
        info = self._parse_video_info(primary_data, vid)
        if info['formats']:
            return info
        # Primary response yielded nothing playable: retry via the mobile API.
        fallback_query = compat_urllib_parse.urlencode({'videoId': vid, 'inKey': key, 'protocol': 'http'})
        fallback_data = self._download_json(
            'http://serviceapi.rmcnmv.naver.com/mobile/getVideoInfo.nhn?' + fallback_query,
            vid, 'Downloading video info')
        fallback_entries = fallback_data['videos']['list'] + fallback_data.get('streams', [])
        info['formats'] = self._extract_video_formats(fallback_entries, vid)
        return info
    def _extract_id_and_key(self, webpage):
        m_id = re.search(r'(?s)new\s+nhn.rmcnmv.RMCVideoPlayer\(\s*["\']([^"\']+)["\']\s*,\s*(?:{[^}]*?value[^:]*?:\s*?)?["\']([^"\']+)["\']', webpage)
        if not m_id:
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@ -6,19 +6,19 @@ from hashlib import sha1
 from base64 import b64encode
 from time import time
-from .common import InfoExtractor
+from .naver import NaverIE
 from ..utils import (
    ExtractorError,
-    determine_ext
+    int_or_none,
 )
 from ..compat import compat_urllib_parse
-class VLiveIE(InfoExtractor):
+class VLiveIE(NaverIE):
    IE_NAME = 'vlive'
    # www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices
    _VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://m.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
@ -27,21 +27,31 @@ class VLiveIE(InfoExtractor):
            'title': '[V] Girl\'s Day\'s Broadcast',
            'creator': 'Girl\'s Day',
        },
-    }
+    }]
    _SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH'
    def _real_extract(self, url):
        video_id = self._match_id(url)
        status = self._download_json(
            'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
            video_id, note='Download status metadata')
        vid = status.get('vodId')
        if vid:
            key = status.get('vodInKey')
            if not key:
                key = self._download_webpage('http://www.vlive.tv/video/inkey?vodId=%s' % vid, video_id)
            if key:
                video_info = self._extract_video_info(vid, key)
        elif status['status'] not in ('CANCELED', 'COMING_SOON', 'NOT_FOUND'):
            webpage = self._download_webpage(
                'http://m.vlive.tv/video/%s' % video_id,
                video_id, note='Download video page')
            title = self._og_search_title(webpage)
            thumbnail = self._og_search_thumbnail(webpage)
            creator = self._html_search_regex(
                r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator')
            url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id
            msgpad = '%.0f' % (time() * 1000)
            md = b64encode(
@ -50,37 +60,22 @@ class VLiveIE(InfoExtractor):
            )
            url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md})
            playinfo = self._download_json(url, video_id, 'Downloading video json')
            if playinfo.get('message', '') != 'success':
                raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful'))
-
+            result = playinfo.get('result')
-        if not playinfo.get('result'):
+            if not result:
                raise ExtractorError('No videos found.')
-
+            video_info = self._parse_video_info(result, video_id)
-        formats = []
+            video_info.update({
        for vid in playinfo['result'].get('videos', {}).get('list', []):
            formats.append({
                'url': vid['source'],
                'ext': 'mp4',
                'abr': vid.get('bitrate', {}).get('audio'),
                'vbr': vid.get('bitrate', {}).get('video'),
                'format_id': vid['encodingOption']['name'],
                'height': vid.get('height'),
                'width': vid.get('width'),
            })
        self._sort_formats(formats)
        subtitles = {}
        for caption in playinfo['result'].get('captions', {}).get('list', []):
            subtitles[caption['language']] = [
                {'ext': determine_ext(caption['source'], default_ext='vtt'),
                 'url': caption['source']}]
        return {
            'id': video_id,
                'title': title,
            'creator': creator,
                'thumbnail': thumbnail,
-            'formats': formats,
+                'creator': creator,
-            'subtitles': subtitles,
+            })
-        }
+        if video_info:
            video_info.update({
                'id': video_id,
                'view_count': int_or_none(status.get('playCount')),
                'likes': int_or_none(status.get('likeCount')),
            })
            return video_info
        raise ExtractorError(status['status'])