[naver] extract subtitles and hls formats and reuse code in VLiveIE

2015-11-01 19:16:00 +01:00 · 2015-11-01 19:16:00 +01:00 · a0b06b344a
commit a0b06b344a
parent 0a9fad8527
2 changed files with 92 additions and 81 deletions
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@ -84,49 +84,53 @@ class NaverIE(InfoExtractor):
        },
    }]
-    def _extract_video_formats(self, formats_list):
+    def _extract_video_formats(self, formats_list, vid):
        formats = []
        for format_el in formats_list:
            url = format_el.get('source')
            if url:
-                encoding_option = format_el.get('encodingOption')
+                if format_el.get('type') == 'HLS':
-                bitrate = format_el.get('bitrate')
+                    key = format_el.get('key')
-                formats.append({
+                    if key:
-                    'format_id': encoding_option.get('id') or encoding_option.get('name'),
+                        url += '?%s=%s' % (key['name'], key['value'])
-                    'url': format_el['source'],
+                    formats.extend(self._extract_m3u8_formats(url, vid, 'mp4', m3u8_id='hls'))
-                    'width': int_or_none(encoding_option.get('width')),
+                else:
-                    'height': int_or_none(encoding_option.get('height')),
+                    encoding_option = format_el.get('encodingOption')
-                    'vbr': float_or_none(bitrate.get('video')),
+                    bitrate = format_el.get('bitrate')
-                    'abr': float_or_none(bitrate.get('audio')),
+                    formats.append({
-                    'filesize': int_or_none(format_el.get('size')),
+                        'format_id': encoding_option.get('id') or encoding_option.get('name'),
-                    'vcodec': format_el.get('type'),
+                        'url': format_el['source'],
-                    'ext': determine_ext(url, 'mp4'),
+                        'width': int_or_none(encoding_option.get('width')),
-                })
+                        'height': int_or_none(encoding_option.get('height')),
                        'vbr': float_or_none(bitrate.get('video')),
                        'abr': float_or_none(bitrate.get('audio')),
                        'filesize': int_or_none(format_el.get('size')),
                        'vcodec': format_el.get('type'),
                        'ext': determine_ext(url, 'mp4'),
                    })
        if formats:
            self._sort_formats(formats)
        return formats
-    def _extract_video_info(self, vid, key):
+    def _parse_video_info(self, play_data, vid):
        play_data = self._download_json(
            'http://global.apis.naver.com/linetv/rmcnmv/vod_play_videoInfo.json?' + compat_urllib_parse.urlencode({'videoId': vid, 'key': key}),
            vid, 'Downloading video info')
        meta = play_data.get('meta')
-        user = meta.get('user')
+        user = meta.get('user', {})
        thumbnails = []
-        for thumbnail in play_data['thumbnails']['list']:
+        for thumbnail in play_data.get('thumbnails', {}).get('list', []):
            thumbnails.append({'url': thumbnail['source']})
-        formats = self._extract_video_formats(play_data['videos']['list'])
+        subtitles = {}
-        if not formats:
+        for caption in play_data.get('captions', {}).get('list', []):
-            video_info = self._download_json(
+            subtitles[caption['language']] = [
-                'http://serviceapi.rmcnmv.naver.com/mobile/getVideoInfo.nhn?' + compat_urllib_parse.urlencode({'videoId': vid, 'inKey': key, 'protocol': 'http'}),
+                {'ext': determine_ext(caption['source'], default_ext='vtt'),
-                vid, 'Downloading video info')
+                 'url': caption['source']}]
-            formats = self._extract_video_formats(video_info['videos']['list'])
+
        formats = self._extract_video_formats(play_data['videos']['list'] + play_data.get('streams', []), vid)
        return {
            'id': vid,
-            'title': meta['subject'],
+            'title': meta.get('subject'),
            'formats': formats,
            'thumbnail': meta.get('cover', {}).get('source'),
            'thumbnails': thumbnails,
@ -135,6 +139,18 @@ class NaverIE(InfoExtractor):
            'uploader': user.get('name'),
        }
    def _extract_video_info(self, vid, key):
        """Download play data for a video and return its parsed info dict.

        The JSON returned by the ``vod_play_videoInfo.json`` endpoint is
        handed to ``_parse_video_info``.  When the parsed result carries no
        formats, the mobile ``getVideoInfo.nhn`` endpoint is queried instead
        and only the ``formats`` entry of the result is replaced.

        vid -- video id; sent as ``videoId`` and used as the id for the
               download helpers
        key -- access key; sent as ``key`` to the first endpoint and as
               ``inKey`` to the fallback endpoint
        """
        # NOTE(review): the request shown inside _parse_video_info earlier in
        # this diff uses '/linetv/rmcnmv/vod_play_videoInfo.json' while this
        # one uses '/rmcnmv/rmcnmv/' -- confirm which path is intended.
        primary_query = compat_urllib_parse.urlencode({'videoId': vid, 'key': key})
        info = self._parse_video_info(
            self._download_json(
                'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?' + primary_query,
                vid, 'Downloading video info'),
            vid)
        if info['formats']:
            return info

        # Nothing usable from the primary endpoint: ask the mobile API.
        fallback_query = compat_urllib_parse.urlencode(
            {'videoId': vid, 'inKey': key, 'protocol': 'http'})
        fallback_data = self._download_json(
            'http://serviceapi.rmcnmv.naver.com/mobile/getVideoInfo.nhn?' + fallback_query,
            vid, 'Downloading video info')
        # 'videos' -> 'list' is required here; 'streams' is optional.
        candidates = fallback_data['videos']['list'] + fallback_data.get('streams', [])
        info['formats'] = self._extract_video_formats(candidates, vid)
        return info
    def _extract_id_and_key(self, webpage):
        m_id = re.search(r'(?s)new\s+nhn.rmcnmv.RMCVideoPlayer\(\s*["\']([^"\']+)["\']\s*,\s*(?:{[^}]*?value[^:]*?:\s*?)?["\']([^"\']+)["\']', webpage)
        if not m_id:
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@ -6,19 +6,19 @@ from hashlib import sha1
 from base64 import b64encode
 from time import time
-from .common import InfoExtractor
+from .naver import NaverIE
 from ..utils import (
    ExtractorError,
-    determine_ext
+    int_or_none,
 )
 from ..compat import compat_urllib_parse
-class VLiveIE(InfoExtractor):
+class VLiveIE(NaverIE):
    IE_NAME = 'vlive'
    # www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices
    _VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://m.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
@ -27,60 +27,55 @@ class VLiveIE(InfoExtractor):
            'title': '[V] Girl\'s Day\'s Broadcast',
            'creator': 'Girl\'s Day',
        },
-    }
+    }]
    _SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH'
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(
+        status = self._download_json(
-            'http://m.vlive.tv/video/%s' % video_id,
+            'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
-            video_id, note='Download video page')
+            video_id, note='Download status metadata')
-        title = self._og_search_title(webpage)
+        vid = status.get('vodId')
-        thumbnail = self._og_search_thumbnail(webpage)
+        if vid:
-        creator = self._html_search_regex(
+            key = status.get('vodInKey')
-            r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator')
+            if not key:
-
+                key = self._download_webpage('http://www.vlive.tv/video/inkey?vodId=%s' % vid, video_id)
-        url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id
+            if key:
-        msgpad = '%.0f' % (time() * 1000)
+                video_info = self._extract_video_info(vid, key)
-        md = b64encode(
+        elif status['status'] not in ('CANCELED', 'COMING_SOON', 'NOT_FOUND'):
-            hmac.new(self._SECRET.encode('ascii'),
+            webpage = self._download_webpage(
-                     (url[:255] + msgpad).encode('ascii'), sha1).digest()
+                'http://m.vlive.tv/video/%s' % video_id,
-        )
+                video_id, note='Download video page')
-        url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md})
+            title = self._og_search_title(webpage)
-        playinfo = self._download_json(url, video_id, 'Downloading video json')
+            thumbnail = self._og_search_thumbnail(webpage)
-
+            creator = self._html_search_regex(
-        if playinfo.get('message', '') != 'success':
+                r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator')
-            raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful'))
+            url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id
-
+            msgpad = '%.0f' % (time() * 1000)
-        if not playinfo.get('result'):
+            md = b64encode(
-            raise ExtractorError('No videos found.')
+                hmac.new(self._SECRET.encode('ascii'),
-
+                         (url[:255] + msgpad).encode('ascii'), sha1).digest()
-        formats = []
+            )
-        for vid in playinfo['result'].get('videos', {}).get('list', []):
+            url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md})
-            formats.append({
+            playinfo = self._download_json(url, video_id, 'Downloading video json')
-                'url': vid['source'],
+            if playinfo.get('message', '') != 'success':
-                'ext': 'mp4',
+                raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful'))
-                'abr': vid.get('bitrate', {}).get('audio'),
+            result = playinfo.get('result')
-                'vbr': vid.get('bitrate', {}).get('video'),
+            if not result:
-                'format_id': vid['encodingOption']['name'],
+                raise ExtractorError('No videos found.')
-                'height': vid.get('height'),
+            video_info = self._parse_video_info(result, video_id)
-                'width': vid.get('width'),
+            video_info.update({
                'title': title,
                'thumbnail': thumbnail,
                'creator': creator,
            })
-        self._sort_formats(formats)
+        if video_info:
-
+            video_info.update({
-        subtitles = {}
+                'id': video_id,
-        for caption in playinfo['result'].get('captions', {}).get('list', []):
+                'view_count': int_or_none(status.get('playCount')),
-            subtitles[caption['language']] = [
+                'likes': int_or_none(status.get('likeCount')),
-                {'ext': determine_ext(caption['source'], default_ext='vtt'),
+            })
-                 'url': caption['source']}]
+            return video_info
-
+        raise ExtractorError(status['status'])
        return {
            'id': video_id,
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
        }