[Kakao] Add support for playlist and channel

This commit is contained in:
kjy00302 2017-10-13 22:12:14 +09:00
parent 414e709405
commit 016d2fb975
2 changed files with 147 additions and 5 deletions

View File

@ -484,7 +484,11 @@ from .jove import JoveIE
from .joj import JojIE from .joj import JojIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE from .jpopsukitv import JpopsukiIE
from .kakao import KakaoIE from .kakao import (
KakaoIE,
KakaoPlaylistIE,
KakaoChannelIE,
)
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .kamcord import KamcordIE from .kamcord import KamcordIE
from .kanalplay import KanalPlayIE from .kanalplay import KanalPlayIE

View File

@ -2,18 +2,21 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
ExtractorError
) )
class KakaoIE(InfoExtractor): class KakaoIE(InfoExtractor):
_VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)' _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
_API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks' _API_BASE = 'http://tv.kakao.com/api/v1/ft'
_TESTS = [{ _TESTS = [{
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
@ -45,6 +48,17 @@ class KakaoIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
playlist_id = self._search_regex(r'playlistId=(\d+)', url, 'channel_id', default=None)
if playlist_id:
if not self._downloader.params.get('noplaylist'):
chan_id = self._search_regex(r'channel/(\d+)', url, 'playlist_id')
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
return self.url_result(
'http://tv.kakao.com/channel/%s/playlist/%s' % (chan_id, playlist_id)
)
else:
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
player_header = { player_header = {
'Referer': update_url_query( 'Referer': update_url_query(
'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, { 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
@ -67,7 +81,7 @@ class KakaoIE(InfoExtractor):
query = QUERY_COMMON.copy() query = QUERY_COMMON.copy()
query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
impress = self._download_json( impress = self._download_json(
'%s/%s/impress' % (self._API_BASE, video_id), '%s/cliplinks/%s/impress' % (self._API_BASE, video_id),
video_id, 'Downloading video info', video_id, 'Downloading video info',
query=query, headers=player_header) query=query, headers=player_header)
@ -84,7 +98,7 @@ class KakaoIE(InfoExtractor):
'profile': 'HIGH', 'profile': 'HIGH',
}) })
raw = self._download_json( raw = self._download_json(
'%s/%s/raw' % (self._API_BASE, video_id), '%s/cliplinks/%s/raw' % (self._API_BASE, video_id),
video_id, 'Downloading video formats info', video_id, 'Downloading video formats info',
query=query, headers=player_header) query=query, headers=player_header)
@ -93,7 +107,7 @@ class KakaoIE(InfoExtractor):
try: try:
profile_name = fmt['profile'] profile_name = fmt['profile']
fmt_url_json = self._download_json( fmt_url_json = self._download_json(
'%s/%s/raw/videolocation' % (self._API_BASE, video_id), '%s/cliplinks/%s/raw/videolocation' % (self._API_BASE, video_id),
video_id, video_id,
'Downloading video URL for profile %s' % profile_name, 'Downloading video URL for profile %s' % profile_name,
query={ query={
@ -147,3 +161,127 @@ class KakaoIE(InfoExtractor):
'comment_count': int_or_none(clip.get('commentCount')), 'comment_count': int_or_none(clip.get('commentCount')),
'formats': formats, 'formats': formats,
} }
class KakaoPlaylistIE(InfoExtractor):
    """Extractor for tv.kakao.com playlist pages.

    Downloads the playlist web page, reads the playlist title and collects
    the link of every item in the playlist's <ul> element as a URL entry.
    """
    _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/playlist/(?P<id>\d+)'
    # NOTE(review): the second and third test URLs below are ``cliplink`` URLs
    # and do not match this class's _VALID_URL; they exercise the
    # playlist redirect performed for cliplink URLs carrying ``playlistId``.
    # Confirm whether the test harness expects them under KakaoIE instead.
    _TESTS = [{
        'url': 'http://tv.kakao.com/channel/2653401/playlist/12305',
        'info_dict': {
            'id': '12305',
            'title': '아는 형님 1회',
        },
        'params': {
            'skip_download': True
        },
        'playlist_count': 23
    }, {
        'note': 'Video url with playlist',
        'url': 'http://tv.kakao.com/channel/2657529/cliplink/301795620?playlistId=71340&metaObjectType=Playlist',
        'info_dict': {
            'id': '71340',
            'title': '오버워치 단편',
        },
        'params': {
            'skip_download': True
        },
        'playlist_mincount': 90
    }, {
        'note': 'Video url with playlist, but with --no-playlist ',
        'url': 'http://tv.kakao.com/channel/2657529/cliplink/301795620?playlistId=71340&metaObjectType=Playlist',
        'info_dict': {
            'id': '301795620',
            'ext': 'mp4',
            'title': '신영웅 떡밥 자세히 파헤치기',
            'upload_date': '20170224',
            'uploader_id': 2657529,
            'uploader': '게임친구 롤큐',
            'timestamp': 1487936269
        },
        'params': {
            'skip_download': True,
            'noplaylist': True
        }
    }]

    def _real_extract(self, url):
        """Return a playlist result for the playlist page at *url*.

        Raises ExtractorError (expected) when the page carries no playlist
        title, which this extractor treats as an empty playlist.
        """
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)

        # default=None makes a missing title return None instead of raising,
        # so no exception has to be caught and re-raised.
        list_name = self._html_search_regex(
            r'class="loss_word tit_epiname">(.*)',
            webpage, 'list title', default=None)
        if list_name is None:
            raise ExtractorError('This playlist is empty', expected=True)

        # Raw strings: the previous non-raw literals relied on invalid
        # escape sequences (\> \< \/) that newer Python versions warn about.
        list_element = self._search_regex(
            r'(<ul class="list_vertical" data-playlist-id.*?</ul>)',
            webpage, 'lists', flags=re.DOTALL)

        # Each href is site-relative; the query string is cut off at '?'.
        entries = [
            self.url_result('http://tv.kakao.com' + href)
            for href in re.findall(r'<a href="(.*?)\?', list_element, re.DOTALL)]

        return self.playlist_result(entries, list_id, list_name)
class KakaoChannelIE(InfoExtractor):
    """Extractor for tv.kakao.com channel pages.

    Queries the channel API for the channel's name and description, then
    pages through its video links and returns every clip as a URL entry.
    """
    # The channel id is mandatory: an optional group would let
    # '/channel//...' match and hand None to the API requests below.
    _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<id>\d+)/(?!cliplink|playlist)'
    _TESTS = [{
        'url': 'http://tv.kakao.com/channel/2685195/',
        'info_dict': {
            'id': '2685195',
            'title': 'Mr.아재의 만들기'
        },
        'params': {
            'skip_download': True,
        },
        'playlist_mincount': 250
    }, {
        'note': 'This Channel has over 10k videos',
        'url': 'http://tv.kakao.com/channel/19635/video',
        'info_dict': {
            'id': '19635',
            'title': 'iHQ'
        },
        'params': {
            'skip_download': True,
        },
        'playlist_mincount': 11000
    }]

    def _real_extract(self, url):
        """Return a playlist result holding every clip of the channel."""
        channel_id = self._match_id(url)

        channel_info = self._download_json(
            '%s/channels/%s/' % (KakaoIE._API_BASE, channel_id),
            channel_id,
            note='Downloading channel info',
        )

        query = {
            'sort': 'CreateTime',
            'fulllevels': 'clipLinkList,liveLinkList',
            'fields': 'ccuCount,thumbnailUri,-user,-clipChapterThumbnailList,-tagList',
            'size': '200',
            'page': 1
        }

        entries = []
        has_more = True
        while has_more:
            video_list = self._download_json(
                '%s/channels/%s/videolinks' % (KakaoIE._API_BASE, channel_id),
                channel_id,
                note='Downloading video list %s' % query['page'],
                query=query
            )
            # A page without 'clipLinkList' must not abort the whole
            # extraction with a TypeError.
            for clip in video_list.get('clipLinkList') or []:
                clip_id = clip.get('id')
                if clip_id is None:
                    # Without an id no usable URL can be built.
                    continue
                # Build the canonical cliplink URL so the entry matches
                # KakaoIE._VALID_URL and is handled by that extractor.
                # NOTE(review): assumes the listed 'id' is the cliplink id.
                entries.append(self.url_result(
                    'http://tv.kakao.com/channel/%s/cliplink/%s' % (channel_id, clip_id),
                    ie=KakaoIE.ie_key()))
            has_more = video_list.get('hasMore')
            query['page'] += 1

        return self.playlist_result(
            entries,
            channel_id,
            channel_info.get('name'),
            channel_info.get('description'))