From 0a2bf0e1b5a1e8caf568317d0766ebe5fe3383d0 Mon Sep 17 00:00:00 2001 From: Namnamseo <0201ssw+github@gmail.com> Date: Thu, 24 Aug 2017 11:32:24 +0900 Subject: [PATCH 1/5] [kakao] Add kakao support --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/kakao.py | 138 +++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 youtube_dl/extractor/kakao.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 17048fd6e..3292dfc26 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -481,6 +481,7 @@ from .jove import JoveIE from .joj import JojIE from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE +from .kakao import KakaoIE from .kaltura import KalturaIE from .kamcord import KamcordIE from .kanalplay import KanalPlayIE diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py new file mode 100644 index 000000000..717519cb2 --- /dev/null +++ b/youtube_dl/extractor/kakao.py @@ -0,0 +1,138 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, +) +import datetime + + +class KakaoIE(InfoExtractor): + _VALID_URL = r'https?://tv.kakao.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)' + IE_NAME = 'kakao.com' + + _TESTS = [{ + 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', + 'md5': '702b2fbdeb51ad82f5c904e8c0766340', + 'info_dict': { + 'id': '301965083', + 'ext': 'mp4', + 'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』', + 'uploader_id': 2671005, + 'uploader': '그랑그랑이', + } + }, { + 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180', + 'md5': 'a8917742069a4dd442516b86e7d66529', + 'info_dict': { + 'id': '300103180', + 'ext': 'mp4', + 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 
음악중심] 20160611, 507회', + 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', + 'uploader_id': 2653210, + 'uploader': '쇼 음악중심', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + # Player URL, to be used in Referer header + player_url = 'http://tv.kakao.com/embed/player/cliplink/' + video_id + \ + '?service=kakao_tv&autoplay=1&profile=HIGH&wmode=transparent' + player_header = {'Referer': player_url} + + # Request Impress, which contains video information + impress = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/impress' % video_id, + video_id, 'Downloading video info', + query={ + 'player': 'monet_html5', + 'referer': url, + 'uuid': '', + 'service': 'kakao_tv', + 'section': '', + 'dteType': 'PC', + 'fields': 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' + }, headers=player_header) + + clipLink = impress['clipLink'] + + # Now we request Raw, which contains infos about video files. + tid = impress.get('tid', '') + raw = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw' % video_id, + video_id, 'Downloading video formats info', + query={ + 'player': 'monet_html5', + 'referer': url, + 'uuid': '', + 'service': 'kakao_tv', + 'section': '', + 'tid': tid, + 'profile': 'HIGH', + 'dteType': 'PC', + }, headers=player_header) + + formats = [] + for fmt in raw['outputList']: + profile_name = fmt['profile'] + # The following request is called when user changes the video quality. + # We simulate it here. 
+ fmt_url_json = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw/videolocation' % video_id, + video_id, 'Downloading video URL for profile %s' % profile_name, + query={ + 'service': 'kakao_tv', + 'section': '', + 'tid': tid, + 'profile': profile_name + }, headers=player_header) + fmt_url = fmt_url_json['url'] + + formats.append({ + 'url': fmt_url, + 'format_id': profile_name, + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + 'format_note': fmt.get('label', None), + 'filesize': int_or_none(fmt.get('filesize')) + }) + + self._sort_formats(formats) + + clip = clipLink['clip'] + # Parse thumbnails. + top_thumbnail = clip.get('thumbnailUrl', None) + thumbs = [] + for thumb in clip.get('clipChapterThumbnailList', []): + thumbs.append({ + 'url': thumb['thumbnailUrl'], + 'id': str(thumb['timeInSec']), + 'preference': -1 if thumb['isDefault'] else 0 + }) + # Parse upload date. + upload_date = None + try: + upload_date = datetime.datetime.strptime(clipLink['create_time'], '%Y-%m-%d %H:%M:%S') + upload_date = upload_date.strftime('%Y%m%d') + except (ValueError, KeyError): + pass + + return { + 'id': video_id, + 'title': clip['title'], + 'formats': formats, + 'thumbnail': top_thumbnail, + 'thumbnails': thumbs, + 'description': clip.get('description'), + 'uploader': clipLink['channel'].get('name'), + 'upload_date': upload_date, + 'uploader_id': clipLink.get('channelId'), + 'duration': int_or_none(clip.get('duration')), + 'view_count': int_or_none(clip.get('playCount')), + 'like_count': int_or_none(clip.get('likeCount')), + 'comment_count': int_or_none(clip.get('commentCount')), + } From d266a505c1d78a9e53b2abe25b654d9e5c145055 Mon Sep 17 00:00:00 2001 From: Namnamseo <0201ssw+github@gmail.com> Date: Thu, 24 Aug 2017 11:48:06 +0900 Subject: [PATCH 2/5] [kakao] Fix parsing creation date --- youtube_dl/extractor/kakao.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kakao.py 
b/youtube_dl/extractor/kakao.py index 717519cb2..430cc7792 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -116,7 +116,7 @@ class KakaoIE(InfoExtractor): # Parse upload date. upload_date = None try: - upload_date = datetime.datetime.strptime(clipLink['create_time'], '%Y-%m-%d %H:%M:%S') + upload_date = datetime.datetime.strptime(clipLink['createTime'], '%Y-%m-%d %H:%M:%S') upload_date = upload_date.strftime('%Y%m%d') except (ValueError, KeyError): pass From cd0c661757e428ba9d34fbd6b82ff8415ed22c85 Mon Sep 17 00:00:00 2001 From: Namnamseo <0201ssw+github@gmail.com> Date: Thu, 24 Aug 2017 12:18:08 +0900 Subject: [PATCH 3/5] [kakao] Add creation dates in tests --- youtube_dl/extractor/kakao.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 430cc7792..621033a92 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -22,6 +22,7 @@ class KakaoIE(InfoExtractor): 'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』', 'uploader_id': 2671005, 'uploader': '그랑그랑이', + 'upload_date': '20170227' } }, { 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180', @@ -33,6 +34,7 @@ class KakaoIE(InfoExtractor): 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', 'uploader_id': 2653210, 'uploader': '쇼 음악중심', + 'upload_date': '20170129' } }] From 82c9def6c61570a4c8f1a7b15718bb639f4c2222 Mon Sep 17 00:00:00 2001 From: Namnamseo <0201ssw+github@gmail.com> Date: Fri, 25 Aug 2017 16:30:38 +0900 Subject: [PATCH 4/5] [kakao] Fix issues --- youtube_dl/extractor/kakao.py | 80 ++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 621033a92..2854973e5 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -5,8 +5,9 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( 
int_or_none, + compat_str, + unified_timestamp, ) -import datetime class KakaoIE(InfoExtractor): @@ -22,7 +23,8 @@ class KakaoIE(InfoExtractor): 'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』', 'uploader_id': 2671005, 'uploader': '그랑그랑이', - 'upload_date': '20170227' + 'timestamp': 1488160199, + 'upload_date': '20170227', } }, { 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180', @@ -34,7 +36,8 @@ class KakaoIE(InfoExtractor): 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', 'uploader_id': 2653210, 'uploader': '쇼 음악중심', - 'upload_date': '20170129' + 'timestamp': 1485684628, + 'upload_date': '20170129', } }] @@ -62,7 +65,7 @@ class KakaoIE(InfoExtractor): clipLink = impress['clipLink'] - # Now we request Raw, which contains infos about video files. + # Raw contains informations regarding downloading video files. tid = impress.get('tid', '') raw = self._download_json( 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw' % video_id, @@ -76,52 +79,51 @@ class KakaoIE(InfoExtractor): 'tid': tid, 'profile': 'HIGH', 'dteType': 'PC', - }, headers=player_header) + }, headers=player_header, fatal=False) formats = [] - for fmt in raw['outputList']: - profile_name = fmt['profile'] - # The following request is called when user changes the video quality. - # We simulate it here. - fmt_url_json = self._download_json( - 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw/videolocation' % video_id, - video_id, 'Downloading video URL for profile %s' % profile_name, - query={ - 'service': 'kakao_tv', - 'section': '', - 'tid': tid, - 'profile': profile_name - }, headers=player_header) - fmt_url = fmt_url_json['url'] + for fmt in raw.get('outputList', []): + try: + profile_name = fmt['profile'] + # The following request is called when user changes the video quality. + # We simulate it here. 
+ fmt_url_json = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw/videolocation' % video_id, + video_id, 'Downloading video URL for profile %s' % profile_name, + query={ + 'service': 'kakao_tv', + 'section': '', + 'tid': tid, + 'profile': profile_name + }, headers=player_header, fatal=False) + fmt_url = fmt_url_json['url'] - formats.append({ - 'url': fmt_url, - 'format_id': profile_name, - 'width': int_or_none(fmt.get('width')), - 'height': int_or_none(fmt.get('height')), - 'format_note': fmt.get('label', None), - 'filesize': int_or_none(fmt.get('filesize')) - }) + formats.append({ + 'url': fmt_url, + 'format_id': profile_name, + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + 'format_note': fmt.get('label'), + 'filesize': int_or_none(fmt.get('filesize')) + }) + except KeyError: + pass self._sort_formats(formats) clip = clipLink['clip'] # Parse thumbnails. - top_thumbnail = clip.get('thumbnailUrl', None) + top_thumbnail = clip.get('thumbnailUrl') thumbs = [] for thumb in clip.get('clipChapterThumbnailList', []): thumbs.append({ - 'url': thumb['thumbnailUrl'], - 'id': str(thumb['timeInSec']), - 'preference': -1 if thumb['isDefault'] else 0 + 'url': thumb.get('thumbnailUrl'), + 'id': compat_str(thumb.get('timeInSec')), + 'preference': -1 if thumb.get('isDefault') else 0 }) + # Parse upload date. 
- upload_date = None - try: - upload_date = datetime.datetime.strptime(clipLink['createTime'], '%Y-%m-%d %H:%M:%S') - upload_date = upload_date.strftime('%Y%m%d') - except (ValueError, KeyError): - pass + upload_date = unified_timestamp(clipLink.get('createTime')) return { 'id': video_id, @@ -130,8 +132,8 @@ class KakaoIE(InfoExtractor): 'thumbnail': top_thumbnail, 'thumbnails': thumbs, 'description': clip.get('description'), - 'uploader': clipLink['channel'].get('name'), - 'upload_date': upload_date, + 'uploader': clipLink.get('channel', {}).get('name'), + 'timestamp': upload_date, 'uploader_id': clipLink.get('channelId'), 'duration': int_or_none(clip.get('duration')), 'view_count': int_or_none(clip.get('playCount')), From 3424014a9bb85746e776ce3dbae105b3c8be5e2d Mon Sep 17 00:00:00 2001 From: Namnamseo <0201ssw+github@gmail.com> Date: Sun, 3 Sep 2017 20:20:24 +0900 Subject: [PATCH 5/5] [kakao] Fix issues - Remove unnecessary comments - Parse video info first - Check validity of fmt_url_json --- youtube_dl/extractor/kakao.py | 46 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 2854973e5..0caa41e9e 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -44,12 +44,10 @@ class KakaoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - # Player URL, to be used in Referer header player_url = 'http://tv.kakao.com/embed/player/cliplink/' + video_id + \ '?service=kakao_tv&autoplay=1&profile=HIGH&wmode=transparent' player_header = {'Referer': player_url} - # Request Impress, which contains video information impress = self._download_json( 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/impress' % video_id, video_id, 'Downloading video info', @@ -64,8 +62,20 @@ class KakaoIE(InfoExtractor): }, headers=player_header) clipLink = impress['clipLink'] + clip = clipLink['clip'] + + video_info = { + 'id': video_id, 
+ 'title': clip['title'], + 'description': clip.get('description'), + 'uploader': clipLink.get('channel', {}).get('name'), + 'uploader_id': clipLink.get('channelId'), + 'duration': int_or_none(clip.get('duration')), + 'view_count': int_or_none(clip.get('playCount')), + 'like_count': int_or_none(clip.get('likeCount')), + 'comment_count': int_or_none(clip.get('commentCount')), + } - # Raw contains informations regarding downloading video files. tid = impress.get('tid', '') raw = self._download_json( 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw' % video_id, @@ -85,8 +95,6 @@ class KakaoIE(InfoExtractor): for fmt in raw.get('outputList', []): try: profile_name = fmt['profile'] - # The following request is called when user changes the video quality. - # We simulate it here. fmt_url_json = self._download_json( 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw/videolocation' % video_id, video_id, 'Downloading video URL for profile %s' % profile_name, @@ -96,8 +104,11 @@ class KakaoIE(InfoExtractor): 'tid': tid, 'profile': profile_name }, headers=player_header, fatal=False) - fmt_url = fmt_url_json['url'] + if fmt_url_json is None: + continue + + fmt_url = fmt_url_json['url'] formats.append({ 'url': fmt_url, 'format_id': profile_name, @@ -110,9 +121,8 @@ class KakaoIE(InfoExtractor): pass self._sort_formats(formats) + video_info['formats'] = formats - clip = clipLink['clip'] - # Parse thumbnails. top_thumbnail = clip.get('thumbnailUrl') thumbs = [] for thumb in clip.get('clipChapterThumbnailList', []): @@ -121,22 +131,10 @@ class KakaoIE(InfoExtractor): 'id': compat_str(thumb.get('timeInSec')), 'preference': -1 if thumb.get('isDefault') else 0 }) + video_info['thumbnail'] = top_thumbnail + video_info['thumbnails'] = thumbs - # Parse upload date. 
upload_date = unified_timestamp(clipLink.get('createTime')) + video_info['timestamp'] = upload_date - return { - 'id': video_id, - 'title': clip['title'], - 'formats': formats, - 'thumbnail': top_thumbnail, - 'thumbnails': thumbs, - 'description': clip.get('description'), - 'uploader': clipLink.get('channel', {}).get('name'), - 'timestamp': upload_date, - 'uploader_id': clipLink.get('channelId'), - 'duration': int_or_none(clip.get('duration')), - 'view_count': int_or_none(clip.get('playCount')), - 'like_count': int_or_none(clip.get('likeCount')), - 'comment_count': int_or_none(clip.get('commentCount')), - } + return video_info