From 8ea499fa2a20dcb41e7443e62eb63a483f3576b8 Mon Sep 17 00:00:00 2001 From: "Dr. PO" Date: Mon, 21 Aug 2017 21:11:40 +0800 Subject: [PATCH 1/4] [acfun] Add support for AcFun.tv --- ChangeLog | 1 + docs/supportedsites.md | 4 + youtube_dl/extractor/acfun.py | 570 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 8 + 4 files changed, 583 insertions(+) create mode 100644 youtube_dl/extractor/acfun.py diff --git a/ChangeLog b/ChangeLog index c07cb9648..911bfd44e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Extractors + [liveleak] Support multi-video pages (#6542) + [liveleak] Support another liveleak embedding pattern (#13336) * [cda] Fix extraction (#13935) ++ [acfun] Add support for AcFun.tv version 2017.08.18 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1991975cc..61cc02f20 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -27,6 +27,10 @@ - **AcademicEarth:Course** - **acast** - **acast:channel** + - **acfun**: AcFun 弹幕视频网 + - **acfun:album**: AcFun - 合辑 + - **acfun:bangumi**: AcFun - 番剧 + - **acfun:user**: AcFun - UP主投稿 - **AddAnime** - **ADN**: Anime Digital Network - **AdobeTV** diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py new file mode 100644 index 000000000..df2157eaa --- /dev/null +++ b/youtube_dl/extractor/acfun.py @@ -0,0 +1,570 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import functools +import json +import re +import time + +from .common import ( + InfoExtractor, +) +from ..compat import ( + compat_ord, + compat_parse_qs, + compat_str, + compat_urllib_parse_urlencode, + compat_urllib_parse_urlparse, +) +from ..utils import ( + ExtractorError, + OnDemandPagedList, + dict_get, + float_or_none, + int_or_none, +) + + +_ACFUN_HOST = r''' + (?x)^( + (?:https?://|//) + (?:www\.)?acfun\. + (?:cn|tv|com|tudou\.com) + )''' + + +class _AcFunBaseIE(InfoExtractor): + # api limit, max 20 + _PAGE_SIZE = 20 + + def _acfun_api_raw(self, video_id, url, args, kw, code, msg, data, succ): + headers = kw.pop('headers', {}) + headers['deviceType'] = 1 + kw['headers'] = headers + json_data = self._download_json(url, video_id, *args, **kw) + if succ != json_data[code]: + raise ExtractorError(json_data[msg], expected=True, video_id=video_id) + return json_data[data] + + def _acfun_api_v0(self, video_id, url, *args, **kw): + return self._acfun_api_raw(video_id, url, args, kw, 'status', 'msg', 'data', 200) + + def _acfun_api_v1(self, video_id, url, *args, **kw): + return self._acfun_api_raw(video_id, url, args, kw, 'code', 'message', 'data', 200) + + def _acfun_api_v2(self, video_id, url, *args, **kw): + return self._acfun_api_raw(video_id, url, args, kw, 'errorid', 'errordesc', 'vdata', 0) + + @classmethod + def _match_two(cls, url, group): + match = re.match(cls._VALID_URL, url) + video_id = compat_str(match.group('id')) + second = int_or_none(match.group(group)) + return video_id, second + + @classmethod + def _get_desc(cls, info): + return dict_get(info, ['description', 'intro']) + + +class AcFunVideoIE(_AcFunBaseIE): + IE_NAME = 'acfun:video' + IE_DESC = False # Do not list + # NOTE: require query string, internal use only + _VALID_URL = _ACFUN_HOST + r'/v/(?Pa[bc]\d+_\d+)\?(?P.*)$' + # for document purpose + _TESTS = [] and [{ + 'url': 'http://www.acfun.cn/v/ab1470310_1', + 'note': 'sourceType: youku', + 'info_dict': { + '_type': 'url', + 'id': 'XMTI3ODI4OTU1Ng==', + 'title': '【七月】悠哉日常大王Repeat_第1话', + 'url': 'https://v.youku.com/v_show/id_XMTI3ODI4OTU1Ng==.html', + }, + }, { + 'url': 'http://www.acfun.cn/v/ab1464837_1', + 'note': 'sourceType: youku2', + 'info_dict': { + 'id': 'XNzk4NzQzMzI4', + 'title': '晨曦公主_第1话', + 'url': 'https://v.youku.com/v_show/id_XNzk4NzQzMzI4.html', + }, + }, { + 'url': 'http://www.acfun.cn/v/ab1464814_1', + 'note': 'sourceType: iqiyi', + 'info_dict': { + 'id': '7f3791481e31a308c43d2f129c584ded:319095200', + 'title': '白箱 SHIROBAKO_第1话', + }, + 'skip': 'TODO: how to build url?', + }, { + 'url': 'http://www.acfun.cn/v/ab1464842_1', + 'note': 'sourceType: qq2', + 'info_dict': { + 'id': 'k0015myyz8t', + 'title': '【十月】大图书馆的牧羊人_第1话', + 'url': 'https://v.qq.com/x/page/k0015myyz8t.html', + }, + }, { + 'url': 'http://www.acfun.cn/v/ab1103_1', + 'note': 'sourceType: letv2', + 'info_dict': { + 'id': '20055264', + 'title': '乒乓_第1话', + 'url': 'http://www.le.com/ptv/vplay/20055264.html', + }, + }, { + 'url': 'http://www.acfun.cn/v/ab1470224_1', + 'note': 'sourceType: pptv', + 'info_dict': { + 'id': 'V2GFAmrQQH7hX6M', + 'title': '【四月】关于完全听不懂老公在说什么的事 第二季_第1话', + 'url': 'http://v.pptv.com/show/V2GFAmrQQH7hX6M.html', + }, + }] + + # copied from youku.py (function removed in commit 59ed87c) + @classmethod + def _yk_t(cls, s1, s2): + ls = list(range(256)) + t = 0 + for i in range(256): + t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256 + ls[i], ls[t] = ls[t], ls[i] + s = bytearray() + x, y = 0, 0 + for i in range(len(s2)): + y = (y + 1) % 256 + x = (x + ls[y]) % 256 + ls[x], ls[y] = ls[y], ls[x] + s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256]) + return bytes(s) + + def _acfun_flash_data(self, vid, sign, ref, video_id): + api = 'http://player.acfun.cn/flash_data?vid={vid}&ct=85&ev=3&sign={sign}&time={time}' + flash_data = self._download_json( + api.format(vid=vid, sign=sign, time=int(time.time()*1000)), + video_id, note='Downloading video flash data: vid=%s' % vid, + headers={'Referer': ref}) + encrypted = base64.b64decode(flash_data['data']) + decrypted = self._yk_t('8bdc7e1a', encrypted) + return json.loads(decrypted.decode('utf8')) + + def _acfun_video(self, video, url, title, video_id): + info = self._download_json( + 'http://www.acfun.cn/video/getVideo.aspx?id={}'.format(video[2:]), + video_id, note='Downloading video part info') + if not info['success']: + raise ExtractorError(info['result'], expected=True, video_id=video_id) + sourceType = info['sourceType'] + if 'zhuzhan' == sourceType: + return self._acfun_video_zhuzhan(video, info, url, title, video_id) + sourceId = info['sourceId'] + new_url = None + if sourceType in ('youku', 'youku2'): + new_url = 'https://v.youku.com/v_show/id_{}.html'.format(sourceId) + elif sourceType in ('qq', 'qq2'): + new_url = 'https://v.qq.com/x/page/{}.html'.format(sourceId) + elif sourceType in ('letv', 'letv2'): + new_url = 'http://www.le.com/ptv/vplay/{}.html'.format(sourceId) + elif sourceType in ('pptv'): + sourceId = sourceId.split(':')[1] + new_url = 'http://v.pptv.com/show/{}.html'.format(sourceId) + if new_url: + return self.url_result(new_url, video_id=sourceId, video_title=title) + raise ExtractorError('unsupported sourceType: %s' % sourceType, expected=True, video_id=video_id) + + def _acfun_video_zhuzhan(self, vid, info, url, title, video_id): + flash = self._acfun_flash_data(info['sourceId'], info['encode'], url, video_id) + streams = [stream for stream in flash['stream'] if 'segs' in stream] + streams.sort(key=lambda v: int(v['width'])) + segs_len = [len(stream['segs']) for stream in streams] + same_len = 1 == len(set(segs_len)) + entries = [] + for idx in range(max(segs_len)): + formats = [{ + 'url': stream['segs'][idx]['url'], + 'ext': 'mp4', + 'format_id': stream['stream_type'], + 'width': int_or_none(stream['width']), + 'height': int_or_none(stream['height']), + 'filesize': stream['segs'][idx]['size'], + } for sidx, stream in enumerate(streams) if idx < segs_len[sidx]] + seconds = streams[0]['segs'][idx]['seconds'] if same_len else None + entries.append({ + 'id': '%s_seg%d' % (vid, idx), + 'title': title, + 'formats': formats, + 'duration': float_or_none(seconds), + }) + return { + '_type': 'multi_video', + 'id': video_id, + 'entries': entries, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + parsed_url = compat_urllib_parse_urlparse(url) + query = compat_parse_qs(parsed_url.query) + title = query['title'][0] if 'title' in query else video_id + vid = 'av' + query['vid'][0] + return self._acfun_video(vid, url, title, video_id) + + +class _AcFunVideoListIE(_AcFunBaseIE): + def _acfun_list(self, videos_info, video_id, video_idx): + info = { + 'description': self._get_desc(videos_info), + 'thumbnail': videos_info['cover'], + 'view_count': videos_info['visit']['views'], + 'comment_count': videos_info['visit']['comments'], + 'tags': videos_info.get('tags'), + } + if 'owner' in videos_info: + owner = videos_info['owner'] + info.update({ + 'uploader': owner['name'], + 'uploader_id': owner['id'], + 'uploader_url': 'http://www.acfun.cn/u/%d.aspx' % owner['id'], + }) + if 'releaseDate' in videos_info: + info['timestamp'] = int_or_none(videos_info['releaseDate'], scale=1000) + + entries = [] + for idx, video in enumerate(videos_info['videos']): + url = 'http://www.acfun.cn/v/%s_%d' % (video_id, idx + 1) + title = '%s_%s' % (videos_info['title'], video['title']) + if 'ac' == video_id[:2] and 1 == len(videos_info['videos']): + title = videos_info['title'] + vid = dict_get(video, ['videoId', 'id']) + query_str = compat_urllib_parse_urlencode({ + 'title': title, + 'vid': vid, + }) + entry = { + '_type': 'url_transparent', + 'url': '%s?%s' % (url, query_str), + 'ie_key': AcFunVideoIE.ie_key(), + 'title': title, + } + entry.update(info) + if 'updateTime' in video: + entry['timestamp'] = int_or_none(video['updateTime'], scale=1000) + entries.append(entry) + + if 1 == len(entries): + video_idx = 1 + if video_idx is not None: + return entries[video_idx - 1] + + return self.playlist_result(entries, + video_id, videos_info['title'], self._get_desc(videos_info)) + + +class AcFunIE(_AcFunVideoListIE): + IE_NAME = 'acfun' + IE_DESC = 'AcFun 弹幕视频网' + _VALID_URL = _ACFUN_HOST + r'/v/(?Pac\d+)(?:_(?P\d+))?' + _TESTS = [{ + 'url': 'http://www.acfun.cn/v/ac3704490', + 'playlist_mincount': 23, + 'info_dict': { + 'id': 'ac3704490', + 'title': '广西车神叛逆少年之夺命125合集(搬运)', + 'description': 'md5:4c8bdbc6a8217b8a95c27671f3e6a597', + }, + }, { + 'url': 'http://www.acfun.cn/v/ac3913858_1', + 'info_dict': { + 'id': 'ac3913858_1', + 'title': '中国交通事故合集20170811:每天10分钟最新国内车祸实例,助你提高安全意识', + 'description': 'md5:0cbb9578cb5383d5bc75bfff4984b040', + 'timestamp': 1502543900, + 'uploader_id': 4075269, + 'uploader': '交通事故video', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'av5500038_seg0', + 'ext': 'mp4', + 'title': 'md5:7f843db80b5769311d04622846e71b59', + 'format_id': 'mp4hd2', + 'duration': 189, + }, + }, { + 'info_dict': { + 'id': 'av5500038_seg1', + 'ext': 'mp4', + 'title': 'md5:7f843db80b5769311d04622846e71b59', + 'format_id': 'mp4hd2', + 'duration': 178, + }, + }, { + 'info_dict': { + 'id': 'av5500038_seg2', + 'ext': 'mp4', + 'title': 'md5:7f843db80b5769311d04622846e71b59', + 'format_id': 'mp4hd2', + 'duration': 177, + }, + }, { + 'info_dict': { + 'id': 'av5500038_seg3', + 'ext': 'mp4', + 'title': 'md5:7f843db80b5769311d04622846e71b59', + 'format_id': 'mp4hd2', + 'duration': 118, + }, + }], + 'params': { + 'skip_download': True, + }, + }] + + def _acfun_video_info(self, video_id): + return self._acfun_api_v2(video_id, + 'http://apipc.app.acfun.cn/v2/videos/' + video_id[2:], + note='Downloading video info') + + def _real_extract(self, url): + video_id, video_idx = self._match_two(url, 'idx') + videos_info = self._acfun_video_info(video_id) + return self._acfun_list(videos_info, video_id, video_idx) + + +class AcFunBangumiIE(_AcFunVideoListIE): + IE_NAME = 'acfun:bangumi' + IE_DESC = 'AcFun - 番剧' + _VALID_URL = _ACFUN_HOST + r'/v/(?Pab\d+)(?:_(?P\d+))?' + _TESTS = [{ + 'url': 'http://www.acfun.cn/v/ab1480054', + 'playlist_count': 12, + 'info_dict': { + 'id': 'ab1480054', + 'title': '四叠半神话大系', + 'description': 'md5:9d03a432ba6e84a3155727e36ed5f16a', + }, + }, { + 'url': 'http://www.acfun.cn/v/ab1470396_1', + 'info_dict': { + 'id': 'ab1470396_1', + 'title': '【十月】无论如何都想加入生肖【AcFun独家正版】_第1话', + 'description': 'md5:74f7029bb5615a3efe52f4bef9388d65', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'av2766142_seg0', + 'ext': 'mp4', + 'title': 'md5:922e303fe6e6f21623d3ca72f6b6429f', + 'format_id': 'mp4hd', + 'duration': 359, + }, + }], + 'params': { + 'skip_download': True, + }, + }] + + def _acfun_bangumi_page(self, bangumi_id, pagesize, pagenum): + page = pagenum + 1 + query = { + 'bangumiId': bangumi_id[2:], + 'pageSize': pagesize, + 'pageNo': page, + 'isWeb': 1, + 'order': 2, + } + info = self._acfun_api_v0(bangumi_id, + 'http://www.acfun.cn/bangumi/video/page', + query=query, + note='Downloading Bangumi video info, page=%d' % page) + return info['list'] + + def _acfun_bangumi_info(self, bangumi_id): + bangumi_info = self._acfun_api_v2(bangumi_id, + 'http://apipc.app.acfun.cn/v2/bangumis/' + bangumi_id[2:], + query={'page': '{num:%d,size:%d}' % (1, self._PAGE_SIZE)}, + note='Downloading Bangumi info') + if 'tags' in bangumi_info: + tags = bangumi_info.pop('tags') + bangumi_info['tags'] = [tag['name'] for tag in tags] + return bangumi_info + + def _real_extract(self, url): + bangumi_id, bangumi_idx = self._match_two(url, 'idx') + bangumi_info = self._acfun_bangumi_info(bangumi_id) + paged = OnDemandPagedList( + functools.partial(self._acfun_bangumi_page, bangumi_id, self._PAGE_SIZE), + self._PAGE_SIZE) + bangumi_info['videos'] = paged.getslice() + return self._acfun_list(bangumi_info, bangumi_id, bangumi_idx) + + +class _AcFunListIE(_AcFunBaseIE): + def _acfun_entry(self, video): + return self.url_result( + 'http://www.acfun.cn/v/ac%s' % video['contentId'], + ie=AcFunIE.ie_key(), + video_title=dict_get(video, ['title', 'subtitle']), + ) + + def _acfun_list(self, videos_info, video_id, entries): + return self.playlist_result(entries, video_id, + videos_info.get('title'), self._get_desc(videos_info)) + + +class AcFunUserIE(_AcFunListIE): + IE_NAME = 'acfun:user' + IE_DESC = 'AcFun - UP主投稿' + _VALID_URL = _ACFUN_HOST + r'/u/(?P\d+)\.aspx' + _TESTS = [{ + 'url': 'http://www.acfun.cn/u/90274.aspx', + 'playlist_mincount': 66, + 'info_dict': { + 'id': '90274', + 'title': '极品国产', + 'description': 'md5:e9b7ab94985fdfba527ea25285a60be4', + }, + }] + + def _acfun_user_video_page(self, user_id, pagesize, pagenum): + page = pagenum + 1 + query = { + 'pageNo': page, + 'pageSize': pagesize, + 'userId': user_id, + 'type': 1, + } + info = self._acfun_api_v0(user_id, + 'http://api.app.acfun.cn/apiserver/user/contribution', + query=query, + note='Downloading user videos info, page=%d' % page) + for video in info['page']['list']: + yield self._acfun_entry(video) + + def _acfun_user_info(self, user_id): + info = self._acfun_api_v0(user_id, + 'http://api.app.acfun.cn/apiserver/profile', + query={'userId': user_id}, + note='Downloading user info') + return info['fullUser'] + + def _real_extract(self, url): + user_id = self._match_id(url) + user_info = self._acfun_user_info(user_id) + paged = OnDemandPagedList( + functools.partial(self._acfun_user_video_page, user_id, self._PAGE_SIZE), + self._PAGE_SIZE, use_cache=True) + return self._acfun_list({ + 'title': user_info['username'], + 'description': user_info['signature'], + }, user_id, paged) + + +class _AcFunAlbumIE(_AcFunListIE): + _ACFUN_API_ALBUM = 'http://apipc.app.acfun.cn/albums/' + _ACFUN_ALBUM_CACHE = {} + + def _acfun_album_group_page(self, album_id, group, pagesize, pagenum): + contents = group['contents'] + if 0 == pagenum and len(contents) <= pagesize: + return contents + + group_id = group['groupId'] + page = pagenum + 1 + query = { + 'groupId': group_id, + 'page': '{num:%d,size:%d}' % (page, pagesize), + } + info = self._acfun_api_v1(album_id, + self._ACFUN_API_ALBUM + album_id[2:] + '/contents', + query=query, + note='Downloading Album group info, group=%d, page=%d' % (group_id, page)) + return info['list'] + + def _acfun_album_info(self, album_id): + if album_id in self._ACFUN_ALBUM_CACHE: + return self._ACFUN_ALBUM_CACHE[album_id] + album_info = self._acfun_api_v1(album_id, + self._ACFUN_API_ALBUM + album_id[2:], + note='Downloading Album info') + album_groups = [] + for group in album_info.pop('groups'): + paged = OnDemandPagedList( + functools.partial(self._acfun_album_group_page, album_id, group, self._PAGE_SIZE), + self._PAGE_SIZE) + group['contents'] = paged.getslice() + album_groups.append(group) + album_info['groups'] = album_groups + self._ACFUN_ALBUM_CACHE[album_id] = album_info + return album_info + + +class AcFunAlbumGroupIE(_AcFunAlbumIE): + IE_NAME = 'acfun:albumgroup' + IE_DESC = False # Do not list + _VALID_URL = _ACFUN_HOST + r'/a/(?Paa\d+)\#group=(?P\d+)' + _TESTS = [{ + 'url': 'http://www.acfun.cn/a/aa5001561#group=1', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'ag2680', + 'title': '8分钟家庭锻炼_未分组', + 'description': '8分钟系列,适合无器械锻炼胸肌 腹肌', + }, + }, { + 'url': 'http://www.acfun.cn/a/aa5016734#group=1', + 'playlist_mincount': 34, + 'info_dict': { + 'id': 'ag26859', + 'title': '番剧OPED_未分组', + 'description': 'md5:e0544b43a3f9c918218111cd32d9fdb7', + }, + }] + + def _real_extract(self, url): + album_id, group_idx = self._match_two(url, 'group') + album_info = self._acfun_album_info(album_id) + group_info = album_info['groups'][group_idx - 1] + videos = [self._acfun_entry(v) for v in group_info['contents'] if not v['article']] + return self._acfun_list({ + 'title': '%s_%s' % (album_info['title'], group_info['groupName']), + 'description': self._get_desc(album_info), + }, 'ag{}'.format(group_info['groupId']), videos) + + +class AcFunAlbumIE(_AcFunAlbumIE): + IE_NAME = 'acfun:album' + IE_DESC = 'AcFun - 合辑' + _VALID_URL = _ACFUN_HOST + r'/a/(?Paa\d+)$' + _TESTS = [{ + 'url': 'http://www.acfun.cn/a/aa5001107', + 'playlist_mincount': 4, + 'info_dict': { + 'id': 'aa5001107', + 'title': 'AcFun无聊大作战-视频', + 'description': 'md5:4962d13677feb34eda82f2f98202e1ee', + }, + }, { + 'url': 'http://www.acfun.cn/a/aa5014197', + 'playlist_mincount': 19, + 'info_dict': { + 'id': 'aa5014197', + 'title': '第五届AcFun毁歌祭-视频', + 'description': 'md5:9d729d9127baaf8a0c66fd381a0a3d12', + }, + }] + + def _real_extract(self, url): + album_id = self._match_id(url) + album_info = self._acfun_album_info(album_id) + entries = [self.url_result( + 'http://www.acfun.cn/a/%s#group=%d' % (album_id, idx + 1), + ie=AcFunAlbumGroupIE.ie_key(), + video_title='%s_%s' % (album_info.get('title'), group['groupName']), + ) for idx, group in enumerate(album_info['groups'])] + return self.playlist_result(entries, album_id, + album_info.get('title'), self._get_desc(album_info)) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 17048fd6e..272fa5cbf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -18,6 +18,14 @@ from .acast import ( ACastIE, ACastChannelIE, ) +from .acfun import ( + AcFunAlbumGroupIE, + AcFunAlbumIE, + AcFunBangumiIE, + AcFunIE, + AcFunUserIE, + AcFunVideoIE, +) from .addanime import AddAnimeIE from .adn import ADNIE from .adobetv import ( From efaecff5c75cae14154f3ab23a62fb0da1f75b13 Mon Sep 17 00:00:00 2001 From: "Dr. PO" Date: Mon, 21 Aug 2017 23:18:19 +0800 Subject: [PATCH 2/4] [acfun] possible KeyError? --- youtube_dl/extractor/acfun.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py index df2157eaa..9df265ce0 100644 --- a/youtube_dl/extractor/acfun.py +++ b/youtube_dl/extractor/acfun.py @@ -184,6 +184,10 @@ class AcFunVideoIE(_AcFunBaseIE): same_len = 1 == len(set(segs_len)) entries = [] for idx in range(max(segs_len)): + # no KeyError in formats + # 'segs': streams checked that + # idx: checked by segs_len + # ('url', 'size', etc...): api should return these, or the api fail formats = [{ 'url': stream['segs'][idx]['url'], 'ext': 'mp4', @@ -218,11 +222,16 @@ class _AcFunVideoListIE(_AcFunBaseIE): def _acfun_list(self, videos_info, video_id, video_idx): info = { 'description': self._get_desc(videos_info), - 'thumbnail': videos_info['cover'], - 'view_count': videos_info['visit']['views'], - 'comment_count': videos_info['visit']['comments'], + 'thumbnail': videos_info.get('cover'), 'tags': videos_info.get('tags'), + 'timestamp': int_or_none(videos_info.get('releaseDate'), scale=1000) } + if 'visit' in videos_info: + visit = videos_info['visit'] + info.update({ + 'view_count': visit['views'], + 'comment_count': visit['comments'], + }) if 'owner' in videos_info: owner = videos_info['owner'] info.update({ @@ -230,8 +239,6 @@ class _AcFunVideoListIE(_AcFunBaseIE): 'uploader_id': owner['id'], 'uploader_url': 'http://www.acfun.cn/u/%d.aspx' % owner['id'], }) - if 'releaseDate' in videos_info: - info['timestamp'] = int_or_none(videos_info['releaseDate'], scale=1000) entries = [] for idx, video in enumerate(videos_info['videos']): From 40166318d6bb807f209335a9883afdcbac58f749 Mon Sep 17 00:00:00 2001 From: "Dr. PO" Date: Wed, 23 Aug 2017 12:55:12 +0800 Subject: [PATCH 3/4] [acfun] use vid in acfun:video --- youtube_dl/extractor/acfun.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py index 9df265ce0..d390e957a 100644 --- a/youtube_dl/extractor/acfun.py +++ b/youtube_dl/extractor/acfun.py @@ -146,21 +146,20 @@ class AcFunVideoIE(_AcFunBaseIE): api = 'http://player.acfun.cn/flash_data?vid={vid}&ct=85&ev=3&sign={sign}&time={time}' flash_data = self._download_json( api.format(vid=vid, sign=sign, time=int(time.time()*1000)), - video_id, note='Downloading video flash data: vid=%s' % vid, - headers={'Referer': ref}) + video_id, note=False, headers={'Referer': ref}) encrypted = base64.b64decode(flash_data['data']) decrypted = self._yk_t('8bdc7e1a', encrypted) return json.loads(decrypted.decode('utf8')) - def _acfun_video(self, video, url, title, video_id): + def _acfun_video(self, vid, url, title, video_id): info = self._download_json( - 'http://www.acfun.cn/video/getVideo.aspx?id={}'.format(video[2:]), - video_id, note='Downloading video part info') + 'http://www.acfun.cn/video/getVideo.aspx?id={}'.format(vid), + video_id, note='Downloading video part info: id=%s' % vid) if not info['success']: raise ExtractorError(info['result'], expected=True, video_id=video_id) sourceType = info['sourceType'] if 'zhuzhan' == sourceType: - return self._acfun_video_zhuzhan(video, info, url, title, video_id) + return self._acfun_video_zhuzhan(vid, info, url, title, video_id) sourceId = info['sourceId'] new_url = None if sourceType in ('youku', 'youku2'): @@ -198,7 +197,7 @@ class AcFunVideoIE(_AcFunBaseIE): } for sidx, stream in enumerate(streams) if idx < segs_len[sidx]] seconds = streams[0]['segs'][idx]['seconds'] if same_len else None entries.append({ - 'id': '%s_seg%d' % (vid, idx), + 'id': 'av%s_seg%d' % (vid, idx), 'title': title, 'formats': formats, 'duration': float_or_none(seconds), @@ -214,7 +213,7 @@ class AcFunVideoIE(_AcFunBaseIE): parsed_url = compat_urllib_parse_urlparse(url) query = compat_parse_qs(parsed_url.query) title = query['title'][0] if 'title' in query else video_id - vid = 'av' + query['vid'][0] + vid = query['vid'][0] return self._acfun_video(vid, url, title, video_id) From 7d896f4df527f72bf9c0b19883f581ecec8b2ed0 Mon Sep 17 00:00:00 2001 From: "Dr. PO" Date: Wed, 23 Aug 2017 13:14:45 +0800 Subject: [PATCH 4/4] [acfun] fix flake8 errors --- youtube_dl/extractor/acfun.py | 53 +++++++++++++++++------------------ 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py index d390e957a..96a47cc74 100644 --- a/youtube_dl/extractor/acfun.py +++ b/youtube_dl/extractor/acfun.py @@ -73,7 +73,7 @@ class AcFunVideoIE(_AcFunBaseIE): IE_DESC = False # Do not list # NOTE: require query string, internal use only _VALID_URL = _ACFUN_HOST + r'/v/(?Pa[bc]\d+_\d+)\?(?P.*)$' - # for document purpose + # note some urls for different sourceType _TESTS = [] and [{ 'url': 'http://www.acfun.cn/v/ab1470310_1', 'note': 'sourceType: youku', @@ -145,7 +145,7 @@ class AcFunVideoIE(_AcFunBaseIE): def _acfun_flash_data(self, vid, sign, ref, video_id): api = 'http://player.acfun.cn/flash_data?vid={vid}&ct=85&ev=3&sign={sign}&time={time}' flash_data = self._download_json( - api.format(vid=vid, sign=sign, time=int(time.time()*1000)), + api.format(vid=vid, sign=sign, time=int(time.time() * 1000)), video_id, note=False, headers={'Referer': ref}) encrypted = base64.b64decode(flash_data['data']) decrypted = self._yk_t('8bdc7e1a', encrypted) @@ -183,10 +183,6 @@ class AcFunVideoIE(_AcFunBaseIE): same_len = 1 == len(set(segs_len)) entries = [] for idx in range(max(segs_len)): - # no KeyError in formats - # 'segs': streams checked that - # idx: checked by segs_len - # ('url', 'size', etc...): api should return these, or the api fail formats = [{ 'url': stream['segs'][idx]['url'], 'ext': 'mp4', @@ -266,8 +262,9 @@ class _AcFunVideoListIE(_AcFunBaseIE): if video_idx is not None: return entries[video_idx - 1] - return self.playlist_result(entries, - video_id, videos_info['title'], self._get_desc(videos_info)) + return self.playlist_result( + entries, video_id, + videos_info['title'], self._get_desc(videos_info)) class AcFunIE(_AcFunVideoListIE): @@ -331,8 +328,8 @@ class AcFunIE(_AcFunVideoListIE): }] def _acfun_video_info(self, video_id): - return self._acfun_api_v2(video_id, - 'http://apipc.app.acfun.cn/v2/videos/' + video_id[2:], + return self._acfun_api_v2( + video_id, 'http://apipc.app.acfun.cn/v2/videos/' + video_id[2:], note='Downloading video info') def _real_extract(self, url): @@ -383,15 +380,15 @@ class AcFunBangumiIE(_AcFunVideoListIE): 'isWeb': 1, 'order': 2, } - info = self._acfun_api_v0(bangumi_id, - 'http://www.acfun.cn/bangumi/video/page', + info = self._acfun_api_v0( + bangumi_id, 'http://www.acfun.cn/bangumi/video/page', query=query, note='Downloading Bangumi video info, page=%d' % page) return info['list'] def _acfun_bangumi_info(self, bangumi_id): - bangumi_info = self._acfun_api_v2(bangumi_id, - 'http://apipc.app.acfun.cn/v2/bangumis/' + bangumi_id[2:], + bangumi_info = self._acfun_api_v2( + bangumi_id, 'http://apipc.app.acfun.cn/v2/bangumis/' + bangumi_id[2:], query={'page': '{num:%d,size:%d}' % (1, self._PAGE_SIZE)}, note='Downloading Bangumi info') if 'tags' in bangumi_info: @@ -418,7 +415,8 @@ class _AcFunListIE(_AcFunBaseIE): ) def _acfun_list(self, videos_info, video_id, entries): - return self.playlist_result(entries, video_id, + return self.playlist_result( + entries, video_id, videos_info.get('title'), self._get_desc(videos_info)) @@ -444,16 +442,16 @@ class AcFunUserIE(_AcFunListIE): 'userId': user_id, 'type': 1, } - info = self._acfun_api_v0(user_id, - 'http://api.app.acfun.cn/apiserver/user/contribution', + info = self._acfun_api_v0( + user_id, 'http://api.app.acfun.cn/apiserver/user/contribution', query=query, note='Downloading user videos info, page=%d' % page) for video in info['page']['list']: yield self._acfun_entry(video) def _acfun_user_info(self, user_id): - info = self._acfun_api_v0(user_id, - 'http://api.app.acfun.cn/apiserver/profile', + info = self._acfun_api_v0( + user_id, 'http://api.app.acfun.cn/apiserver/profile', query={'userId': user_id}, note='Downloading user info') return info['fullUser'] @@ -465,9 +463,9 @@ class AcFunUserIE(_AcFunListIE): functools.partial(self._acfun_user_video_page, user_id, self._PAGE_SIZE), self._PAGE_SIZE, use_cache=True) return self._acfun_list({ - 'title': user_info['username'], - 'description': user_info['signature'], - }, user_id, paged) + 'title': user_info['username'], + 'description': user_info['signature'], + }, user_id, paged) class _AcFunAlbumIE(_AcFunListIE): @@ -485,8 +483,8 @@ class _AcFunAlbumIE(_AcFunListIE): 'groupId': group_id, 'page': '{num:%d,size:%d}' % (page, pagesize), } - info = self._acfun_api_v1(album_id, - self._ACFUN_API_ALBUM + album_id[2:] + '/contents', + info = self._acfun_api_v1( + album_id, self._ACFUN_API_ALBUM + album_id[2:] + '/contents', query=query, note='Downloading Album group info, group=%d, page=%d' % (group_id, page)) return info['list'] @@ -494,8 +492,8 @@ class _AcFunAlbumIE(_AcFunListIE): def _acfun_album_info(self, album_id): if album_id in self._ACFUN_ALBUM_CACHE: return self._ACFUN_ALBUM_CACHE[album_id] - album_info = self._acfun_api_v1(album_id, - self._ACFUN_API_ALBUM + album_id[2:], + album_info = self._acfun_api_v1( + album_id, self._ACFUN_API_ALBUM + album_id[2:], note='Downloading Album info') album_groups = [] for group in album_info.pop('groups'): @@ -572,5 +570,6 @@ class AcFunAlbumIE(_AcFunAlbumIE): ie=AcFunAlbumGroupIE.ie_key(), video_title='%s_%s' % (album_info.get('title'), group['groupName']), ) for idx, group in enumerate(album_info['groups'])] - return self.playlist_result(entries, album_id, + return self.playlist_result( + entries, album_id, album_info.get('title'), self._get_desc(album_info))