From 8bc3881be7604d51ce8512134f9211ffbe7d1f7f Mon Sep 17 00:00:00 2001 From: Date: Tue, 25 Jul 2017 23:48:30 +1000 Subject: [PATCH] init version. could download track and album --- youtube_dl/extractor/ximalaya.py | 131 +++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 youtube_dl/extractor/ximalaya.py diff --git a/youtube_dl/extractor/ximalaya.py b/youtube_dl/extractor/ximalaya.py new file mode 100644 index 000000000..00e343296 --- /dev/null +++ b/youtube_dl/extractor/ximalaya.py @@ -0,0 +1,131 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +import re + + +class XimalayaBaseIE(InfoExtractor): + _VALID_URL = r'https?://www\.ximalaya\.com/(?P[0-9]+)/sound/(?P[0-9]+)' + + _GEO_COUNTRIES = ['CN'] + + def _extract_track(self, item_id): + # http://www.ximalaya.com/tracks/44404156.json + + item_info = self._download_json( + 'http://www.ximalaya.com/tracks/' + item_id + '.json', item_id, encoding='utf-8') + return { + 'id': item_info.get('id'), + 'url': item_info.get('play_path'), + 'title': item_info.get('title'), + 'creator': item_info.get('nickname'), + 'album': item_info.get('album_title') or item_info.get('title'), + 'artist': item_info.get('nickname'), + } + + +class XimalayaSongIE(XimalayaBaseIE): + IE_NAME = 'Ximalaya:song' + IE_DEST = '喜马拉雅 - 声音' + + _VALID_URL = r'https?://www\.ximalaya\.com/[0-9]+/sound/(?P[0-9]+)' + + _TEST = [{ + 'url': 'http://www.ximalaya.com/20924760/sound/44404156', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + "id": 44404156, + "play_path_64": "http://audio.xmcdn.com/group31/M07/17/FF/wKgJX1lvHLWyLhhQAD-yGZR0pZM415.m4a", + "play_path_32": "http://audio.xmcdn.com/group30/M0B/2D/8F/wKgJXllvIQ7SaqGFABhdLSg2RAk021.m4a", + "play_path": "http://audio.xmcdn.com/group31/M07/17/FF/wKgJX1lvHLWyLhhQAD-yGZR0pZM415.m4a", + "duration": 515, + "title": "\u6210\u4e3a\u8427\u5cf0\uff1a\u91d1\u5eb8\u7b14\u4e0b\u7684\u7537\u6027\u8fdb\u5316\u53f2", + "nickname": "\u4e09\u8054\u751f\u6d3b\u5468\u520a", + "uid": 20924760, + "waveform": "group31/M07/17/FF/wKgJX1lvHLCjOxmLAAAKOKKeWgA0908.js", + "upload_id": "u_45633516", + "cover_url": "http://fdfs.xmcdn.com/group25/M07/4C/C2/wKgJNlguXkmxdj2zAACSQPpffck622.jpg", + "cover_url_142": "http://fdfs.xmcdn.com/group25/M07/4C/C2/wKgJNlguXkmxdj2zAACSQPpffck622_web_large.jpg", + "formatted_created_at": "7\u670819\u65e5 16:53", + "is_favorited": 'false', + "play_count": 30628, + "comments_count": 8, + "shares_count": 2, + "favorites_count": 42, + "album_id": 376177, + "album_title": "\u4e09\u8054\u2022\u542c\u5468\u520a", + "intro": 'null', + "have_more_intro": 'false', + "time_until_now": "4\u5929\u524d", + "category_name": "news", + "category_title": "\u5934\u6761", + "played_secs": 'null', + "is_paid": 'false', + "is_free": 'null', + "price": 'null', + "discounted_price": 'null' + } + }] + + def _real_extract(self, url): + return self._extract_track(self._match_id(url)) + + +class XimalayaAlbumIE(XimalayaBaseIE): + IE_NAME = 'Ximalaya:album' + IE_DEST = '喜马拉雅 - 专辑' + + _VALID_URL = r'http://www\.ximalaya\.com/[0-9]+/album/(?P[0-9]+)' + + def next_page(self, url): + webpage = self._download_webpage(url, self._match_id(url)) + + entries = re.findall( + r'
  • ', + webpage) + + # next page + # r']](.*)]]', webpage, + 'album name') + + entries = re.findall( + r'
  • ', + webpage) + + # next page + # r'