From 350d7963db671884acd43f56f41bd499efd8e74a Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 30 Apr 2016 11:12:11 +0100 Subject: [PATCH 1/9] [pbs] fix the least bitrate http url construction --- youtube_dl/extractor/pbs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 38cdb9975..75c36a621 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -514,6 +514,8 @@ class PBSIE(InfoExtractor): bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None) if not bitrate: continue + if bitrate == '192k': + bitrate = 'baseline' f = m3u8_format.copy() f.update({ 'url': re.sub(r'\d+k|baseline', bitrate, http_url), From 35cd2f4c253fa9d37b6a253f9f63bfe258d8f334 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 30 Apr 2016 11:31:09 +0100 Subject: [PATCH 2/9] [pbs] extract only the formats that we know that they will be available as http format https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications --- youtube_dl/extractor/pbs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 75c36a621..17c85dd7b 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -512,7 +512,9 @@ class PBSIE(InfoExtractor): if http_url: for m3u8_format in m3u8_formats: bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None) - if not bitrate: + # extract only the formats that we know that they will be available as http format. + # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications + if not bitrate or bitrate not in ('192k', '400k', '800k', '1200k', '2500k'): continue if bitrate == '192k': bitrate = 'baseline' From 7691184a3128bd46544ff49e264322d5e9187fdc Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 30 Apr 2016 12:57:30 +0100 Subject: [PATCH 3/9] [pbs] remove duplicate format --- youtube_dl/extractor/pbs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 17c85dd7b..35fb1798d 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -514,10 +514,8 @@ class PBSIE(InfoExtractor): bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None) # extract only the formats that we know that they will be available as http format. # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications - if not bitrate or bitrate not in ('192k', '400k', '800k', '1200k', '2500k'): + if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'): continue - if bitrate == '192k': - bitrate = 'baseline' f = m3u8_format.copy() f.update({ 'url': re.sub(r'\d+k|baseline', bitrate, http_url), From e0e9bbb0e9dd92f526b04584c47e6509a73fed04 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 30 Apr 2016 14:02:17 +0100 Subject: [PATCH 4/9] [pbs] extract srt and vtt subtitles --- youtube_dl/extractor/pbs.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 35fb1798d..81918ac6e 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -537,6 +537,19 @@ class PBSIE(InfoExtractor): 'ext': 'ttml', 'url': closed_captions_url, }] + mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url) + if mobj: + ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1) + ttml_caption_id = int(ttml_caption_id) + subtitles['en'].extend([{ + 'url': closed_captions_url.replace( + ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)), + 'ext': 'srt', + }, { + 'url': closed_captions_url.replace( + ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)), + 'ext': 'vtt', + }]) # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc) # Try turning it to 'program - title' naming scheme if possible From d41ee7b7745d59d398f37b435146d4036e4a7448 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Apr 2016 19:22:42 +0600 Subject: [PATCH 5/9] [vlive] Pass Referer as bytestring (Closes #9352) --- youtube_dl/extractor/vlive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 7f9e99ec2..a672ea9c5 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -43,7 +43,7 @@ class VLiveIE(InfoExtractor): status_params = self._download_json( 'http://www.vlive.tv/video/status?videoSeq=%s' % video_id, video_id, 'Downloading JSON status', - headers={'Referer': url}) + headers={'Referer': url.encode('utf-8')}) status = status_params.get('status') air_start = status_params.get('onAirStartAt', '') is_live = status_params.get('isLive') From 11fa3d7f997019dfce8b670e1b10042ac2004f69 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 30 Apr 2016 15:41:22 +0100 Subject: [PATCH 6/9] [ted] extract all http formats --- youtube_dl/extractor/ted.py | 45 ++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index cf8851438..aea6a02a7 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -102,9 +102,9 @@ class TEDIE(InfoExtractor): }] _NATIVE_FORMATS = { - 'low': {'preference': 1, 'width': 320, 'height': 180}, - 'medium': {'preference': 2, 'width': 512, 'height': 288}, - 'high': {'preference': 3, 'width': 854, 'height': 480}, + 'low': {'width': 320, 'height': 180}, + 'medium': {'width': 512, 'height': 288}, + 'high': {'width': 854, 'height': 480}, } def _extract_info(self, webpage): @@ -171,15 +171,21 @@ class TEDIE(InfoExtractor): if finfo: f.update(finfo) + http_url = None for format_id, resources in talk_info['resources'].items(): if format_id == 'h264': for resource in resources: + h264_url = resource.get('file') + if not h264_url: + continue bitrate = int_or_none(resource.get('bitrate')) formats.append({ - 'url': resource['file'], + 'url': h264_url, 'format_id': '%s-%sk' % (format_id, bitrate), 'tbr': bitrate, }) + if re.search('\d+k', h264_url): + http_url = h264_url elif format_id == 'rtmp': streamer = talk_info.get('streamer') if not streamer: @@ -195,16 +201,24 @@ class TEDIE(InfoExtractor): 'tbr': int_or_none(resource.get('bitrate')), }) elif format_id == 'hls': - hls_formats = self._extract_m3u8_formats( - resources.get('stream'), video_name, 'mp4', m3u8_id=format_id) - for f in hls_formats: - if f.get('format_id') == 'hls-meta': - continue - if not f.get('height'): - f['vcodec'] = 'none' - else: - f['acodec'] = 'none' - formats.extend(hls_formats) + formats.extend(self._extract_m3u8_formats( + resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False)) + + m3u8_formats = list(filter( + lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + formats)) + if http_url: + for m3u8_format in m3u8_formats: + bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None) + if not bitrate: + continue + f = m3u8_format.copy() + f.update({ + 'url': re.sub(r'\d+k', bitrate, http_url), + 'format_id': m3u8_format['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + formats.append(f) audio_download = talk_info.get('audioDownload') if audio_download: @@ -212,10 +226,9 @@ class TEDIE(InfoExtractor): 'url': audio_download, 'format_id': 'audio', 'vcodec': 'none', - 'preference': -0.5, }) - self._sort_formats(formats) + self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) video_id = compat_str(talk_info['id']) From f628d800fbaefe180bd354a0ff8a9009bc64da41 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 30 Apr 2016 16:34:57 +0100 Subject: [PATCH 7/9] [ted] add support for youtube embeds and update tests --- youtube_dl/extractor/ted.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index aea6a02a7..451cde76d 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -27,7 +27,7 @@ class TEDIE(InfoExtractor): ''' _TESTS = [{ 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', - 'md5': 'fc94ac279feebbce69f21c0c6ee82810', + 'md5': '0de43ac406aa3e4ea74b66c9c7789b13', 'info_dict': { 'id': '102', 'ext': 'mp4', @@ -37,21 +37,26 @@ class TEDIE(InfoExtractor): 'consciousness, but that half the time our brains are ' 'actively fooling us.'), 'uploader': 'Dan Dennett', - 'width': 854, + 'width': 853, 'duration': 1308, } }, { 'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms', - 'md5': '226f4fb9c62380d11b7995efa4c87994', + 'md5': 'b899ac15e345fb39534d913f7606082b', 'info_dict': { - 'id': 'vishal-sikka-the-beauty-and-power-of-algorithms', + 'id': 'tSVI8ta_P4w', 'ext': 'mp4', 'title': 'Vishal Sikka: The beauty and power of algorithms', 'thumbnail': 're:^https?://.+\.jpg', - 'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.', - } + 'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4', + 'upload_date': '20140122', + 'uploader_id': 'TEDInstitute', + 'uploader': 'TED Institute', + }, + 'add_ie': ['Youtube'], }, { 'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best', + 'md5': '71b3ab2f4233012dce09d515c9c39ce2', 'info_dict': { 'id': '1972', 'ext': 'mp4', @@ -228,7 +233,7 @@ class TEDIE(InfoExtractor): 'vcodec': 'none', }) - self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) + self._sort_formats(formats) video_id = compat_str(talk_info['id']) @@ -267,7 +272,11 @@ class TEDIE(InfoExtractor): config_json = self._html_search_regex( r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*', - webpage, 'config') + webpage, 'config', default=None) + if not config_json: + embed_url = self._search_regex( + r"]+class='pages-video-embed__video__object'[^>]+src='([^']+)'", webpage, 'embed url') + return self.url_result(self._proto_relative_url(embed_url)) config = json.loads(config_json)['config'] video_url = config['video']['url'] thumbnail = config.get('image', {}).get('url') From 89c0dc9a5fadc3927f7c03f5829e4f2ef8555888 Mon Sep 17 00:00:00 2001 From: BlahGeek Date: Sat, 30 Apr 2016 21:32:54 +0800 Subject: [PATCH 8/9] [xiami] Add xiami extractor --- youtube_dl/extractor/extractors.py | 6 ++ youtube_dl/extractor/xiami.py | 161 +++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 youtube_dl/extractor/xiami.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1b7f9b42..14ca9eaee 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -941,6 +941,12 @@ from .xhamster import ( XHamsterIE, XHamsterEmbedIE, ) +from .xiami import ( + XiamiIE, + XiamiAlbumIE, + XiamiArtistIE, + XiamiCollectionIE +) from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py new file mode 100644 index 000000000..a28d63c48 --- /dev/null +++ b/youtube_dl/extractor/xiami.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + xpath_element, + xpath_text, + xpath_with_ns, + int_or_none, + ExtractorError +) +from ..compat import compat_urllib_parse_unquote + + +class XiamiBaseIE(InfoExtractor): + + _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id' + _NS_MAP = {'xm': 'http://xspf.org/ns/0/'} + + def _extract_track(self, track): + artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='') + artist = artist.split(';') + + ret = { + 'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)), + 'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)), + 'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)), + 'artist': ';'.join(artist) if artist else None, + 'creator': artist[0] if artist else None, + 'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))), + 'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None), + 'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))), + } + + lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP)) + if lyrics_url and lyrics_url.endswith('.lrc'): + ret['description'] = self._download_webpage(lyrics_url, ret['id']) + return ret + + def _extract_xml(self, _id, typ=''): + playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id) + tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP)) + + if not len(tracklist): + raise ExtractorError('No track found') + return [self._extract_track(track) for track in tracklist] + + @staticmethod + def _decrypt(origin): + n = int(origin[0]) + origin = origin[1:] + short_lenth = len(origin) // n + long_num = len(origin) - short_lenth * n + l = tuple() + for i in range(0, n): + length = short_lenth + if i < long_num: + length += 1 + l += (origin[0:length], ) + origin = origin[length:] + ans = '' + for i in range(0, short_lenth + 1): + for j in range(0, n): + if len(l[j])>i: + ans += l[j][i] + return compat_urllib_parse_unquote(ans).replace('^', '0') + + +class XiamiIE(XiamiBaseIE): + IE_NAME = 'xiami:song' + IE_DESC = '虾米音乐' + _VALID_URL = r'http://www\.xiami\.com/song/(?P[0-9]+)' + _TESTS = [ + { + 'url': 'http://www.xiami.com/song/1775610518', + 'md5': '521dd6bea40fd5c9c69f913c232cb57e', + 'info_dict': { + 'id': '1775610518', + 'ext': 'mp3', + 'title': 'Woman', + 'creator': 'HONNE', + 'album': 'Woman', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b', + } + }, + { + 'url': 'http://www.xiami.com/song/1775256504', + 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', + 'info_dict': { + 'id': '1775256504', + 'ext': 'mp3', + 'title': '悟空', + 'creator': '戴荃', + 'album': '悟空', + 'description': 'md5:206e67e84f9bed1d473d04196a00b990', + } + }, + ] + + def _real_extract(self, url): + _id = self._match_id(url) + return self._extract_xml(_id)[0] + + +class XiamiAlbumIE(XiamiBaseIE): + IE_NAME = 'xiami:album' + IE_DESC = '虾米音乐 - 专辑' + _VALID_URL = r'http://www\.xiami\.com/album/(?P[0-9]+)' + _TESTS = [ + { + 'url': 'http://www.xiami.com/album/2100300444', + 'info_dict': { + 'id': '2100300444', + }, + 'playlist_count': 10, + }, + { + 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + _id = self._match_id(url) + return self.playlist_result(self._extract_xml(_id, '/type/1'), _id) + + +class XiamiArtistIE(XiamiBaseIE): + IE_NAME = 'xiami:artist' + IE_DESC = '虾米音乐 - 歌手' + _VALID_URL = r'http://www\.xiami\.com/artist/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', + 'info_dict': { + 'id': '2132', + }, + 'playlist_count': 20, + } + + def _real_extract(self, url): + _id = self._match_id(url) + return self.playlist_result(self._extract_xml(_id, '/type/2'), _id) + + +class XiamiCollectionIE(XiamiBaseIE): + IE_NAME = 'xiami:collection' + IE_DESC = '虾米音乐 - 精选集' + _VALID_URL = r'http://www\.xiami\.com/collect/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', + 'info_dict': { + 'id': '156527391', + }, + 'playlist_count': 26, + } + + def _real_extract(self, url): + _id = self._match_id(url) + return self.playlist_result(self._extract_xml(_id, '/type/3'), _id) From 4e0c0c1508810eb494cd32ef00fb75d03d03ce6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Apr 2016 21:50:23 +0600 Subject: [PATCH 9/9] [xiami] Improve extraction (Closes #9079) * Switch to JSON source * Add abstract IE for playlists * Extract more track related metadata --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/xiami.py | 193 ++++++++++++++--------------- 2 files changed, 96 insertions(+), 99 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 14ca9eaee..737960a01 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -942,7 +942,7 @@ from .xhamster import ( XHamsterEmbedIE, ) from .xiami import ( - XiamiIE, + XiamiSongIE, XiamiAlbumIE, XiamiArtistIE, XiamiCollectionIE diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index a28d63c48..e4ed306b4 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -1,50 +1,42 @@ -# -*- coding: utf-8 -*- - +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - xpath_element, - xpath_text, - xpath_with_ns, - int_or_none, - ExtractorError -) from ..compat import compat_urllib_parse_unquote +from ..utils import int_or_none class XiamiBaseIE(InfoExtractor): + _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id' - _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id' - _NS_MAP = {'xm': 'http://xspf.org/ns/0/'} + def _extract_track(self, track, track_id=None): + title = track['title'] + track_url = self._decrypt(track['location']) - def _extract_track(self, track): - artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='') - artist = artist.split(';') + subtitles = {} + lyrics_url = track.get('lyric_url') or track.get('lyric') + if lyrics_url and lyrics_url.startswith('http'): + subtitles['origin'] = [{'url': lyrics_url}] - ret = { - 'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)), - 'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)), - 'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)), - 'artist': ';'.join(artist) if artist else None, - 'creator': artist[0] if artist else None, - 'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))), - 'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None), - 'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))), + return { + 'id': track.get('song_id') or track_id, + 'url': track_url, + 'title': title, + 'thumbnail': track.get('pic') or track.get('album_pic'), + 'duration': int_or_none(track.get('length')), + 'creator': track.get('artist', '').split(';')[0], + 'track': title, + 'album': track.get('album_name'), + 'artist': track.get('artist'), + 'subtitles': subtitles, } - lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP)) - if lyrics_url and lyrics_url.endswith('.lrc'): - ret['description'] = self._download_webpage(lyrics_url, ret['id']) - return ret - - def _extract_xml(self, _id, typ=''): - playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id) - tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP)) - - if not len(tracklist): - raise ExtractorError('No track found') - return [self._extract_track(track) for track in tracklist] + def _extract_tracks(self, item_id, typ=None): + playlist = self._download_json( + '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id) + return [ + self._extract_track(track, item_id) + for track in playlist['data']['trackList']] @staticmethod def _decrypt(origin): @@ -62,75 +54,87 @@ class XiamiBaseIE(InfoExtractor): ans = '' for i in range(0, short_lenth + 1): for j in range(0, n): - if len(l[j])>i: + if len(l[j]) > i: ans += l[j][i] return compat_urllib_parse_unquote(ans).replace('^', '0') -class XiamiIE(XiamiBaseIE): +class XiamiSongIE(XiamiBaseIE): IE_NAME = 'xiami:song' IE_DESC = '虾米音乐' - _VALID_URL = r'http://www\.xiami\.com/song/(?P[0-9]+)' - _TESTS = [ - { - 'url': 'http://www.xiami.com/song/1775610518', - 'md5': '521dd6bea40fd5c9c69f913c232cb57e', - 'info_dict': { - 'id': '1775610518', - 'ext': 'mp3', - 'title': 'Woman', - 'creator': 'HONNE', - 'album': 'Woman', - 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', - 'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b', - } - }, - { - 'url': 'http://www.xiami.com/song/1775256504', - 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', - 'info_dict': { - 'id': '1775256504', - 'ext': 'mp3', - 'title': '悟空', - 'creator': '戴荃', - 'album': '悟空', - 'description': 'md5:206e67e84f9bed1d473d04196a00b990', - } - }, - ] + _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://www.xiami.com/song/1775610518', + 'md5': '521dd6bea40fd5c9c69f913c232cb57e', + 'info_dict': { + 'id': '1775610518', + 'ext': 'mp3', + 'title': 'Woman', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 265, + 'creator': 'HONNE', + 'track': 'Woman', + 'album': 'Woman', + 'artist': 'HONNE', + 'subtitles': { + 'origin': [{ + 'ext': 'lrc', + }], + }, + } + }, { + 'url': 'http://www.xiami.com/song/1775256504', + 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', + 'info_dict': { + 'id': '1775256504', + 'ext': 'mp3', + 'title': '悟空', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 200, + 'creator': '戴荃', + 'track': '悟空', + 'album': '悟空', + 'artist': '戴荃', + 'subtitles': { + 'origin': [{ + 'ext': 'lrc', + }], + }, + } + }] def _real_extract(self, url): - _id = self._match_id(url) - return self._extract_xml(_id)[0] + return self._extract_tracks(self._match_id(url))[0] -class XiamiAlbumIE(XiamiBaseIE): +class XiamiPlaylistBaseIE(XiamiBaseIE): + def _real_extract(self, url): + item_id = self._match_id(url) + return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id) + + +class XiamiAlbumIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:album' IE_DESC = '虾米音乐 - 专辑' - _VALID_URL = r'http://www\.xiami\.com/album/(?P[0-9]+)' - _TESTS = [ - { - 'url': 'http://www.xiami.com/album/2100300444', - 'info_dict': { - 'id': '2100300444', - }, - 'playlist_count': 10, + _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[0-9]+)' + _TYPE = '1' + _TESTS = [{ + 'url': 'http://www.xiami.com/album/2100300444', + 'info_dict': { + 'id': '2100300444', }, - { - 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', - 'only_matching': True, - } - ] - - def _real_extract(self, url): - _id = self._match_id(url) - return self.playlist_result(self._extract_xml(_id, '/type/1'), _id) + 'playlist_count': 10, + }, { + 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', + 'only_matching': True, + }] -class XiamiArtistIE(XiamiBaseIE): +class XiamiArtistIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:artist' IE_DESC = '虾米音乐 - 歌手' - _VALID_URL = r'http://www\.xiami\.com/artist/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[0-9]+)' + _TYPE = '2' _TEST = { 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', 'info_dict': { @@ -139,23 +143,16 @@ class XiamiArtistIE(XiamiBaseIE): 'playlist_count': 20, } - def _real_extract(self, url): - _id = self._match_id(url) - return self.playlist_result(self._extract_xml(_id, '/type/2'), _id) - -class XiamiCollectionIE(XiamiBaseIE): +class XiamiCollectionIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:collection' IE_DESC = '虾米音乐 - 精选集' - _VALID_URL = r'http://www\.xiami\.com/collect/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[0-9]+)' + _TYPE = '3' _TEST = { 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', 'info_dict': { 'id': '156527391', }, - 'playlist_count': 26, + 'playlist_mincount': 29, } - - def _real_extract(self, url): - _id = self._match_id(url) - return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)