From 91e64c6941d9057b8b20405323988480746ead3e Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Thu, 10 Aug 2017 08:14:15 -0400 Subject: [PATCH 1/6] [spreaker] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/spreaker.py | 161 +++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 youtube_dl/extractor/spreaker.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 897557f93..09ee08aad 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,6 +958,7 @@ from .sport5 import Sport5IE from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE from .sportschau import SportschauIE +from .spreaker import SpreakerIE from .sprout import SproutIE from .srgssr import ( SRGSSRIE, diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py new file mode 100644 index 000000000..3982267a8 --- /dev/null +++ b/youtube_dl/extractor/spreaker.py @@ -0,0 +1,161 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none, ExtractorError + + +class SpreakerIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = r"""(?x)^ + https?:// + (?:www.|api.)? + spreaker.com/ + (?: + show/[a-z0-9_-]+| + user/[a-z0-9_-]+/[a-z0-9_-]| + episode/(?P[0-9]+) + ) + """ + _TESTS = [ + { + 'url': 'https://www.spreaker.com/show/success-with-music', + 'info_dict': { + 'title': 'Success With Music', + 'id': 2317431, + }, + 'playlist_mincount': 14, + }, + { + 'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-' + 'market-your-music-part-2'), + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'Marketing Your Music - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + }, + { + 'url': 'https://api.spreaker.com/episode/12534508', + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'Marketing Your Music - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + } + ] + + def _spreaker_episode_data_to_info(self, data): + upload_date = data['published_at'][0:10].replace('-', '') + author = data.get('author') + if not author: + author = {} + stats = data.get('stats') + view_count = like_count = comment_count = 0 + show = data.get('show') + if not show: + show = {} + else: + show_image = show.get('image') + if not show_image: + show_image = {} + + if stats: + view_count = (stats.get('plays', 0) + + stats.get('plays_streaming', 0) + + stats.get('plays_download', 0)) + like_count = stats.get('likes', 0) + comment_count = stats.get('messages', 0) + + return { + 'id': compat_str(data['episode_id']), + 'title': data['title'], + 'url': data['download_url'], + 'display_id': data.get('permalink'), + 'webpage_url': data.get('site_url'), + 'uploader': author.get('fullname'), + 'creator': author.get('fullname'), + 'release_date': upload_date, + 'upload_date': upload_date, + 'uploader_id': author.get('user_id'), + 'duration': int_or_none(data.get('length')), + 'view_count': int_or_none(view_count), + 'like_count': int_or_none(like_count), + 'comment_count': int_or_none(comment_count), + 'format': 'MPEG Layer 3', + 'format_id': 'mp3', + 'container': 'mp3', + 'ext': 'mp3', + 'thumbnail': show_image.get('big_url'), + 'language': show.get('language'), + 'thumbnails': [ + { + 'id': show_image.get('image_id'), + 'url': show_image.get('big_url'), + 'width': int_or_none(show_image.get('width')), + 'height': int_or_none(show_image.get('height')), + }, + { + 'url': show_image.get('large_url'), + }, + { + 'url': show_image.get('medium_url') + }, + { + 'url': show_image.get('small_url') + }, + ], + } + + def _real_extract(self, url): + episode_id = self._match_id(url) + + if re.match(r'^[0-9]+$', episode_id): + data_url = url + elif '/show/' in url: + html = self._download_webpage(url, None) + playlist_url = self._html_search_regex( + r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') + items = self._download_json(playlist_url, None) + items = items['response']['playlist']['items'] + + if not items: + raise ExtractorError('Empty playlist') + + urls = [x['api_url'] for x in items] + ret = [] + for index, url in enumerate(urls): + data = self._download_json(url, None)['response']['episode'] + dict_ = self._spreaker_episode_data_to_info(data) + dict_.update({ + 'playlist_id': compat_str(data['show_id']), + 'playlist_title': data['show']['title'], + 'playlist_index': index, + }) + ret.append(dict_) + + return self.playlist_result(ret, + data['show_id'], + data['show']['title']) + else: + html = self._download_webpage(url, None) + episode_id = self._html_search_regex( + r'data-episode_id="(?P[0-9]+)"', html, 'id') + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Could not find episode ID') + data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) + + data = self._download_json(data_url, episode_id)['response']['episode'] + if not data['download_enabled']: + raise ExtractorError('Not supported yet') + + return self._spreaker_episode_data_to_info(data) From 2eb228df1cb2ad0c91ef37fe25be47add412d313 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Thu, 10 Aug 2017 16:38:47 -0400 Subject: [PATCH 2/6] [spreaker] Fixes requested Escape . in regexes Make separate extractors for episode page, playlist (show), API Support API's direct links to MP3 files Make counts set to None in case they are not found Handle when published_at is not present Other fixes --- youtube_dl/extractor/extractors.py | 6 +- youtube_dl/extractor/spreaker.py | 176 +++++++++++++++-------------- 2 files changed, 99 insertions(+), 83 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 09ee08aad..7ca2cfd19 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,7 +958,11 @@ from .sport5 import Sport5IE from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE from .sportschau import SportschauIE -from .spreaker import SpreakerIE +from .spreaker import ( + SpreakerIE, + SpreakerAPIEpisodeIE, + SpreakerPlaylistIE +) from .sprout import SproutIE from .srgssr import ( SRGSSRIE, diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index 3982267a8..d2fb6c304 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -8,39 +8,49 @@ from ..compat import compat_str from ..utils import int_or_none, ExtractorError -class SpreakerIE(InfoExtractor): +class SpreakerPlaylistIE(InfoExtractor): IE_NAME = 'spreaker' - _VALID_URL = r"""(?x)^ - https?:// - (?:www.|api.)? - spreaker.com/ - (?: - show/[a-z0-9_-]+| - user/[a-z0-9_-]+/[a-z0-9_-]| - episode/(?P[0-9]+) - ) - """ - _TESTS = [ - { + _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+' + _TEST = { 'url': 'https://www.spreaker.com/show/success-with-music', 'info_dict': { 'title': 'Success With Music', 'id': 2317431, }, 'playlist_mincount': 14, - }, - { - 'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-' - 'market-your-music-part-2'), - 'info_dict': { - 'id': '12534508', - 'ext': 'mp3', - 'title': 'Marketing Your Music - Part 2', - 'upload_date': '20170809', - 'uploader': 'SWM', - 'uploader_id': 9780658, - }, - }, + } + + def _real_extract(self, url): + html = self._download_webpage(url, None) + playlist_url = self._html_search_regex( + r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') + items = self._download_json(playlist_url, None) + items = items['response']['playlist']['items'] + + if not items: + raise ExtractorError('Empty playlist') + + urls = [x['api_url'] for x in items] + ret = [] + for index, url in enumerate(urls): + data = self._download_json(url, None)['response']['episode'] + dict_ = SpreakerIE._spreaker_episode_data_to_info(data) + dict_.update({ + 'playlist_id': compat_str(data['show_id']), + 'playlist_title': data['show']['title'], + 'playlist_index': index, + }) + ret.append(dict_) + + return self.playlist_result(ret, + data['show_id'], + data['show']['title']) + + +class SpreakerAPIEpisodeIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P[0-9]+)(?:/[^\.]+\.mp3$)?' + _TESTS = [ { 'url': 'https://api.spreaker.com/episode/12534508', 'info_dict': { @@ -51,23 +61,59 @@ class SpreakerIE(InfoExtractor): 'uploader': 'SWM', 'uploader_id': 9780658, }, - } + }, + { + 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'Marketing Your Music - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + }, ] - def _spreaker_episode_data_to_info(self, data): - upload_date = data['published_at'][0:10].replace('-', '') - author = data.get('author') - if not author: - author = {} - stats = data.get('stats') - view_count = like_count = comment_count = 0 - show = data.get('show') - if not show: - show = {} - else: - show_image = show.get('image') - if not show_image: - show_image = {} + def _real_extract(self, url): + episode_id = self._match_id(url) + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Invalid ID') + + url = 'https://api.spreaker.com/episode/%s' % (episode_id,) + data = self._download_json(url, episode_id)['response']['episode'] + if not data['download_enabled']: + raise ExtractorError('Not supported yet') + + return SpreakerIE._spreaker_episode_data_to_info(data) + + +class SpreakerIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/user/[a-z0-9_-]+/[a-z0-9_-]' + _TEST = { + 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'Marketing Your Music - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + } + + @staticmethod + def _spreaker_episode_data_to_info(data): + published_at = data.get('published_at') + upload_date = None + if published_at: + upload_date = published_at[0:10].replace('-', '') + author = data.get('author', {}) + stats = data.get('stats', {}) + view_count = like_count = comment_count = None + show = data.get('show', {}) + show_image = show.get('image', {}) if stats: view_count = (stats.get('plays', 0) + @@ -117,45 +163,11 @@ class SpreakerIE(InfoExtractor): } def _real_extract(self, url): - episode_id = self._match_id(url) + html = self._download_webpage(url, None) + episode_id = self._html_search_regex( + r'data-episode_id="(?P[0-9]+)"', html, 'id') + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Could not find episode ID') + data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) - if re.match(r'^[0-9]+$', episode_id): - data_url = url - elif '/show/' in url: - html = self._download_webpage(url, None) - playlist_url = self._html_search_regex( - r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') - items = self._download_json(playlist_url, None) - items = items['response']['playlist']['items'] - - if not items: - raise ExtractorError('Empty playlist') - - urls = [x['api_url'] for x in items] - ret = [] - for index, url in enumerate(urls): - data = self._download_json(url, None)['response']['episode'] - dict_ = self._spreaker_episode_data_to_info(data) - dict_.update({ - 'playlist_id': compat_str(data['show_id']), - 'playlist_title': data['show']['title'], - 'playlist_index': index, - }) - ret.append(dict_) - - return self.playlist_result(ret, - data['show_id'], - data['show']['title']) - else: - html = self._download_webpage(url, None) - episode_id = self._html_search_regex( - r'data-episode_id="(?P[0-9]+)"', html, 'id') - if not re.match(r'^[0-9]+$', episode_id): - raise ExtractorError('Could not find episode ID') - data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) - - data = self._download_json(data_url, episode_id)['response']['episode'] - if not data['download_enabled']: - raise ExtractorError('Not supported yet') - - return self._spreaker_episode_data_to_info(data) + return self.url_result(data_url) From a894bc1c3e5580f90faa965d83073ec5be2a7159 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Thu, 10 Aug 2017 16:42:45 -0400 Subject: [PATCH 3/6] [spreaker] Make IE_NAME values unique --- youtube_dl/extractor/spreaker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index d2fb6c304..60f6af624 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -9,7 +9,7 @@ from ..utils import int_or_none, ExtractorError class SpreakerPlaylistIE(InfoExtractor): - IE_NAME = 'spreaker' + IE_NAME = 'spreaker:playlist' _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+' _TEST = { 'url': 'https://www.spreaker.com/show/success-with-music', @@ -48,7 +48,7 @@ class SpreakerPlaylistIE(InfoExtractor): class SpreakerAPIEpisodeIE(InfoExtractor): - IE_NAME = 'spreaker' + IE_NAME = 'spreaker:api' _VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P[0-9]+)(?:/[^\.]+\.mp3$)?' _TESTS = [ { From 1c9e16b8b9f9fb73c5a717423891ccba96a3f41d Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Thu, 10 Aug 2017 23:28:19 -0400 Subject: [PATCH 4/6] [spreaker] Set extractor name to spreaker to override IE_NAME --- youtube_dl/extractor/spreaker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index 60f6af624..40ccdd973 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -160,6 +160,7 @@ class SpreakerIE(InfoExtractor): 'url': show_image.get('small_url') }, ], + 'extractor': 'spreaker', } def _real_extract(self, url): From b653c19f8fa1d05b2b7479c0bf60e4c83e431717 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Fri, 11 Aug 2017 00:31:57 -0400 Subject: [PATCH 5/6] [spreaker] Handle when playlist JSON has multiple pages --- youtube_dl/extractor/spreaker.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index 40ccdd973..e27078b76 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -24,12 +24,30 @@ class SpreakerPlaylistIE(InfoExtractor): html = self._download_webpage(url, None) playlist_url = self._html_search_regex( r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') - items = self._download_json(playlist_url, None) - items = items['response']['playlist']['items'] + items = self._download_json(playlist_url, + None, + 'Downloading playlist JSON') + playlist = items['response']['playlist'] + next_url = playlist.get('next_url') + items = playlist.get('items', []) if not items: raise ExtractorError('Empty playlist') + page_no = 2 + download_str = 'Downloading playlist JSON page #%d' + while next_url: + items_ = self._download_json(next_url, + None, + download_str % (page_no,)) + playlist_ = items_['response']['playlist'] + new_items = playlist_.get('items', []) + if not new_items: + break + items += new_items + next_url = playlist_.get('next_url') + page_no += 1 + urls = [x['api_url'] for x in items] ret = [] for index, url in enumerate(urls): From 5ec9047d2cba4e29aab1e5d1c5ee7d5b8a4b1ae7 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Sun, 1 Jul 2018 17:25:38 -0400 Subject: [PATCH 6/6] [spreaker] Fixes requested --- youtube_dl/extractor/extractors.py | 2 - youtube_dl/extractor/spreaker.py | 224 ++++++++++++++--------------- 2 files changed, 112 insertions(+), 114 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 95927dd7b..03c857aac 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1022,10 +1022,8 @@ from .stitcher import StitcherIE from .sport5 import Sport5IE from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE -from .sportschau import SportschauIE from .spreaker import ( SpreakerIE, - SpreakerAPIEpisodeIE, SpreakerPlaylistIE ) from .springboardplatform import SpringboardPlatformIE diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index e27078b76..d89fcdc92 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -8,16 +8,84 @@ from ..compat import compat_str from ..utils import int_or_none, ExtractorError +def _spreaker_episode_data_to_info(data): + published_at = data.get('published_at') + upload_date = None + if published_at: + upload_date = published_at[0:10].replace('-', '') + author = data.get('author', {}) + stats = data.get('stats', {}) + view_count = like_count = comment_count = None + show = data.get('show', {}) + show_image = show.get('image', {}) + + if stats: + plays = stats.get('plays') + plays_streaming = stats.get('plays_streaming') + plays_download = stats.get('plays_download') + view_count = None + for x in [plays, plays_streaming, plays_download]: + if x is None: + continue + if view_count is None: + view_count = x + else: + view_count += x + like_count = stats.get('likes') + comment_count = stats.get('messages') + + return { + 'id': compat_str(data['episode_id']), + 'title': data['title'], + 'url': data['download_url'], + 'display_id': data.get('permalink'), + 'webpage_url': data.get('site_url'), + 'uploader': author.get('fullname'), + 'creator': author.get('fullname'), + 'release_date': upload_date, + 'upload_date': upload_date, + 'uploader_id': author.get('user_id'), + 'duration': int_or_none(data.get('length')), + 'view_count': int_or_none(view_count), + 'like_count': int_or_none(like_count), + 'comment_count': int_or_none(comment_count), + 'format': 'MPEG Layer 3', + 'format_id': 'mp3', + 'container': 'mp3', + 'ext': 'mp3', + 'thumbnail': show_image.get('big_url'), + 'language': show.get('language'), + 'thumbnails': [ + { + 'id': show_image.get('image_id'), + 'url': show_image.get('big_url'), + 'width': int_or_none(show_image.get('width')), + 'height': int_or_none(show_image.get('height')), + }, + { + 'url': show_image.get('large_url'), + }, + { + 'url': show_image.get('medium_url') + }, + { + 'url': show_image.get('small_url') + }, + ], + 'extractor': 'spreaker', + } + + class SpreakerPlaylistIE(InfoExtractor): IE_NAME = 'spreaker:playlist' - _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+' + _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+' _TEST = { - 'url': 'https://www.spreaker.com/show/success-with-music', - 'info_dict': { - 'title': 'Success With Music', - 'id': 2317431, - }, - 'playlist_mincount': 14, + 'url': 'https://www.spreaker.com/show/success-with-music', + 'info_dict': { + 'title': 'Success With Music', + 'id': 2317431, + }, + 'playlist_mincount': 14, } def _real_extract(self, url): @@ -49,144 +117,76 @@ class SpreakerPlaylistIE(InfoExtractor): page_no += 1 urls = [x['api_url'] for x in items] - ret = [] + entries = [] for index, url in enumerate(urls): data = self._download_json(url, None)['response']['episode'] - dict_ = SpreakerIE._spreaker_episode_data_to_info(data) - dict_.update({ - 'playlist_id': compat_str(data['show_id']), - 'playlist_title': data['show']['title'], - 'playlist_index': index, - }) - ret.append(dict_) + dict_ = _spreaker_episode_data_to_info(data) + entries.append(dict_) - return self.playlist_result(ret, + return self.playlist_result(entries, data['show_id'], data['show']['title']) -class SpreakerAPIEpisodeIE(InfoExtractor): - IE_NAME = 'spreaker:api' - _VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P[0-9]+)(?:/[^\.]+\.mp3$)?' +class SpreakerIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = (r'https?://(?:(?:api|www)\.)?spreaker\.com/' + r'(?:(?:(?:download/)?episode/(?P[0-9]+)' + r'(?:/[^\.]+\.mp3$)?)|user/[a-z0-9_-]+/[a-z0-9_-]+)') _TESTS = [ { 'url': 'https://api.spreaker.com/episode/12534508', 'info_dict': { 'id': '12534508', 'ext': 'mp3', - 'title': 'Marketing Your Music - Part 2', + 'title': 'EP:15 | Music Marketing (Likes) - Part 2', 'upload_date': '20170809', 'uploader': 'SWM', 'uploader_id': 9780658, }, }, { - 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', + 'url': ('https://api.spreaker.com/download/episode/12534508/' + 'swm_ep15_how_to_market_your_music_part_2.mp3'), 'info_dict': { 'id': '12534508', 'ext': 'mp3', - 'title': 'Marketing Your Music - Part 2', + 'title': 'EP:15 | Music Marketing (Likes) - Part 2', 'upload_date': '20170809', 'uploader': 'SWM', 'uploader_id': 9780658, }, }, + { + 'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-' + 'market-your-music-part-2'), + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'EP:15 | Music Marketing (Likes) - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + } ] def _real_extract(self, url): episode_id = self._match_id(url) - if not re.match(r'^[0-9]+$', episode_id): - raise ExtractorError('Invalid ID') + if re.match(r'^[0-9]+$', episode_id): + url = 'https://api.spreaker.com/episode/%s' % (episode_id,) + else: + html = self._download_webpage(url, + None, + note='Downloading episode page') + episode_id = self._html_search_regex( + r'data-episode_id="(?P[0-9]+)"', html, 'id') + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Could not find episode ID') + url = 'https://api.spreaker.com/episode/%s' % (episode_id) - url = 'https://api.spreaker.com/episode/%s' % (episode_id,) data = self._download_json(url, episode_id)['response']['episode'] if not data['download_enabled']: raise ExtractorError('Not supported yet') - return SpreakerIE._spreaker_episode_data_to_info(data) - - -class SpreakerIE(InfoExtractor): - IE_NAME = 'spreaker' - _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/user/[a-z0-9_-]+/[a-z0-9_-]' - _TEST = { - 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', - 'info_dict': { - 'id': '12534508', - 'ext': 'mp3', - 'title': 'Marketing Your Music - Part 2', - 'upload_date': '20170809', - 'uploader': 'SWM', - 'uploader_id': 9780658, - }, - } - - @staticmethod - def _spreaker_episode_data_to_info(data): - published_at = data.get('published_at') - upload_date = None - if published_at: - upload_date = published_at[0:10].replace('-', '') - author = data.get('author', {}) - stats = data.get('stats', {}) - view_count = like_count = comment_count = None - show = data.get('show', {}) - show_image = show.get('image', {}) - - if stats: - view_count = (stats.get('plays', 0) + - stats.get('plays_streaming', 0) + - stats.get('plays_download', 0)) - like_count = stats.get('likes', 0) - comment_count = stats.get('messages', 0) - - return { - 'id': compat_str(data['episode_id']), - 'title': data['title'], - 'url': data['download_url'], - 'display_id': data.get('permalink'), - 'webpage_url': data.get('site_url'), - 'uploader': author.get('fullname'), - 'creator': author.get('fullname'), - 'release_date': upload_date, - 'upload_date': upload_date, - 'uploader_id': author.get('user_id'), - 'duration': int_or_none(data.get('length')), - 'view_count': int_or_none(view_count), - 'like_count': int_or_none(like_count), - 'comment_count': int_or_none(comment_count), - 'format': 'MPEG Layer 3', - 'format_id': 'mp3', - 'container': 'mp3', - 'ext': 'mp3', - 'thumbnail': show_image.get('big_url'), - 'language': show.get('language'), - 'thumbnails': [ - { - 'id': show_image.get('image_id'), - 'url': show_image.get('big_url'), - 'width': int_or_none(show_image.get('width')), - 'height': int_or_none(show_image.get('height')), - }, - { - 'url': show_image.get('large_url'), - }, - { - 'url': show_image.get('medium_url') - }, - { - 'url': show_image.get('small_url') - }, - ], - 'extractor': 'spreaker', - } - - def _real_extract(self, url): - html = self._download_webpage(url, None) - episode_id = self._html_search_regex( - r'data-episode_id="(?P[0-9]+)"', html, 'id') - if not re.match(r'^[0-9]+$', episode_id): - raise ExtractorError('Could not find episode ID') - data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) - - return self.url_result(data_url) + return _spreaker_episode_data_to_info(data)