From 1a3ef62ac89993e9af5ed68d790d993c5a267f68 Mon Sep 17 00:00:00 2001 From: reiv Date: Sat, 17 Oct 2015 18:23:46 +0200 Subject: [PATCH 1/6] [soundcloud] Add Soundcloud search extractor --- youtube_dl/extractor/__init__.py | 3 +- youtube_dl/extractor/soundcloud.py | 88 +++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 462717b1e..b3cc107e7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -567,7 +567,8 @@ from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE, - SoundcloudPlaylistIE + SoundcloudPlaylistIE, + SoundcloudSearchIE ) from .soundgasm import ( SoundgasmIE, diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2b60d354a..7395a9848 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re import itertools -from .common import InfoExtractor +from .common import ( + InfoExtractor, + SearchInfoExtractor +) from ..compat import ( compat_str, compat_urlparse, @@ -469,3 +472,86 @@ class SoundcloudPlaylistIE(SoundcloudIE): 'description': data.get('description'), 'entries': entries, } + + +class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): + IE_NAME = 'soundcloud:search' + IE_DESC = 'Soundcloud search' + _MAX_RESULTS = 200 + _TESTS = [{ + 'url': 'scsearch15:post-avant jazzcore', + 'info_dict': { + 'title': 'post-avant jazzcore', + }, + 'playlist_count': 15, + }] + + _SEARCH_KEY = 'scsearch' + _RESULTS_PER_PAGE = 50 + + def _get_collection(self, endpoint, collection_id, **query): + import itertools + + query['limit'] = self._RESULTS_PER_PAGE + query['client_id'] = self._CLIENT_ID + query['linked_partitioning'] = '1' + + api_base_url = '{0}//api-v2.soundcloud.com'.format(self.http_scheme()) + + total_results = self._MAX_RESULTS + collected_results = 0 + + next_url = None + + for i in itertools.count(): + + if not next_url: + query['offset'] = i * self._RESULTS_PER_PAGE + data = compat_urllib_parse.urlencode(query) + next_url = '{0}{1}?{2}'.format(api_base_url, endpoint, data) + + response = self._download_json(next_url, + video_id=collection_id, + note='Downloading page {0}'.format(i+1), + errnote='Unable to download API page') + + total_results = int(response.get( + u'total_results', total_results)) + + collection = response['collection'] + collected_results += len(collection) + + for item in filter(bool, collection): + yield item + + if collected_results >= total_results or not collection: + break + + next_url = response.get(u'next_href', None) + + def _get_n_results(self, query, n): + + results = [] + + tracks = self._get_collection('/search/tracks', + collection_id='Query "{}"'.format(query), + q=query.encode('utf-8')) + + for _ in range(n): + try: + track = next(tracks) + except StopIteration: + break + uri = track[u'uri'] + title = track[u'title'] + username = track[u'user'][u'username'] + results.append(self.url_result( + url=uri, + video_title='{0} - {1}'.format(username, title))) + + if not results: + raise ExtractorError( + '[soundcloud] No track results', expected=True) + + return self.playlist_result(results[:n], playlist_title=query) + From 3a338aa58de5b65f971b05943cc4160c7e485afd Mon Sep 17 00:00:00 2001 From: reiv Date: Sat, 17 Oct 2015 21:18:42 +0200 Subject: [PATCH 2/6] Fix some compatibility issues, cleanup. --- youtube_dl/extractor/soundcloud.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 7395a9848..3fe991849 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -488,27 +488,23 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): _SEARCH_KEY = 'scsearch' _RESULTS_PER_PAGE = 50 + _API_V2_BASE = 'https://api-v2.soundcloud.com' def _get_collection(self, endpoint, collection_id, **query): - import itertools - query['limit'] = self._RESULTS_PER_PAGE query['client_id'] = self._CLIENT_ID query['linked_partitioning'] = '1' - api_base_url = '{0}//api-v2.soundcloud.com'.format(self.http_scheme()) - total_results = self._MAX_RESULTS collected_results = 0 next_url = None for i in itertools.count(): - if not next_url: query['offset'] = i * self._RESULTS_PER_PAGE data = compat_urllib_parse.urlencode(query) - next_url = '{0}{1}?{2}'.format(api_base_url, endpoint, data) + next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) response = self._download_json(next_url, video_id=collection_id, @@ -516,7 +512,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): errnote='Unable to download API page') total_results = int(response.get( - u'total_results', total_results)) + 'total_results', total_results)) collection = response['collection'] collected_results += len(collection) @@ -527,14 +523,13 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): if collected_results >= total_results or not collection: break - next_url = response.get(u'next_href', None) + next_url = response.get('next_href', None) def _get_n_results(self, query, n): - results = [] tracks = self._get_collection('/search/tracks', - collection_id='Query "{}"'.format(query), + collection_id='Query "{0}"'.format(query), q=query.encode('utf-8')) for _ in range(n): @@ -542,12 +537,9 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): track = next(tracks) except StopIteration: break - uri = track[u'uri'] - title = track[u'title'] - username = track[u'user'][u'username'] - results.append(self.url_result( - url=uri, - video_title='{0} - {1}'.format(username, title))) + uri = track['uri'] + title = track['title'] + results.append(self.url_result(url=uri)) if not results: raise ExtractorError( From 7fa56bd34564e0855b51b0899b4966e4110a8147 Mon Sep 17 00:00:00 2001 From: reiv Date: Sat, 17 Oct 2015 22:36:08 +0200 Subject: [PATCH 3/6] Simplify with itertools.islice(). --- youtube_dl/extractor/soundcloud.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 3fe991849..959f27975 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -532,11 +532,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): collection_id='Query "{0}"'.format(query), q=query.encode('utf-8')) - for _ in range(n): - try: - track = next(tracks) - except StopIteration: - break + for track in itertools.islice(tracks, n): uri = track['uri'] title = track['title'] results.append(self.url_result(url=uri)) From 49a1d71e8749c881f565e574aa43ca53ff2b7010 Mon Sep 17 00:00:00 2001 From: reiv Date: Sat, 17 Oct 2015 22:47:16 +0200 Subject: [PATCH 4/6] Rewrite as list comprehension. --- youtube_dl/extractor/soundcloud.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 959f27975..c623bb6de 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -526,16 +526,12 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): next_url = response.get('next_href', None) def _get_n_results(self, query, n): - results = [] - tracks = self._get_collection('/search/tracks', collection_id='Query "{0}"'.format(query), q=query.encode('utf-8')) - for track in itertools.islice(tracks, n): - uri = track['uri'] - title = track['title'] - results.append(self.url_result(url=uri)) + results = [self.url_result(url=track['uri']) + for track in itertools.islice(tracks, n)] if not results: raise ExtractorError( From 2a0ec3f81d5fd60ed0832be66153cca7918fa5b7 Mon Sep 17 00:00:00 2001 From: reiv Date: Sun, 18 Oct 2015 12:43:25 +0200 Subject: [PATCH 5/6] [soundcloud] Use correct error message conventions --- youtube_dl/extractor/soundcloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index c623bb6de..6b510a43b 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -535,7 +535,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): if not results: raise ExtractorError( - '[soundcloud] No track results', expected=True) + 'Soundcloud said: No track results', expected=True) - return self.playlist_result(results[:n], playlist_title=query) + return self.playlist_result(results, playlist_title=query) From e3e37ffe3f6d432a9f7785cadf8201442625a290 Mon Sep 17 00:00:00 2001 From: reiv Date: Fri, 30 Oct 2015 23:56:07 +0100 Subject: [PATCH 6/6] [soundcloud] Remove limit on search results --- youtube_dl/extractor/soundcloud.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 6b510a43b..bba29d845 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -477,7 +477,7 @@ class SoundcloudPlaylistIE(SoundcloudIE): class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): IE_NAME = 'soundcloud:search' IE_DESC = 'Soundcloud search' - _MAX_RESULTS = 200 + _MAX_RESULTS = float('inf') _TESTS = [{ 'url': 'scsearch15:post-avant jazzcore', 'info_dict': { @@ -487,24 +487,28 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): }] _SEARCH_KEY = 'scsearch' - _RESULTS_PER_PAGE = 50 + _MAX_RESULTS_PER_PAGE = 200 + _DEFAULT_RESULTS_PER_PAGE = 50 _API_V2_BASE = 'https://api-v2.soundcloud.com' def _get_collection(self, endpoint, collection_id, **query): - query['limit'] = self._RESULTS_PER_PAGE + query['limit'] = results_per_page = min( + query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), + self._MAX_RESULTS_PER_PAGE) query['client_id'] = self._CLIENT_ID query['linked_partitioning'] = '1' - total_results = self._MAX_RESULTS + total_results = None collected_results = 0 next_url = None for i in itertools.count(): if not next_url: - query['offset'] = i * self._RESULTS_PER_PAGE + query['offset'] = i * results_per_page data = compat_urllib_parse.urlencode(query) - next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) + next_url = '{0}{1}?{2}'.format( + self._API_V2_BASE, endpoint, data) response = self._download_json(next_url, video_id=collection_id, @@ -520,7 +524,8 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): for item in filter(bool, collection): yield item - if collected_results >= total_results or not collection: + if (total_results is not None and + collected_results >= total_results) or not collection: break next_url = response.get('next_href', None) @@ -528,7 +533,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): def _get_n_results(self, query, n): tracks = self._get_collection('/search/tracks', collection_id='Query "{0}"'.format(query), - q=query.encode('utf-8')) + limit=n, q=query) results = [self.url_result(url=track['uri']) for track in itertools.islice(tracks, n)]