From f4dd4b9b81e1a6867ffe34ee87ac399372c5c47e Mon Sep 17 00:00:00 2001 From: Ryan Hayward Date: Mon, 21 May 2018 11:31:01 -0500 Subject: [PATCH 1/3] Fix for issue 15310 Found that _real_extract(...) was not converting the id from an int to a string. Wrapped it in compat_str(), which eliminated the alarming message. --- youtube_dl/extractor/audiomack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index f3bd4d444..62049b921 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -65,7 +65,7 @@ class AudiomackIE(InfoExtractor): return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'} return { - 'id': api_response.get('id', album_url_tag), + 'id': compat_str(api_response.get('id', album_url_tag)), 'uploader': api_response.get('artist'), 'title': api_response.get('title'), 'url': api_response['url'], From 09a25faef3a65904b17998622215f6a5634b952e Mon Sep 17 00:00:00 2001 From: Ryan Hayward Date: Thu, 31 May 2018 11:20:57 -0500 Subject: [PATCH 2/3] Find mediaIds by matching "guid":"\d+" in the webpage It looks like the existing searches for Caffeine function calls and iframe src mediaIds no longer match anything in the webpage content for the provided URL. The page now contains JSON with "guid":"\d+" entries, and matching those yields the mediaIds. The new search is added with an extend, and the existing searches are kept, in case some content still has the old iframe or Caffeine calls. 
--- youtube_dl/extractor/cbc.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index ce8e3d346..35252b264 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -136,9 +136,15 @@ class CBCIE(InfoExtractor): entries = [ self._extract_player_init(player_init, display_id) for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] + entries.extend([ self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]) + + entries.extend([ + self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + for media_id in re.findall(r'"guid":"(\d+)"', webpage)]) + return self.playlist_result( entries, display_id, strip_or_none(title), self._og_search_description(webpage)) From 6d73ff329c27d71539f3f33ec3bee4e7c0ebca11 Mon Sep 17 00:00:00 2001 From: Ryan Hayward Date: Thu, 31 May 2018 13:09:30 -0500 Subject: [PATCH 3/3] combined regex results to de-duplicate code --- youtube_dl/extractor/cbc.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 35252b264..a632ad620 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -139,11 +139,9 @@ class CBCIE(InfoExtractor): entries.extend([ self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]) - - entries.extend([ - self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - for media_id in re.findall(r'"guid":"(\d+)"', webpage)]) + for media_id in + re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage) + re.findall(r'"guid":"(\d+)"', webpage) + ]) return self.playlist_result( entries, display_id, strip_or_none(title),