From 09a25faef3a65904b17998622215f6a5634b952e Mon Sep 17 00:00:00 2001 From: Ryan Hayward Date: Thu, 31 May 2018 11:20:57 -0500 Subject: [PATCH] Find mediaIds via "guid":"\d+" in the webpage The existing searches for Caffeine function calls and iframe src mediaIds no longer match anything in the webpage content for the provided URL. The page now contains JSON with "guid":"\d+" entries, which yield the mediaIds. The new matches are appended with an extend rather than replacing the old searches, in case some content still has the old iframe or Caffeine calls. --- youtube_dl/extractor/cbc.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index ce8e3d346..35252b264 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -136,9 +136,15 @@ class CBCIE(InfoExtractor): entries = [ self._extract_player_init(player_init, display_id) for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] + entries.extend([ self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)]) + + entries.extend([ + self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + for media_id in re.findall(r'"guid":"(\d+)"', webpage)]) + return self.playlist_result( entries, display_id, strip_or_none(title), self._og_search_description(webpage))