Extract mediaIds from "guid":"\d+" entries found in the webpage

It looks like the existing searches for Caffeine function calls and iframe src mediaIds no longer match anything in the webpage content for the provided URL. The page now contains JSON with "guid":"\d+" entries, and matching those yields the mediaIds. The new search is added as an additional extend, keeping the existing searches in place in case some content still uses the old iframe or Caffeine calls.
2018-05-31 11:20:57 -05:00 · 2018-05-31 11:20:57 -05:00 · 09a25faef3
commit 09a25faef3
parent 0ec6aa0324
1 changed files with 6 additions and 0 deletions
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@ -136,9 +136,15 @@ class CBCIE(InfoExtractor):
        entries = [
            self._extract_player_init(player_init, display_id)
            for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+
        entries.extend([
            self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
            for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
+
+        entries.extend([
+            self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+            for media_id in re.findall(r'"guid":"(\d+)"', webpage)])
+
        return self.playlist_result(
            entries, display_id, strip_or_none(title),
            self._og_search_description(webpage))