Extract mediaIds from "guid":"\d+" entries found in the webpage

It looks like the existing searches for Caffeine function calls and
iframe src mediaIds do not match anything in the webpage content for
the provided URL. The page does contain JSON with "guid":"\d+" entries,
which now supply the mediaIds. The new search is added via an extend,
in case some content still has the old iframe or Caffeine calls.
This commit is contained in:
Ryan Hayward 2018-05-31 11:20:57 -05:00
parent 0ec6aa0324
commit 09a25faef3

View File

@ -136,9 +136,15 @@ class CBCIE(InfoExtractor):
entries = [
self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
entries.extend([
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
entries.extend([
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
for media_id in re.findall(r'"guid":"(\d+)"', webpage)])
return self.playlist_result(
entries, display_id, strip_or_none(title),
self._og_search_description(webpage))