[cbc] Add extractor

This commit is contained in:
James Oakley 2015-11-11 20:16:53 -08:00
parent 031ec536f0
commit 7c6bc92ae0
2 changed files with 98 additions and 0 deletions

View File

@ -71,6 +71,7 @@ from .camdemy import (
from .canal13cl import Canal13clIE from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .cbc import CBCIE
from .cbs import CBSIE from .cbs import CBSIE
from .cbsnews import CBSNewsIE from .cbsnews import CBSNewsIE
from .cbssports import CBSSportsIE from .cbssports import CBSSportsIE

View File

@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .common import InfoExtractor
CBC_CAFFEINE_MODULES_URL = 'http://www.cbc.ca/i/caffeine/js/Caffeine.modules.js'
class CBCIE(InfoExtractor):
IE_DESC = 'cbc.ca'
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/.*/episodes/(?P<id>season-\d+/.+)'
_TESTS = [{
'url': 'http://www.cbc.ca/22minutes/episodes/season-23/episode-197',
'md5': '9108d19314a116778932b874caf9bc91',
'info_dict': {
'id': 'season-23/episode-197',
'ext': 'mp4',
'title': '22 Minutes - S23E01 - Episode 1',
'description': 'md5:03e943f67d535a48522b5bb4ba7cf812',
'thumbnail': 're:http://.*\.jpg',
'duration': 1315,
'timestamp': 1444177800,
'upload_date': '20151007',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
episode_page = self._download_webpage(url, video_id)
clip_id = self._search_regex(
r"CBC.APP.Caffeine.initInstance\({'clipId':\s*'(\d+)'",
episode_page,
'Clip ID'
)
caffeine_js = self._download_webpage(CBC_CAFFEINE_MODULES_URL, video_id)
caffeine_content_url = self._search_regex(
r'(http://tpfeed.cbc.ca/[a-zA-Z0-9/]+\?byContent=byReleases%3DbyId%253D)',
caffeine_js,
'Caffeine content URL'
)
caffeine_media_url = self._search_regex(
r'(http://tpfeed.cbc.ca/[a-zA-Z0-9/_]+\?)"',
caffeine_js,
'Caffeine media URL'
)
mpx_account_id = self._search_regex(
r'MPX_ACCOUNT_PID:"([a-zA-Z0-9/]+)"',
caffeine_js,
'MPX account ID'
)
caffeine_content = self._download_json(
caffeine_content_url + clip_id + '&fields=content',
video_id
)
media_id = caffeine_content['entries'][0]['content'][0]['releases'][0]['mediaId'].split('/')[-1]
caffeine_media = self._download_json(
caffeine_media_url + 'q=*&byGuid=' + media_id,
video_id
)
episode_info = caffeine_media['entries'][0]
from pprint import pprint
pprint(episode_info)
thumbnails = []
for thumbnail in episode_info['thumbnails']:
thumbnails.append(
{
'url': thumbnail['url'],
'width': thumbnail['width'],
'height': thumbnail['height'],
}
)
theplatform_url = 'http://player.theplatform.com/p/{mpx_account_id}/default_prod_vms/embed/select/media/{pid}'.format(
mpx_account_id=mpx_account_id,
pid=episode_info['pid']
)
return {
'_type': 'url_transparent',
'id': video_id,
'title': '{cbc$show} - S{cbc$seasonNumber}E{cbc$episodeNumber} - {title}'.format(**episode_info),
'description': episode_info['description'],
'timestamp': episode_info['pubDate']/1000,
'url': theplatform_url,
'thumbnails': thumbnails,
}