[cbc] Add extractor
This commit is contained in:
parent
031ec536f0
commit
7c6bc92ae0
@ -71,6 +71,7 @@ from .camdemy import (
|
|||||||
from .canal13cl import Canal13clIE
|
from .canal13cl import Canal13clIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
|
from .cbc import CBCIE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbsnews import CBSNewsIE
|
from .cbsnews import CBSNewsIE
|
||||||
from .cbssports import CBSSportsIE
|
from .cbssports import CBSSportsIE
|
||||||
|
97
youtube_dl/extractor/cbc.py
Normal file
97
youtube_dl/extractor/cbc.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
# URL of CBC's Caffeine modules JavaScript file. CBCIE downloads this script
# and searches it for the tpfeed.cbc.ca feed URL prefixes and the
# MPX_ACCOUNT_PID value it needs to resolve an episode.
CBC_CAFFEINE_MODULES_URL = 'http://www.cbc.ca/i/caffeine/js/Caffeine.modules.js'
|
||||||
|
|
||||||
|
|
||||||
|
class CBCIE(InfoExtractor):
    """Extractor for full episodes hosted on cbc.ca.

    The episode page only exposes a numeric clip id.  The feed URL prefixes
    and the MPX account id are scraped from the Caffeine modules script, then
    two JSON feeds are queried to turn the clip id into a media entry.  The
    actual formats are delegated to the thePlatform player URL through a
    ``url_transparent`` result.
    """

    IE_DESC = 'cbc.ca'
    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/.*/episodes/(?P<id>season-\d+/.+)'

    _TESTS = [{
        'url': 'http://www.cbc.ca/22minutes/episodes/season-23/episode-197',
        'md5': '9108d19314a116778932b874caf9bc91',
        'info_dict': {
            'id': 'season-23/episode-197',
            'ext': 'mp4',
            'title': '22 Minutes - S23E01 - Episode 1',
            'description': 'md5:03e943f67d535a48522b5bb4ba7cf812',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:http://.*\.jpg',
            'duration': 1315,
            'timestamp': 1444177800,
            'upload_date': '20151007',
        },
    }]

    def _real_extract(self, url):
        """Resolve an episode page URL into a ``url_transparent`` result.

        :param url: episode page URL matching ``_VALID_URL``.
        :returns: info dict with ``_type`` ``url_transparent`` whose ``url``
            points at the thePlatform player for the episode, plus the
            title, description, timestamp and thumbnails read from the feed.
        :raises KeyError, IndexError: if either JSON feed lacks the entries
            needed to identify the media (behaviour unchanged from before).
        """
        video_id = self._match_id(url)
        episode_page = self._download_webpage(url, video_id)

        clip_id = self._search_regex(
            r"CBC\.APP\.Caffeine\.initInstance\(\{'clipId':\s*'(\d+)'",
            episode_page, 'Clip ID')

        # The feed endpoints and the account id are not present in the page
        # itself; they are taken from the shared Caffeine script.
        caffeine_js = self._download_webpage(
            CBC_CAFFEINE_MODULES_URL, video_id)

        caffeine_content_url = self._search_regex(
            r'(http://tpfeed\.cbc\.ca/[a-zA-Z0-9/]+'
            r'\?byContent=byReleases%3DbyId%253D)',
            caffeine_js, 'Caffeine content URL')
        caffeine_media_url = self._search_regex(
            r'(http://tpfeed\.cbc\.ca/[a-zA-Z0-9/_]+\?)"',
            caffeine_js, 'Caffeine media URL')
        mpx_account_id = self._search_regex(
            r'MPX_ACCOUNT_PID:"([a-zA-Z0-9/]+)"',
            caffeine_js, 'MPX account ID')

        # First feed: clip id -> release; the last path component of its
        # mediaId is what the second feed is queried with (as byGuid).
        caffeine_content = self._download_json(
            caffeine_content_url + clip_id + '&fields=content', video_id)
        first_release = (
            caffeine_content['entries'][0]['content'][0]['releases'][0])
        media_id = first_release['mediaId'].split('/')[-1]

        # Second feed: the media entry carrying the episode metadata.
        caffeine_media = self._download_json(
            caffeine_media_url + 'q=*&byGuid=' + media_id, video_id)
        episode_info = caffeine_media['entries'][0]

        # Thumbnails are optional metadata: skip entries without a URL and
        # tolerate missing dimensions instead of aborting the extraction.
        thumbnails = [
            {
                'url': thumbnail['url'],
                'width': thumbnail.get('width'),
                'height': thumbnail.get('height'),
            }
            for thumbnail in episode_info.get('thumbnails') or []
            if thumbnail.get('url')
        ]

        theplatform_url = (
            'http://player.theplatform.com/p/{mpx_account_id}'
            '/default_prod_vms/embed/select/media/{pid}'
        ).format(mpx_account_id=mpx_account_id, pid=episode_info['pid'])

        # pubDate is presumably in milliseconds (the expected test timestamp
        # is in seconds).  Floor division keeps the result an int on both
        # Python 2 and Python 3.
        pub_date = episode_info.get('pubDate')
        timestamp = pub_date // 1000 if pub_date is not None else None

        title = (
            '{cbc$show} - S{cbc$seasonNumber}E{cbc$episodeNumber} - {title}'
        ).format(**episode_info)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'description': episode_info.get('description'),
            'timestamp': timestamp,
            'url': theplatform_url,
            'thumbnails': thumbnails,
        }
|
Loading…
x
Reference in New Issue
Block a user