[ncpaclassic] Add new extractor

This commit is contained in:
difeng 2017-01-03 17:57:44 +08:00
parent 424ed37ec4
commit 248d456db3
2 changed files with 108 additions and 0 deletions

View File

@ -664,6 +664,10 @@ from .nytimes import (
)
from .nuvid import NuvidIE
from .nzz import NZZIE
from .ncpaclassic import (
NcpaClassicVideoIE,
NcpaClassicAudioIE
)
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE

View File

@ -0,0 +1,104 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class NcpaClassicVideoIE(InfoExtractor):
    """Build a playlist from an ncpa-classic ``VIDE``/``VIDA`` page.

    The page kind is taken from the URL:

    * ``VIDE<id>.shtml`` - one id is read from the ``initMyAray`` script
      variable and every item of the API's ``chapters2`` list becomes a
      playlist entry.
    * ``VIDA<id>.shtml`` - every quoted 32-character token found in the page
      is queried separately and the first item of its ``chapters`` list
      becomes a playlist entry (presumably one audio track per token -
      TODO confirm against a live page).
    """

    # Both host names that appear in _TESTS are accepted.  ``kind`` captures
    # the letter after ``VID`` (E or A) and selects the extraction branch;
    # ``id`` must be non-empty.
    _VALID_URL = r'https?://(?:www\.ncpa-classic\.com|ncpa-classic\.cntv\.cn)/[0-9]{4}/[0-9]{2}/[0-9]{2}/VID(?P<kind>[EA])(?P<id>\w+)\.shtml'
    _TESTS = [{
        'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml',
        'info_dict': {
            'id': '1369219508996867',
            'title': '小泽征尔音乐塾 音乐梦想无国界_古典音乐频道'
        },
        'playlist_count': 8,
    }, {
        'url': 'http://ncpa-classic.cntv.cn/2013/05/22/VIDE1369219508996867.shtml',
        'info_dict': {
            'id': '1369219508996867',
            'title': '小泽征尔音乐塾 音乐梦想无国界_古典音乐频道'
        },
        'playlist_count': 8,
    }]

    # Filled with (pid, page url).
    _API_URL_TEMPLATE = (
        'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do'
        '?pid=%s&tz=-8&from=000dajuyuan&url=%s&idl=32&idlr=32&modifyed=false')

    def _fetch_chapters(self, pid, url, playlist_id, key):
        """Return the list stored under ``video -> key`` for *pid*.

        A warning is reported and an empty list returned when the API
        response does not contain that list, instead of failing with a bare
        ``KeyError``.
        """
        api_result = self._download_json(
            self._API_URL_TEMPLATE % (pid, url),
            playlist_id, 'Get playlist links')
        chapters = (api_result.get('video') or {}).get(key)
        if not chapters:
            self.report_warning(
                'No "%s" entries in API response for pid %s' % (key, pid))
            return []
        return chapters

    def _extract_vide_entries(self, url, playlist_id, playlist_title, webpage):
        """Return one entry per ``chapters2`` item that carries a URL."""
        video_center_id = self._html_search_regex(
            r'var initMyAray=\s*\'(?P<videoCenterId>\w+)\'', webpage,
            'videoCenterId', group='videoCenterId')
        chapters = self._fetch_chapters(
            video_center_id, url, playlist_id, 'chapters2')
        return [{
            '_type': 'video',
            # Prefix with the playlist id so ids stay unique across pages.
            'id': '%s_%d' % (playlist_id, idx),
            'title': playlist_title,
            'url': chapter['url'],
        } for idx, chapter in enumerate(chapters) if chapter.get('url')]

    def _extract_vida_entries(self, url, playlist_id, playlist_title, webpage):
        """Return one entry per 32-character token found in *webpage*."""
        sub_titles = re.findall(r'<td.*changeAudio_url.*>(.*)</td>', webpage)
        vida_ids = re.findall(r'"(\w{32})"', webpage)
        entries = []
        for idx, vida_id in enumerate(vida_ids):
            # The two findall() results are matched purely by position; fall
            # back to the page title when a per-item title is missing/empty.
            title = sub_titles[idx].strip() if idx < len(sub_titles) else ''
            chapters = self._fetch_chapters(
                vida_id, url, playlist_id, 'chapters')
            real_url = chapters[0].get('url') if chapters else None
            if not real_url:
                continue
            entries.append({
                '_type': 'video',
                'id': '%s_%d' % (playlist_id, idx),
                'title': title or playlist_title,
                'url': real_url,
            })
        return entries

    def _real_extract(self, url):
        """Download the page at *url* and return a playlist result.

        Raises ExtractorError when the URL is neither a VIDE nor a VIDA page.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        kind = mobj.group('kind')

        if kind == 'E':
            build_entries = self._extract_vide_entries
        elif kind == 'A':
            build_entries = self._extract_vida_entries
        else:
            raise ExtractorError('Unexpected url %s' % url, expected=True)

        webpage = self._download_webpage(url, playlist_id)
        playlist_title = self._html_search_regex(
            r'<title>(?P<title>.*)</title>', webpage,
            'title', group='title')

        entries = build_entries(url, playlist_id, playlist_title, webpage)
        return self.playlist_result(entries, playlist_id, playlist_title)
class NcpaClassicAudioIE(InfoExtractor):
    """Build a playlist from an ncpa-classic ``/clt/more/<id>/index.shtml`` page.

    The first quoted 32-character token in the page is sent to the
    ``getHttpVideoInfo`` API and every item of the returned ``chapters``
    list that carries a URL becomes one playlist entry.
    """

    # Both host names that appear in _TESTS are accepted; the numeric id must
    # be non-empty and the dot before ``shtml`` is matched literally.
    _VALID_URL = r'https?://(?:www\.ncpa-classic\.com|ncpa-classic\.cntv\.cn)/clt/more/(?P<id>[0-9]+)/index\.shtml'
    _TESTS = [{
        'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
        'info_dict': {
            'id': '416',
            'title': '来自维也纳的新年贺礼'
        },
        'playlist_count': 1,
    }, {
        'url': 'http://ncpa-classic.cntv.cn/clt/more/416/index.shtml',
        'info_dict': {
            'id': '416',
            'title': '来自维也纳的新年贺礼'
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and return a playlist result."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        video_center_id = self._html_search_regex(
            r'\"(?P<videoCenterId>\w{32})\"', webpage,
            'videoCenterId', group='videoCenterId')
        playlist_title = self._html_search_regex(
            r'<title>(?P<title>.*)</title>', webpage,
            'title', group='title')

        api_result = self._download_json(
            'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=%s&tz=-8&tai=dajuyuanaudio' % (
                video_center_id),
            playlist_id, 'Get playlist links')

        # Tolerate a response without the expected list instead of failing
        # with a bare KeyError; the user is told why the playlist is empty.
        chapters = (api_result.get('video') or {}).get('chapters')
        if not chapters:
            self.report_warning(
                'No "chapters" entries in API response for pid %s'
                % video_center_id)
            chapters = []

        entries = [{
            '_type': 'video',
            # Prefix with the playlist id so ids stay unique across pages.
            'id': '%s_%d' % (playlist_id, idx),
            'title': playlist_title,
            'url': chapter['url'],
        } for idx, chapter in enumerate(chapters) if chapter.get('url')]

        return self.playlist_result(entries, playlist_id, playlist_title)