[ncpaclassic] Add new extractor
This commit is contained in:
parent
424ed37ec4
commit
248d456db3
@ -664,6 +664,10 @@ from .nytimes import (
|
|||||||
)
|
)
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .nzz import NZZIE
|
from .nzz import NZZIE
|
||||||
|
from .ncpaclassic import (
|
||||||
|
NcpaClassicVideoIE,
|
||||||
|
NcpaClassicAudioIE
|
||||||
|
)
|
||||||
from .odatv import OdaTVIE
|
from .odatv import OdaTVIE
|
||||||
from .odnoklassniki import OdnoklassnikiIE
|
from .odnoklassniki import OdnoklassnikiIE
|
||||||
from .oktoberfesttv import OktoberfestTVIE
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
|
104
youtube_dl/extractor/ncpaclassic.py
Normal file
104
youtube_dl/extractor/ncpaclassic.py
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
class NcpaClassicVideoIE(InfoExtractor):
    """Playlist extractor for NCPA Classic ``VIDE``/``VIDA`` pages.

    ``VIDE`` pages embed a single id in the ``initMyAray`` JavaScript
    variable; one API request returns every part under
    ``video.chapters2``.

    ``VIDA`` pages list several 32-character ids; each id is resolved with
    its own API request and the first item of ``video.chapters`` is used.
    """

    # Both host names used in _TESTS are accepted. The ``kind`` group tells
    # the two page layouts apart without substring checks on the whole URL.
    _VALID_URL = (
        r'https?://(?:www\.ncpa-classic\.com|ncpa-classic\.cntv\.cn)'
        r'/[0-9]{4}/[0-9]{2}/[0-9]{2}/(?P<kind>VID[EA])(?P<id>\w+)\.shtml')
    _TESTS = [{
        'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml',
        'info_dict': {
            'id': '1369219508996867',
            'title': '小泽征尔音乐塾 音乐梦想无国界_古典音乐频道'
        },
        'playlist_count': 8,
    }, {
        'url': 'http://ncpa-classic.cntv.cn/2013/05/22/VIDE1369219508996867.shtml',
        'info_dict': {
            'id': '1369219508996867',
            'title': '小泽征尔音乐塾 音乐梦想无国界_古典音乐频道'
        },
        'playlist_count': 8,
    }]

    # Filled with (pid, page url).
    _API_URL_TEMPLATE = (
        'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=%s&tz=-8'
        '&from=000dajuyuan&url=%s&idl=32&idlr=32&modifyed=false')

    def _call_api(self, pid, url, playlist_id):
        """Download and return the JSON description for *pid*."""
        return self._download_json(
            self._API_URL_TEMPLATE % (pid, url),
            playlist_id, 'Get playlist links')

    def _extract_vide_entries(self, url, webpage, playlist_id, playlist_title):
        """Return one entry per item of ``video.chapters2`` for a VIDE page."""
        video_center_id = self._html_search_regex(
            r'var initMyAray=\s *\'(?P<videoCenterId>\w*)\'', webpage,
            'videoCenterId', group='videoCenterId')
        api_result = self._call_api(video_center_id, url, playlist_id)
        return [{
            '_type': 'video',
            'id': '%s' % idx,
            'title': playlist_title,
            'url': video.get('url'),
        } for idx, video in enumerate(api_result['video']['chapters2'])]

    def _extract_vida_entries(self, url, webpage, playlist_id, playlist_title):
        """Return one entry per 32-character id found on a VIDA page."""
        sub_titles = re.findall(r'<td.*changeAudio_url.*>(.*)</td>', webpage)
        vida_ids = re.findall(r'"(\w{32})"', webpage)
        entries = []
        for idx, vida_id in enumerate(vida_ids):
            # The two lists come from independent regexes and may differ in
            # length; fall back to the page title instead of raising
            # IndexError when there is no matching title cell.
            title = sub_titles[idx] if idx < len(sub_titles) else playlist_title
            api_result = self._call_api(vida_id, url, playlist_id)
            entries.append({
                '_type': 'video',
                'id': '%s' % idx,
                'title': title,
                'url': api_result['video']['chapters'][0]['url'],
            })
        return entries

    def _real_extract(self, url):
        """Build the playlist for *url*.

        Returns the result of ``self.playlist_result`` with the entries,
        the id taken from the URL and the page ``<title>`` text.

        Raises ExtractorError if the URL kind is neither VIDE nor VIDA.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        page_kind = mobj.group('kind')

        webpage = self._download_webpage(url, playlist_id)
        playlist_title = self._html_search_regex(
            r'<title>(?P<title>.*)</title>', webpage,
            'title', group='title')

        if page_kind == 'VIDE':
            entries = self._extract_vide_entries(
                url, webpage, playlist_id, playlist_title)
        elif page_kind == 'VIDA':
            entries = self._extract_vida_entries(
                url, webpage, playlist_id, playlist_title)
        else:
            # Defensive guard: not reachable while _VALID_URL only admits
            # VIDE and VIDA.
            raise ExtractorError('Unexpected url %s' % url, expected=True)

        return self.playlist_result(entries, playlist_id, playlist_title)
|
||||||
|
|
||||||
|
|
||||||
|
class NcpaClassicAudioIE(InfoExtractor):
    """Playlist extractor for NCPA Classic ``/clt/more/<id>/index.shtml`` pages.

    The first quoted 32-character token on the page is used as the ``pid``
    for one API request; every item of ``video.chapters`` in the response
    becomes a playlist entry.
    """

    # Accept both host names used in _TESTS, escape the dot before
    # ``shtml`` and require at least one digit for the id.
    _VALID_URL = (
        r'https?://(?:www\.ncpa-classic\.com|ncpa-classic\.cntv\.cn)'
        r'/clt/more/(?P<id>[0-9]+)/index\.shtml')
    _TESTS = [{
        'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
        'info_dict': {
            'id': '416',
            'title': '来自维也纳的新年贺礼'
        },
        'playlist_count': 1,
    }, {
        'url': 'http://ncpa-classic.cntv.cn/clt/more/416/index.shtml',
        'info_dict': {
            'id': '416',
            'title': '来自维也纳的新年贺礼'
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        """Build the playlist for *url*.

        Returns the result of ``self.playlist_result`` with one entry per
        item of ``video.chapters``, the id taken from the URL and the page
        ``<title>`` text.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        video_center_id = self._html_search_regex(
            r'\"(?P<videoCenterId>\w{32})\"', webpage,
            'videoCenterId', group='videoCenterId')
        playlist_title = self._html_search_regex(
            r'<title>(?P<title>.*)</title>', webpage,
            'title', group='title')

        api_result = self._download_json(
            'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=%s&tz=-8&tai=dajuyuanaudio' % (
                video_center_id),
            playlist_id, 'Get playlist links')

        # Every entry shares the page title; ids are positions in the list.
        entries = [{
            '_type': 'video',
            'id': '%s' % idx,
            'title': playlist_title,
            'url': video.get('url'),
        } for idx, video in enumerate(api_result['video']['chapters'])]

        return self.playlist_result(entries, playlist_id, playlist_title)
|
Loading…
x
Reference in New Issue
Block a user