More metadata
This commit is contained in:
parent
b8bd2a22cb
commit
64828b8fe8
@ -117,12 +117,14 @@ class CCTVIE(InfoExtractor):
|
|||||||
# older multi-part streams, non-HLS
|
# older multi-part streams, non-HLS
|
||||||
'url': 'http://english.cntv.cn/program/learnchinese/20110325/103360.shtml',
|
'url': 'http://english.cntv.cn/program/learnchinese/20110325/103360.shtml',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '20110325100557_00',
|
'id': '20110325100557',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Learn to Speak Chinese Edition 24-2011 (Chapter 01)',
|
'title': 're:^Learn to Speak Chinese Edition 24-2011',
|
||||||
'duration': 295,
|
|
||||||
'timestamp': 1301053440,
|
'timestamp': 1301053440,
|
||||||
'upload_date': '20110325',
|
'upload_date': '20110325',
|
||||||
|
'uploader': 'Beauty',
|
||||||
|
'creator': 'CNTV',
|
||||||
|
'description': 'Mike:兰兰,你在哪儿啊?\nMike:Lan Lan,where are you?\n兰兰:噢,是麦克呀。我刚才去游泳了,正打算回家呢。麦克,你有什么事儿吗?',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml',
|
'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml',
|
||||||
@ -174,27 +176,32 @@ class CCTVIE(InfoExtractor):
|
|||||||
data = re.sub(r'(?:\s+)?<\!\-+[^\-]+\-+>.*', '', data)
|
data = re.sub(r'(?:\s+)?<\!\-+[^\-]+\-+>.*', '', data)
|
||||||
data = self._parse_json(data, video_id)
|
data = self._parse_json(data, video_id)
|
||||||
entries = []
|
entries = []
|
||||||
title = data.get('title')
|
title = data['title']
|
||||||
upload_date = self._search_regex(
|
upload_date = self._search_regex(
|
||||||
'<em>(?:\s+)?(\d{2}\-\d{2}\-\d{4}\s+\d{2}\:\d{2})[^<]+',
|
'<em>(?:\s+)?(\d{2}\-\d{2}\-\d{4}\s+\d{2}\:\d{2})[^<]+',
|
||||||
webpage, 'upload date', fatal=False).strip()
|
webpage, 'upload date', fatal=False).strip()
|
||||||
upload_date = re.sub(r'\s+', ' ', upload_date)
|
upload_date = re.sub(r'\s+', ' ', upload_date)
|
||||||
udt = datetime.strptime(upload_date, '%m-%d-%Y %H:%M')
|
udt = datetime.strptime(upload_date, '%m-%d-%Y %H:%M')
|
||||||
|
desc = self._html_search_meta('description', webpage, 'description')
|
||||||
|
desc = desc.replace('\r', '\n').replace('\n ', '\n')
|
||||||
|
creator = self._html_search_regex(r'<b>(?:\s+)?Source\:(?:\s+)?</b>(?:\s+)?([^<]+)',
|
||||||
|
webpage, 'source')
|
||||||
|
editor = self._html_search_regex(r'<b>(?:\s+)?Editor\:</b>(?:\s+)?([^<\|]+)',
|
||||||
|
webpage, 'editor').strip()
|
||||||
|
|
||||||
for i, chapter in enumerate(data.get('chapters', [])):
|
for i, chapter in enumerate(data.get('chapters', [])):
|
||||||
url = chapter.get('url')
|
url = chapter.get('url')
|
||||||
if title:
|
|
||||||
ctitle = '%s (Chapter %02d)' % (title, i + 1,)
|
|
||||||
else:
|
|
||||||
ctitle = 'Chapter %02d' % (i + 1,)
|
|
||||||
if url:
|
if url:
|
||||||
if not url.startswith('http'):
|
if not url.startswith('http'):
|
||||||
url = re.sub(r'^[^\:]+', 'http', url)
|
url = re.sub(r'^[^\:]+', 'http', url)
|
||||||
entries.append(dict(id='%s_%02d' % (video_id, i,),
|
entries.append(dict(id=video_id,
|
||||||
thumbnail=data.get('imagePath'),
|
thumbnail=data.get('imagePath'),
|
||||||
title=ctitle,
|
title='%s - %02d' % (title, i + 1,),
|
||||||
duration=int_or_none(chapter.get('duration')),
|
duration=int_or_none(chapter.get('duration')),
|
||||||
upload_date=udt.strftime('%Y%m%d'),
|
upload_date=udt.strftime('%Y%m%d'),
|
||||||
|
description=desc,
|
||||||
|
uploader=editor,
|
||||||
|
creator=creator,
|
||||||
timestamp=timegm(udt.timetuple()),
|
timestamp=timegm(udt.timetuple()),
|
||||||
url=url))
|
url=url))
|
||||||
return self.playlist_result(entries,
|
return self.playlist_result(entries,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user