From d15dbdb3570cdf1551bcadcadf69c84320eb71c5 Mon Sep 17 00:00:00 2001
From: clauderains
Date: Thu, 7 Jan 2016 18:04:16 +0800
Subject: [PATCH] add ifeng extractor

---
Reviewer note: the ifeng.py hunk below was revised during review, so the
blob id on its "index" line no longer matches the new file content.

 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/ifeng.py    | 78 ++++++++++++++++++++++++++++++++
 youtube_dl/extractor/qqvideo.py  |  4 +--
 3 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 youtube_dl/extractor/ifeng.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 989331bcb..8c5fd0f29 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -271,6 +271,7 @@ from .howstuffworks import HowStuffWorksIE
 from .huffpost import HuffPostIE
 from .hypem import HypemIE
 from .iconosquare import IconosquareIE
+from .ifeng import IfengIE
 from .ign import (
     IGNIE,
     OneUPIE,
diff --git a/youtube_dl/extractor/ifeng.py b/youtube_dl/extractor/ifeng.py
new file mode 100644
index 000000000..7f0b41743
--- /dev/null
+++ b/youtube_dl/extractor/ifeng.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class IfengIE(InfoExtractor):
+    """Information extractor for video pages on v.ifeng.com."""
+
+    IE_NAME = 'ifeng'
+    IE_DESC = '凤凰网'
+    # The video id is the last path component without the ".shtml" suffix:
+    # http://v.ifeng.com/mil/mainland/201601/01d92436-8afe-4af0-82a4-cef889018295.shtml
+    # http://v.ifeng.com/ent/mingxing/201601/01e29bc2-1e89-41ee-9a91-25d56e2b0740.shtml
+    _VALID_URL = r'https?://v\.ifeng\.com/.+?/(?P<id>[\w-]+)\.shtml'
+    _TESTS = [{
+        'url': 'http://v.ifeng.com/mil/mainland/201601/01d92436-8afe-4af0-82a4-cef889018295.shtml',
+        'info_dict': {
+            'id': '01d92436-8afe-4af0-82a4-cef889018295',
+            'ext': 'mp4',
+            'title': '中国火箭军正式亮相 多支导弹旅携罕见导弹出镜',
+        },
+    }, {
+        'url': 'http://v.ifeng.com/ent/mingxing/201601/01e29bc2-1e89-41ee-9a91-25d56e2b0740.shtml',
+        'info_dict': {
+            'id': '01e29bc2-1e89-41ee-9a91-25d56e2b0740',
+            'ext': 'mp4',
+            'title': '陈羽凡锁骨骨折 盼早日康复',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at ``url``.
+
+        Downloads the metadata XML for the video id and picks one mp4
+        stream from it: the entry whose ``type`` is ``500k`` when present,
+        otherwise the last mp4 entry that carries a play URL.
+
+        Raises:
+            ExtractorError: if the metadata has no ``item`` element or no
+                mp4 entry with a play URL.
+        """
+        video_id = self._match_id(url)
+
+        # Path: second-to-last id char / last two id chars / full id.
+        info_doc = self._download_xml(
+            'http://v.ifeng.com/video_info_new/%s/%s/%s.xml'
+            % (video_id[-2], video_id[-2:], video_id),
+            video_id, 'fetch video metadata')
+
+        # Compare with None: an Element without children is falsy.
+        item = info_doc.find('./item')
+        if item is None:
+            raise ExtractorError('Unable to find video item in metadata')
+        title = item.get('Name')
+
+        # Use a separate name so a metadata file without mp4 entries cannot
+        # silently fall back to the page URL passed in as ``url``. The
+        # mediaType filter is done here rather than with an XPath attribute
+        # predicate, which older ElementTree versions do not support.
+        video_url = None
+        for video in info_doc.findall('./videos/video'):
+            play_url = video.get('VideoPlayUrl')
+            if video.get('mediaType') != 'mp4' or not play_url:
+                continue
+            video_url = play_url
+            if video.get('type') == '500k':
+                break
+        if not video_url:
+            raise ExtractorError('Unable to find an mp4 video URL')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'ext': 'mp4',
+        }
diff --git a/youtube_dl/extractor/qqvideo.py b/youtube_dl/extractor/qqvideo.py
index 3336b1b1e..76eeeb519 100644
--- a/youtube_dl/extractor/qqvideo.py
+++ b/youtube_dl/extractor/qqvideo.py
@@ -15,7 +15,7 @@ class QqVideoIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://v.qq.com/page/9/n/6/9jWRYWGYvn6.html',
         'info_dict': {
-            'id': '42',
+            'id': '9jWRYWGYvn6',
             'ext': 'mp4',
             'title': '歼-20试飞63次 国防部指挥例行试验',
         }
@@ -23,7 +23,7 @@ class QqVideoIE(InfoExtractor):
     {
         'url': 'http://v.qq.com/cover/o/oy8cl3wkrebcv8h.html?vid=x001970x491',
         'info_dict': {
-            'id': '42',
+            'id': 'x001970x491',
             'ext': 'mp4',
             'title': '韩国青瓦台召开紧急会议 国防部加紧检查战备状态',
         },