From a93652070dc27dfee8e57cccae2709b21a643f20 Mon Sep 17 00:00:00 2001
From: zhengxin
Date: Fri, 16 Oct 2015 17:24:19 +0800
Subject: [PATCH] add imooc

---
Note: the blob hash on the imooc.py "index" line below is the one from the
original revision of this patch and was not recomputed after review edits.

 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/imooc.py    | 49 ++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 youtube_dl/extractor/imooc.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 462717b1e..e1b7dd151 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -243,6 +243,7 @@ from .imgur import (
     ImgurIE,
     ImgurAlbumIE,
 )
+from .imooc import ImoocVideoIE
 from .ina import InaIE
 from .indavideo import (
     IndavideoIE,
diff --git a/youtube_dl/extractor/imooc.py b/youtube_dl/extractor/imooc.py
new file mode 100644
index 000000000..c2c295206
--- /dev/null
+++ b/youtube_dl/extractor/imooc.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_urllib_request,
+)
+
+
+class ImoocVideoIE(InfoExtractor):
+    """Extractor for single lesson pages on imooc.com (``/video/<id>``)."""
+
+    _VALID_URL = r'https?://www\.imooc\.com/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.imooc.com/video/5454',
+        'md5': '03a0f36327721551fce08776fe8f70f1',
+        'info_dict': {
+            'id': '5454',
+            'ext': 'mp4',
+            'title': '3-1 网络环境查看命令',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the lesson page and return its id, title and media URL."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # The title is read from an inline script assignment.  A lazy group
+        # with nothing after it captures only one character, so anchor the
+        # match on the quotes of the string literal.
+        # NOTE(review): assumes the page quotes the value -- confirm.
+        title = self._search_regex(
+            r'var\s+videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+            webpage, 'title', group='title')
+
+        # The media location is the src attribute that follows the
+        # webkit-playsinline attribute in the page markup.
+        video_url = self._search_regex(
+            r'webkit-playsinline src="(.+?)"', webpage, 'url')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+        }