diff --git a/youtube_dl/extractor/imooc.py b/youtube_dl/extractor/imooc.py index c2c295206..f42f67873 100644 --- a/youtube_dl/extractor/imooc.py +++ b/youtube_dl/extractor/imooc.py @@ -11,37 +11,36 @@ from ..compat import ( class ImoocVideoIE(InfoExtractor): _VALID_URL = r'http://www.imooc.com/video/(?P[0-9]+)' - _TEST = { + _TESTS = [ + { + 'url': 'http://www.imooc.com/video/6511', + 'md5': '756ca7b6e934aedee496e208f290bff3', + 'info_dict': { + 'id': '6511', + 'ext': 'mp4', + 'title': 'Bash变量与变量分类'} + }, + { 'url': 'http://www.imooc.com/video/5454', - 'md5': '03a0f36327721551fce08776fe8f70f1', + 'md5': '1feb8b14a07f5272b400b271292cc1f6', 'info_dict': { 'id': '5454', 'ext': 'mp4', - 'title': '3-1 网络环境查看命令', + 'title': '网络环境查看命令', } } + ] - # _ANDROID_USER_AGENT = 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5' - # _ANDROID_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)' def _real_extract(self, url): video_id = self._match_id(url) - # android_req = compat_urllib_request.Request(url) - # android_req.add_header('User-Agent', self._ANDROID_USER_AGENT) - # webpage = self._download_webpage(android_req, video_id, fatal=False) - webpage = self._download_webpage(url, video_id) - print webpage + json_url = 'http://www.imooc.com/course/ajaxmediainfo/?mid=%s&mode=flash' % video_id + data = self._download_json(json_url, video_id, 'Downloading video formats') - title = self._search_regex(r'var videoTitle = (.+?)', webpage, 'title') - # title = self._search_regex(r'(.+?)', webpage, 'title') + url = data['data']['result']['mpath'][0] + + title = data['data']['result']['name'] - # url = self._search_regex(r'property="(.+?)"', webpage, 'url'); - url = self._search_regex(r'webkit-playsinline src="(.+?)"', webpage, 'url') - # url = self._html_search_regex(r'', webpage, 'url') - # - # description = self._html_search_regex( - # r'(?s)
.*?
]*>(.*?)
', - # webpage, 'description', fatal=False) return { 'id': video_id, 'title': title,