From ae311b639ecb40e36027ce595248fce36e84ab21 Mon Sep 17 00:00:00 2001 From: Timendum Date: Mon, 11 Sep 2017 11:11:18 +0200 Subject: [PATCH] [ku6] extractor rewritten --- youtube_dl/extractor/ku6.py | 50 ++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/ku6.py b/youtube_dl/extractor/ku6.py index a574408e5..4cf35c510 100644 --- a/youtube_dl/extractor/ku6.py +++ b/youtube_dl/extractor/ku6.py @@ -1,32 +1,48 @@ from __future__ import unicode_literals +import re from .common import InfoExtractor class Ku6IE(InfoExtractor): - _VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html' - _TEST = { - 'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html', - 'md5': '01203549b9efbb45f4b87d55bdea1ed1', + _VALID_URL = r'https?://www\.ku6\.com/[^/]+/[^\.]+\.html\?vid=(?P<id>[a-zA-Z0-9\-\_]+)' + _TESTS = [{ + 'url': 'http://www.ku6.com/2017/detail.html?vid=cJlL_h5g7wWOKKQ4fGXdvg', + 'md5': '52a37c7a99741911b9a08f141be1ee15', 'info_dict': { - 'id': 'JG-8yS14xzBr4bCn1pu0xw', - 'ext': 'f4v', - 'title': 'techniques test', + 'id': 'cJlL_h5g7wWOKKQ4fGXdvg', + 'ext': 'mp4', + 'title': '大吉成长记 第98集 金银小饰品', + }, + }, { + # found in webpage javascript + 'url': 'http://www.ku6.com/2017/detail-zt.html?vid=bb3s1AQX8uQqLCtPZmyd02', + 'md5': '1f4f977bbd935bbc51846bb543d9d1e7', + 'info_dict': { + 'id': 'bb3s1AQX8uQqLCtPZmyd02', + 'ext': 'mp4', + 'title': '“迎接十九大、忠诚保平安”综合实战演练暨誓师动员大会', } - } + }] def _real_extract(self, url): video_id = self._match_id(url) + json_data = self._download_json('http://ku6.51y5.net/detail.do?vid=%s' % video_id, video_id) + + if json_data['data']: + return { + 'id': video_id, + 'title': json_data['data']['title'], + 'url': json_data['data']['video']['playUrl'] + } + webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex( - r'

(.*?)

', webpage, 'title') - dataUrl = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id - jsonData = self._download_json(dataUrl, video_id) - downloadUrl = jsonData['data']['f'] - + dataMap = self._html_search_regex(video_id + r':([^}]+})', webpage, 'dataMap') + # add quote in JSON object keys + dataMap = re.sub(r'([{,])([a-zA-Z]+)', r'\1"\2"', dataMap) + json_data = self._parse_json(dataMap, video_id) return { 'id': video_id, - 'title': title, - 'url': downloadUrl + 'title': json_data['title'], + 'url': json_data['playUrl'] }