[ifeng] Parse the JavaScript `videoinfo` object into a Python dictionary instead of extracting each field with a separate regular expression

This commit is contained in:
Yonghui Chen 2018-01-18 15:05:49 +08:00
parent 76c5ae8e80
commit b95dff5125

View File

@ -6,7 +6,9 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
unified_strdate, unified_strdate,
int_or_none
) )
import re
class IFengIE(InfoExtractor): class IFengIE(InfoExtractor):
@ -37,28 +39,14 @@ class IFengIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex( video_info = self._parse_json(self._html_search_regex(
r'"name": "(?P<value>(.+?))"', r'var videoinfo = (?P<value>({.+?}));',
webpage, 'title', group='value') webpage, 'video_info', flags=re.DOTALL, group='value'), video_id)
video_url = self._html_search_regex(
r'"videoPlayUrl": "(?P<value>(.+?))"', video_url = video_info.get('videoPlayUrl')
webpage, 'url', group='value')
if not video_url: if not video_url:
self._report_error(title) self._report_error(video_url)
thumbnail = self._html_search_regex(
r'"videoLargePoster": "(?P<value>(.+?))"',
webpage, 'thumbnail', group='value', fatal=False)
uploader = self._html_search_regex(
r'"columnName":"(?P<value>(.+?))"',
webpage, 'uploader', group='value', fatal=False)
duration = self._html_search_regex(
r'"duration": "(?P<value>(.+?))"',
webpage, 'duration', group='value', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
r'"createdate": "(?P<value>(.+?))"',
webpage, 'createdate', group='value', fatal=False))
formats = [ formats = [
{ {
@ -70,10 +58,10 @@ class IFengIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': video_info.get('name'),
'duration': int(duration), 'duration': int_or_none(video_info.get('duration')),
'uploader': uploader, 'uploader': video_info.get('columnName'),
'upload_date': upload_date, 'upload_date': unified_strdate(video_info.get('createdate')),
'thumbnail': thumbnail, 'thumbnail': video_info.get('videoLargePoster'),
'formats': formats, 'formats': formats,
} }