[twentymin] Began to fix 20min.ch extractor.

This commit is contained in:
Alex Seiler 2017-01-09 21:19:55 +01:00
parent e7ea724cb9
commit 3c3e04c975

View File

@@ -60,6 +60,7 @@ class TwentyMinutenIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id
print('DISPLAY_ID: {}'.format(display_id))
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
@@ -75,13 +76,23 @@ class TwentyMinutenIE(InfoExtractor):
if not title: if not title:
title = remove_end(re.sub( title = remove_end(re.sub(
r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News')
print('TITLE: {}'.format(title))
# if not video_id:
# video_id = self._search_regex(
# r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
if not video_id: if not video_id:
video_id = self._search_regex( videoplayer_url = self._html_search_regex(
r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id') r'<iframe[^>]+src="((?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=*?[^"]+)"',
webpage, '20min embed URL', default=None)
vid = re.match(r'videoID@\d+', videoplayer_url)
print(vid)
description = self._html_search_meta( description = self._html_search_meta(
'description', webpage, 'description') 'description', webpage, 'description')
print('DESCRIPTION: {}'.format(description))
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
return { return {