[twentymin] Fixed the 20min information extractor. Tried to add a better video description handler, but the site seems to be too inconsistent...
This commit is contained in:
parent
69807d59ed
commit
eaffc609a7
@ -4,7 +4,12 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import remove_end
|
from ..utils import (
|
||||||
|
remove_end,
|
||||||
|
ExtractorError,
|
||||||
|
clean_html,
|
||||||
|
get_element_by_class
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TwentyMinutenIE(InfoExtractor):
|
class TwentyMinutenIE(InfoExtractor):
|
||||||
@ -99,10 +104,16 @@ class TwentyMinutenIE(InfoExtractor):
|
|||||||
r'.*videoId@(\d+)',
|
r'.*videoId@(\d+)',
|
||||||
params, 'Video Id', default=None) if params is not None else ''
|
params, 'Video Id', default=None) if params is not None else ''
|
||||||
print('VIDEO ID: {}'.format(video_id))
|
print('VIDEO ID: {}'.format(video_id))
|
||||||
|
if not video_id: # the article does not contain a video
|
||||||
|
raise ExtractorError('No media links found on %s.' % url, expected=True)
|
||||||
|
|
||||||
|
# # Try to use the real video description:
|
||||||
description = self._html_search_meta(
|
# description = clean_html(get_element_by_class('caption', webpage))
|
||||||
'description', webpage, 'description')
|
# # Otherwise, use the lead text of the article as the video description:
|
||||||
|
# if not description:
|
||||||
|
# description = self._html_search_meta(
|
||||||
|
# 'description', webpage, 'description')
|
||||||
|
description = self._html_search_meta('description', webpage, 'description')
|
||||||
print('DESCRIPTION: {}'.format(description))
|
print('DESCRIPTION: {}'.format(description))
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
print('THUMBNAIL: {}'.format(thumbnail))
|
print('THUMBNAIL: {}'.format(thumbnail))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user