Identify multiple HTML5 video assets in a webpage
This commit is contained in:
parent
4e4799176e
commit
55347a7d59
@ -312,6 +312,18 @@ class InfoExtractor(object):
|
||||
if video_id is not None:
|
||||
video_info['id'] = video_id
|
||||
return video_info
|
||||
|
||||
@staticmethod
def video_result(video_url=None, video_id=None, uploader=None, video_title=None):
    """Build the info dict describing a single video.

    All arguments are optional and are stored exactly as given (``None``
    included), so the result always carries the same five keys.

    Args:
        video_url: Value stored under ``'url'``.
        video_id: Value stored under ``'id'``.
        uploader: Value stored under ``'uploader'``.
        video_title: Value stored under ``'title'``.

    Returns:
        A new dict with ``'_type'`` set to ``'video'`` plus the four
        fields above.
    """
    # TODO: ie should be the class used for getting the info
    # NOTE(review): this function takes no `ie` argument, so the TODO above
    # may not apply here -- confirm and drop it if so.
    return {
        '_type': 'video',
        'url': video_url,
        'id': video_id,
        'uploader': uploader,
        'title': video_title,
    }
|
||||
|
||||
@staticmethod
|
||||
def playlist_result(entries, playlist_id=None, playlist_title=None):
|
||||
"""Returns a playlist"""
|
||||
|
@ -348,7 +348,13 @@ class GenericIE(InfoExtractor):
|
||||
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
||||
if mobj is None:
|
||||
# HTML5 video
|
||||
mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||
matches = re.findall(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||
if matches:
|
||||
urlrs = [self.video_result(unescapeHTML(tuppl), video_id, video_uploader,video_title)
|
||||
for tuppl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
if mobj is None:
|
||||
raise ExtractorError('Unsupported URL: %s' % url)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user