Identify multiple HTML5 video assets in a webpage

This commit is contained in:
renu 2014-01-22 11:00:29 +05:30
parent 4e4799176e
commit 55347a7d59
2 changed files with 19 additions and 1 deletions

View File

@ -312,6 +312,18 @@ class InfoExtractor(object):
if video_id is not None:
video_info['id'] = video_id
return video_info
@staticmethod
def video_result(video_url=None, video_id=None, uploader=None, video_title=None):
    """Build a result dictionary of type 'video'.

    Each argument is stored unchanged under its corresponding key;
    arguments that are omitted are stored as None.

    Args:
        video_url: value stored under 'url'.
        video_id: value stored under 'id'.
        uploader: value stored under 'uploader'.
        video_title: value stored under 'title'.

    Returns:
        A dict with the keys '_type' (always 'video'), 'url', 'id',
        'uploader' and 'title'.
    """
    return {
        '_type': 'video',
        'url': video_url,
        'id': video_id,
        'uploader': uploader,
        'title': video_title,
    }
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""

View File

@ -348,7 +348,13 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
if mobj is None:
# HTML5 video
mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
matches = re.findall(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
if matches:
urlrs = [self.video_result(unescapeHTML(tuppl), video_id, video_uploader,video_title)
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
if mobj is None:
raise ExtractorError('Unsupported URL: %s' % url)