[googledrive] Add new extractor

This commit is contained in:
remitamine 2015-06-24 01:13:23 +01:00
parent 255f5694aa
commit 372b3d1634
2 changed files with 107 additions and 0 deletions

View File

@ -205,6 +205,7 @@ from .globo import GloboIE
from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE

View File

@ -0,0 +1,106 @@
from .common import InfoExtractor
from ..utils import RegexNotFoundError
class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
_TEST = {
'url': 'https://drive.google.com/file/d/0BzpExh0WzJF0NlR5WUlxdEVsY0U/edit?pli=1',
'info_dict': {
'id': '0BzpExh0WzJF0NlR5WUlxdEVsY0U',
'ext': 'mp4',
'title': '[AHSH] Fairy Tail S2 - 01 [720p].mp4',
}
}
_formats = {
'5': {'ext': 'flv'},
'6': {'ext': 'flv'},
'13': {'ext': '3gp'},
'17': {'ext': '3gp'},
'18': {'ext': 'mp4'},
'22': {'ext': 'mp4'},
'34': {'ext': 'flv'},
'35': {'ext': 'flv'},
'36': {'ext': '3gp'},
'37': {'ext': 'mp4'},
'38': {'ext': 'mp4'},
'43': {'ext': 'webm'},
'44': {'ext': 'webm'},
'45': {'ext': 'webm'},
'46': {'ext': 'webm'},
'59': {'ext': 'mp4'}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape'
)
try:
title = self._html_search_regex(
r'"title","(?P<title>.*?)"',
webpage,
'title',
group='title'
)
fmt_stream_map = self._html_search_regex(
r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"',
webpage,
'fmt_stream_map',
group='fmt_stream_map'
)
fmt_list = self._html_search_regex(
r'"fmt_list","(?P<fmt_list>.*?)"',
webpage,
'fmt_list',
group='fmt_list'
)
# timestamp = self._html_search_regex(
# r'"timestamp","(?P<timestamp>.*?)"',
# webpage,
# 'timestamp',
# group='timestamp'
# )
length_seconds = self._html_search_regex(
r'"length_seconds","(?P<length_seconds>.*?)"',
webpage,
'length_seconds',
group='length_seconds'
)
except RegexNotFoundError:
try:
reason = self._html_search_regex(
r'"reason","(?P<reason>.*?)"',
webpage,
'reason',
group='reason'
)
self.report_warning(reason)
return
except RegexNotFoundError:
self.report_warning('not a video')
return
fmt_stream_map = fmt_stream_map.split(',')
fmt_list = fmt_list.split(',')
formats = []
for i in range(len(fmt_stream_map)):
fmt_id, fmt_url = fmt_stream_map[i].split('|')
resolution = fmt_list[i].split('/')[1]
width, height = resolution.split('x')
formats.append({
'url': fmt_url,
'format_id': fmt_id,
'resolution': resolution,
'width': int(width),
'height': int(height),
'ext': self._formats[fmt_id]['ext']
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
# 'timestamp': int(timestamp),
'duration': int(length_seconds),
'formats': formats
}