From e77f114bf98aa9bc0fb2dc791b1761d90ff93880 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Fri, 27 May 2016 21:35:57 -0500 Subject: [PATCH] Add Original Download Original is the actual file (rather than a google-reencoded version) --- youtube_dl/extractor/googledrive.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 766fc26d0..a74b7179f 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -83,6 +83,29 @@ class GoogleDriveIE(InfoExtractor): }) self._sort_formats(formats) + downloadable = True + # DownloadPage will either be the actual file, a "we can't virus-scan this" page with a confirmation button, or a "you don't have permission" page. + # The actual file supports range requests, but the confirmation/permission pages don't, so this will download the whole page for either of those. + downloadPage = self._download_webpage('https://docs.google.com/uc?export=download&id=%s' % video_id, video_id, headers={'Range': 'bytes=0-15'}, encoding='unicode_escape') + if 'html' in downloadPage: + confirm = self._search_regex(r'confirm=([^&"]+)', downloadPage, 'confirm', default=None) + if confirm: + dlstring = 'https://docs.google.com/uc?export=download&confirm=%s&id=%s' % (confirm, video_id) + else: + downloadable = False + else: + dlstring = 'https://docs.google.com/uc?export=download&id=%s' % video_id + if downloadable: + originalExtension = self._search_regex(r'"([^"]+)",[^,]*,[^,]*$', webpage, 'original extension', default=None) + originalSize = int_or_none(self._search_regex(r'"([^"]+)"[^"]*\n[^\n]*,[^,]*$', webpage, 'original size', default=None)) + formats.append({ + 'url': dlstring, + 'format_id': 'Original', + 'ext': originalExtension, + 'filesize': originalSize, + 'protocol': 'https', + }) + return { 'id': video_id, 'title': title,