From 6bb3efe8dffc5f93716eb58e3af6fb5e6a51a336 Mon Sep 17 00:00:00 2001
From: Marc Abonce Seguin
Date: Sat, 9 Dec 2017 00:33:28 -0600
Subject: [PATCH] [youtube] add storyboards meta field with list and write options

Storyboards are grids of small images that appear when the user hovers
their cursor over a video's timeline.

See related issue #9868.

Options added:

* --list-storyboards
* --write-storyboards
---
 youtube_dl/YoutubeDL.py         | 58 ++++++++++++++++++++++---------
 youtube_dl/__init__.py          |  2 ++
 youtube_dl/extractor/common.py  |  6 ++++
 youtube_dl/extractor/youtube.py | 60 +++++++++++++++++++++++++++++++++
 youtube_dl/options.py           |  8 +++++
 5 files changed, 118 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 19370f62b..d1c13726e 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -181,6 +181,7 @@ class YoutubeDL(object):
     writeannotations:  Write the video annotations to a .annotations.xml file
     writethumbnail:    Write the thumbnail image to a file
     write_all_thumbnails:  Write all thumbnail formats to files
+    writestoryboards:  Write all storyboards (grid of video frames) to a file
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatically generated subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
@@ -277,6 +278,7 @@ class YoutubeDL(object):
                        [sleep_interval; max_sleep_interval].
     listformats:       Print an overview of available video formats and exit.
     list_thumbnails:   Print a table of all thumbnails and exit.
+    list_storyboards:  Print a table of all storyboards and exit.
     match_filter:      A function that gets called with the info_dict of
                        every video.
                        If it returns a message, the video is ignored.
@@ -1467,6 +1469,10 @@ class YoutubeDL(object):
             self.list_thumbnails(info_dict)
             return
 
+        if self.params.get('list_storyboards'):
+            self.list_thumbnails(info_dict, item_name='storyboards')
+            return
+
         thumbnail = info_dict.get('thumbnail')
         if thumbnail:
             info_dict['thumbnail'] = sanitize_url(thumbnail)
@@ -2208,17 +2214,27 @@ class YoutubeDL(object):
             '[info] Available formats for %s:\n%s' %
             (info_dict['id'], render_table(header_line, table)))
 
-    def list_thumbnails(self, info_dict):
-        thumbnails = info_dict.get('thumbnails')
+    def list_thumbnails(self, info_dict, item_name='thumbnails'):
+        thumbnails = info_dict.get(item_name)
         if not thumbnails:
-            self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
+            self.to_screen('[info] No %s present for %s' % (item_name, info_dict['id']))
             return
 
         self.to_screen(
-            '[info] Thumbnails for %s:' % info_dict['id'])
-        self.to_screen(render_table(
-            ['ID', 'width', 'height', 'URL'],
-            [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
+            '[info] %s for %s:' % (item_name.title(), info_dict['id']))
+
+        columns = ['ID', 'width', 'height']
+        if item_name == 'storyboards':
+            columns += ['cols', 'rows', 'frames']
+        columns += ['URL']
+
+        table = []
+        for t in thumbnails:
+            table.append([])
+            for column in columns:
+                table[-1].append(t.get(column.lower(), 'unknown'))
+
+        self.to_screen(render_table(columns, table))
 
     def list_subtitles(self, video_id, subtitles, name='subtitles'):
         if not subtitles:
@@ -2383,12 +2399,16 @@ class YoutubeDL(object):
         return encoding
 
     def _write_thumbnails(self, info_dict, filename):
+        item_name = 'thumbnail'  # singular noun used in messages and file name suffixes
         if self.params.get('writethumbnail', False):
             thumbnails = info_dict.get('thumbnails')
             if thumbnails:
                 thumbnails = [thumbnails[-1]]
         elif self.params.get('write_all_thumbnails', False):
             thumbnails = info_dict.get('thumbnails')
+        elif self.params.get('writestoryboards', False):
+            thumbnails = info_dict.get('storyboards')
+            item_name = 'storyboard'
         else:
             return
 
@@ -2398,22 +2418,28 @@ class YoutubeDL(object):
 
         for t in thumbnails:
             thumb_ext = determine_ext(t['url'], 'jpg')
-            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+            if item_name == 'thumbnail':  # must match the default above so thumbnail file names stay unchanged
+                suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+            else:
+                suffix = '_%s_%s' % (item_name, t['id'])
             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
 
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
-                self.to_screen('[%s] %s: Thumbnail %sis already present' %
-                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+                self.to_screen('[%s] %s: %s %sis already present' %
+                               (info_dict['extractor'], info_dict['id'],
+                                item_name.title(), thumb_display_id))
             else:
-                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
-                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+                self.to_screen('[%s] %s: Downloading %s %s...' %
+                               (info_dict['extractor'], info_dict['id'],
+                                item_name, thumb_display_id))
                 try:
                     uf = self.urlopen(t['url'])
                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                         shutil.copyfileobj(uf, thumbf)
-                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
-                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+                    self.to_screen('[%s] %s: Writing %s %sto: %s' %
+                                   (info_dict['extractor'], info_dict['id'],
+                                    item_name, thumb_display_id, thumb_filename))
                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                    self.report_warning('Unable to download thumbnail "%s": %s' %
-                                        (t['url'], error_to_compat_str(err)))
+                    self.report_warning('Unable to download %s "%s": %s' %
+                                        (item_name, t['url'], error_to_compat_str(err)))
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 9a659fc65..22321e6ac 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -371,6 +371,7 @@ def _real_main(argv=None):
         'writeinfojson': opts.writeinfojson,
         'writethumbnail': opts.writethumbnail,
         'write_all_thumbnails': opts.write_all_thumbnails,
+        'writestoryboards': opts.writestoryboards,
         'writesubtitles': opts.writesubtitles,
         'writeautomaticsub': opts.writeautomaticsub,
         'allsubtitles': opts.allsubtitles,
@@ -418,6 +419,7 @@ def _real_main(argv=None):
         'max_sleep_interval': opts.max_sleep_interval,
         'external_downloader': opts.external_downloader,
         'list_thumbnails': opts.list_thumbnails,
+        'list_storyboards': opts.list_storyboards,
         'playlist_items': opts.playlist_items,
         'xattr_set_filesize': opts.xattr_set_filesize,
         'match_filter': match_filter,
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a61753b17..0fad73792 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -224,6 +224,12 @@ class InfoExtractor(object):
                                         deprecated)
                         * "filesize" (optional, int)
     thumbnail:      Full URL to a video thumbnail image.
+    storyboards:    A list of dictionaries representing storyboards.
+                    A storyboard is an image grid made of frames from the video.
+                    This has the same structure as the thumbnails list, plus:
+                        * "cols" (optional, int)
+                        * "rows" (optional, int)
+                        * "frames" (optional, int)
     description:    Full video description.
     uploader:       Full name of the video uploader.
     license:        License name the video is licensed under.
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index fec17987b..06964aeaa 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -10,6 +10,7 @@ import random
 import re
 import time
 import traceback
+import math
 
 from .common import InfoExtractor, SearchInfoExtractor
 from ..jsinterp import JSInterpreter
@@ -1740,8 +1741,65 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if dash_mpd and dash_mpd not in dash_mpds:
                 dash_mpds.append(dash_mpd)
 
+        def get_storyboards(video_info):
+            """Return one dict per storyboard image described by video_info['storyboard_spec']."""
+            storyboards = []
+            spec = video_info.get('storyboard_spec', [])
+
+            for s in spec:
+                s_parts = s.split('|')
+                base_url = s_parts[0]
+                # enumerate keeps the $L level index tied to the entry position even when an entry is skipped
+                for i, params in enumerate(s_parts[1:]):
+                    storyboard_attrib = params.split('#')
+                    if len(storyboard_attrib) != 8:
+                        self._downloader.report_warning('Unable to extract storyboard')
+                        continue
+
+                    frame_width = int_or_none(storyboard_attrib[0])
+                    frame_height = int_or_none(storyboard_attrib[1])
+                    total_frames = int_or_none(storyboard_attrib[2])
+                    cols = int_or_none(storyboard_attrib[3])
+                    rows = int_or_none(storyboard_attrib[4])
+                    filename = storyboard_attrib[6]
+                    sigh = storyboard_attrib[7]
+
+                    if frame_width and frame_height and cols and rows and total_frames:
+                        frames = cols * rows
+                        width, height = frame_width * cols, frame_height * rows
+                        n_images = int(math.ceil(total_frames / float(cols * rows)))
+                    else:
+                        self._downloader.report_warning('Unable to extract storyboard')
+                        continue
+
+                    storyboards_url = base_url.replace('$L', compat_str(i)) + '?'
+                    for j in range(n_images):
+                        url = storyboards_url.replace('$N', filename).replace('$M', compat_str(j)) + 'sigh=' + sigh
+                        if j == n_images - 1:  # the last image may hold fewer frames than a full grid
+                            remaining_frames = total_frames % (cols * rows)
+                            if remaining_frames != 0:
+                                frames = remaining_frames
+                                rows = int(math.ceil(float(remaining_frames) / cols))
+                                height = rows * frame_height
+                                if rows == 1:
+                                    cols = remaining_frames
+                                    width = cols * frame_width
+
+                        storyboards.append({
+                            'id': 'L' + compat_str(i) + '-M' + compat_str(j),
+                            'width': width,
+                            'height': height,
+                            'cols': cols,
+                            'rows': rows,
+                            'frames': frames,
+                            'url': url
+                        })
+
+            return storyboards
+
         is_live = None
         view_count = None
+        storyboards = None
 
         def extract_view_count(v_info):
             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
@@ -1786,6 +1844,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     player_response = extract_player_response(pl_response, video_id)
                 add_dash_mpd(video_info)
                 view_count = extract_view_count(video_info)
+                storyboards = get_storyboards(video_info)
         else:
             age_gate = False
             # Try looking directly into the video webpage
@@ -2416,6 +2475,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'title': video_title,
             'alt_title': video_alt_title or track,
             'thumbnail': video_thumbnail,
+            'storyboards': storyboards,
             'description': video_description,
             'categories': video_categories,
             'tags': video_tags,
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 6d5ac62b3..e0169347b 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -777,6 +777,14 @@ def parseOpts(overrideArguments=None):
         '--list-thumbnails',
         action='store_true', dest='list_thumbnails', default=False,
         help='Simulate and list all available thumbnail formats')
+    thumbnail.add_option(
+        '--write-storyboards',
+        action='store_true', dest='writestoryboards', default=False,
+        help='Write all storyboards (grid of video frames) to disk')
+    thumbnail.add_option(
+        '--list-storyboards',
+        action='store_true', dest='list_storyboards', default=False,
+
help='Simulate and list all available storyboards') postproc = optparse.OptionGroup(parser, 'Post-processing Options') postproc.add_option(