[youtube] add storyboards meta field with list and write options

Storyboards are grids of small images that appear when
the user hovers their cursor over a video's timeline.
See related issue #9868.

Options added:
  * --list-storyboards
  * --write-storyboards
This commit is contained in:
Marc Abonce Seguin 2017-12-09 00:33:28 -06:00
parent e1723c4bac
commit 6bb3efe8df
5 changed files with 118 additions and 16 deletions

View File

@ -181,6 +181,7 @@ class YoutubeDL(object):
writeannotations: Write the video annotations to a .annotations.xml file writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files write_all_thumbnails: Write all thumbnail formats to files
writestoryboards: Write all storyboards (grids of video frames) to files
writesubtitles: Write the video subtitles to a file writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video allsubtitles: Downloads all the subtitles of the video
@ -277,6 +278,7 @@ class YoutubeDL(object):
[sleep_interval; max_sleep_interval]. [sleep_interval; max_sleep_interval].
listformats: Print an overview of available video formats and exit. listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit. list_thumbnails: Print a table of all thumbnails and exit.
list_storyboards: Print a table of all storyboards and exit.
match_filter: A function that gets called with the info_dict of match_filter: A function that gets called with the info_dict of
every video. every video.
If it returns a message, the video is ignored. If it returns a message, the video is ignored.
@ -1467,6 +1469,10 @@ class YoutubeDL(object):
self.list_thumbnails(info_dict) self.list_thumbnails(info_dict)
return return
if self.params.get('list_storyboards'):
self.list_thumbnails(info_dict, item_name='storyboards')
return
thumbnail = info_dict.get('thumbnail') thumbnail = info_dict.get('thumbnail')
if thumbnail: if thumbnail:
info_dict['thumbnail'] = sanitize_url(thumbnail) info_dict['thumbnail'] = sanitize_url(thumbnail)
@ -2208,17 +2214,27 @@ class YoutubeDL(object):
'[info] Available formats for %s:\n%s' % '[info] Available formats for %s:\n%s' %
(info_dict['id'], render_table(header_line, table))) (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict, item_name='thumbnails'):
    """Print a table describing the images stored under info_dict[item_name].

    item_name selects which list of image dictionaries is shown; it is also
    used in the printed messages.  When it is 'storyboards' the table gains
    the 'cols', 'rows' and 'frames' columns.  Any field an entry does not
    provide is shown as 'unknown'.
    """
    entries = info_dict.get(item_name)
    if not entries:
        self.to_screen('[info] No %s present for %s' % (item_name, info_dict['id']))
        return

    self.to_screen(
        '[info] %s for %s:' % (item_name.title(), info_dict['id']))

    # Column headers double as dictionary keys once lower-cased.
    grid_columns = ['cols', 'rows', 'frames'] if item_name == 'storyboards' else []
    header = ['ID', 'width', 'height'] + grid_columns + ['URL']
    body = [
        [entry.get(name.lower(), 'unknown') for name in header]
        for entry in entries]
    self.to_screen(render_table(header, body))
def list_subtitles(self, video_id, subtitles, name='subtitles'): def list_subtitles(self, video_id, subtitles, name='subtitles'):
if not subtitles: if not subtitles:
@ -2383,12 +2399,16 @@ class YoutubeDL(object):
return encoding return encoding
def _write_thumbnails(self, info_dict, filename): def _write_thumbnails(self, info_dict, filename):
item_name = 'thumbnail'
if self.params.get('writethumbnail', False): if self.params.get('writethumbnail', False):
thumbnails = info_dict.get('thumbnails') thumbnails = info_dict.get('thumbnails')
if thumbnails: if thumbnails:
thumbnails = [thumbnails[-1]] thumbnails = [thumbnails[-1]]
elif self.params.get('write_all_thumbnails', False): elif self.params.get('write_all_thumbnails', False):
thumbnails = info_dict.get('thumbnails') thumbnails = info_dict.get('thumbnails')
elif self.params.get('writestoryboards', False):
thumbnails = info_dict.get('storyboards')
item_name = 'storyboard'
else: else:
return return
@ -2398,22 +2418,28 @@ class YoutubeDL(object):
# Download every selected image next to the media file and record the path
# that was used in t['filename'].
for t in thumbnails:
    thumb_ext = determine_ext(t['url'], 'jpg')
    several = len(thumbnails) > 1
    # item_name is the singular form ('thumbnail' or 'storyboard').  Plain
    # thumbnails keep their established file names: no suffix when only one
    # is written, '_<id>' otherwise.  Other image kinds always carry their
    # kind and id so they cannot collide with the thumbnail files.
    if item_name == 'thumbnail':
        suffix = '_%s' % t['id'] if several else ''
    else:
        suffix = '_%s_%s' % (item_name, t['id'])
    thumb_display_id = '%s ' % t['id'] if several else ''
    t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
        self.to_screen('[%s] %s: %s %sis already present' %
                       (info_dict['extractor'], info_dict['id'],
                        item_name.title(), thumb_display_id))
    else:
        self.to_screen('[%s] %s: Downloading %s %s...' %
                       (info_dict['extractor'], info_dict['id'],
                        item_name, thumb_display_id))
        try:
            uf = self.urlopen(t['url'])
            with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                shutil.copyfileobj(uf, thumbf)
            self.to_screen('[%s] %s: Writing %s %sto: %s' %
                           (info_dict['extractor'], info_dict['id'],
                            item_name, thumb_display_id, thumb_filename))
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # A failed download is only a warning; the remaining images are
            # still attempted.  Argument order follows the format string:
            # image kind, URL, error text.
            self.report_warning('Unable to download %s "%s": %s' %
                                (item_name, t['url'], error_to_compat_str(err)))

View File

@ -371,6 +371,7 @@ def _real_main(argv=None):
'writeinfojson': opts.writeinfojson, 'writeinfojson': opts.writeinfojson,
'writethumbnail': opts.writethumbnail, 'writethumbnail': opts.writethumbnail,
'write_all_thumbnails': opts.write_all_thumbnails, 'write_all_thumbnails': opts.write_all_thumbnails,
'writestoryboards': opts.writestoryboards,
'writesubtitles': opts.writesubtitles, 'writesubtitles': opts.writesubtitles,
'writeautomaticsub': opts.writeautomaticsub, 'writeautomaticsub': opts.writeautomaticsub,
'allsubtitles': opts.allsubtitles, 'allsubtitles': opts.allsubtitles,
@ -418,6 +419,7 @@ def _real_main(argv=None):
'max_sleep_interval': opts.max_sleep_interval, 'max_sleep_interval': opts.max_sleep_interval,
'external_downloader': opts.external_downloader, 'external_downloader': opts.external_downloader,
'list_thumbnails': opts.list_thumbnails, 'list_thumbnails': opts.list_thumbnails,
'list_storyboards': opts.list_storyboards,
'playlist_items': opts.playlist_items, 'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize, 'xattr_set_filesize': opts.xattr_set_filesize,
'match_filter': match_filter, 'match_filter': match_filter,

View File

@ -224,6 +224,12 @@ class InfoExtractor(object):
deprecated) deprecated)
* "filesize" (optional, int) * "filesize" (optional, int)
thumbnail: Full URL to a video thumbnail image. thumbnail: Full URL to a video thumbnail image.
storyboards: A list of dictionaries representing storyboards.
A storyboard is an image grid made of frames from the video.
This has the same structure as the thumbnails list, plus:
* "cols" (optional, int)
* "rows" (optional, int)
* "frames" (optional, int)
description: Full video description. description: Full video description.
uploader: Full name of the video uploader. uploader: Full name of the video uploader.
license: License name the video is licensed under. license: License name the video is licensed under.

View File

@ -10,6 +10,7 @@ import random
import re import re
import time import time
import traceback import traceback
import math
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
@ -1740,8 +1741,65 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if dash_mpd and dash_mpd not in dash_mpds: if dash_mpd and dash_mpd not in dash_mpds:
dash_mpds.append(dash_mpd) dash_mpds.append(dash_mpd)
def get_storyboards(video_info):
    """Return the storyboard images described by video_info['storyboard_spec'].

    Every spec string is split on '|': the first piece is a URL template and
    each following piece describes one storyboard level as eight
    '#'-separated fields.  Fields 0-4 are read as frame width, frame height,
    total frame count, columns and rows; field 6 replaces '$N' in the
    template and field 7 is appended as the 'sigh' query value.  '$L' is
    replaced by the level's position in the spec and '$M' by the image index.

    Returns a list with one dict per image ('id', 'width', 'height', 'cols',
    'rows', 'frames', 'url').  Levels that do not have eight fields, or whose
    numeric fields are missing or zero, are reported with a warning and
    skipped.
    """
    storyboards = []
    for spec in video_info.get('storyboard_spec') or []:
        spec_parts = spec.split('|')
        base_url = spec_parts[0]
        # enumerate() keeps the level number tied to the level's position in
        # the spec, so skipping a malformed level does not shift the '$L'
        # value and ids of the levels that follow it.
        for level, params in enumerate(spec_parts[1:]):
            storyboard_attrib = params.split('#')
            if len(storyboard_attrib) != 8:
                self._downloader.report_warning('Unable to extract storyboard')
                continue
            frame_width, frame_height, total_frames, cols, rows = [
                int_or_none(value) for value in storyboard_attrib[:5]]
            filename = storyboard_attrib[6]
            sigh = storyboard_attrib[7]
            if not (frame_width and frame_height and cols and rows and total_frames):
                self._downloader.report_warning('Unable to extract storyboard')
                continue

            frames_per_image = cols * rows
            # float() keeps the division exact on Python 2 as well.
            n_images = int(math.ceil(total_frames / float(frames_per_image)))
            level_url = base_url.replace('$L', compat_str(level)) + '?'
            for j in range(n_images):
                url = level_url.replace('$N', filename).replace('$M', compat_str(j)) + 'sigh=' + sigh
                # Per-image copies: the level-wide cols/rows stay untouched.
                image_cols, image_rows, image_frames = cols, rows, frames_per_image
                if j == n_images - 1:
                    remaining_frames = total_frames % frames_per_image
                    if remaining_frames:
                        # The last image is described with only the rows its
                        # frames occupy: frames fill a row of `cols` cells
                        # before the next row starts, hence the division by
                        # cols.
                        image_frames = remaining_frames
                        image_rows = int(math.ceil(remaining_frames / float(cols)))
                        if image_rows == 1:
                            # A single, partly filled row is narrower too.
                            image_cols = remaining_frames
                storyboards.append({
                    'id': 'L' + compat_str(level) + '-M' + compat_str(j),
                    'width': image_cols * frame_width,
                    'height': image_rows * frame_height,
                    'cols': image_cols,
                    'rows': image_rows,
                    'frames': image_frames,
                    'url': url,
                })
    return storyboards
is_live = None is_live = None
view_count = None view_count = None
storyboards = None
def extract_view_count(v_info): def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
@ -1786,6 +1844,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_response = extract_player_response(pl_response, video_id) player_response = extract_player_response(pl_response, video_id)
add_dash_mpd(video_info) add_dash_mpd(video_info)
view_count = extract_view_count(video_info) view_count = extract_view_count(video_info)
storyboards = get_storyboards(video_info)
else: else:
age_gate = False age_gate = False
# Try looking directly into the video webpage # Try looking directly into the video webpage
@ -2416,6 +2475,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': video_title, 'title': video_title,
'alt_title': video_alt_title or track, 'alt_title': video_alt_title or track,
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
'storyboards': storyboards,
'description': video_description, 'description': video_description,
'categories': video_categories, 'categories': video_categories,
'tags': video_tags, 'tags': video_tags,

View File

@ -777,6 +777,14 @@ def parseOpts(overrideArguments=None):
'--list-thumbnails', '--list-thumbnails',
action='store_true', dest='list_thumbnails', default=False, action='store_true', dest='list_thumbnails', default=False,
help='Simulate and list all available thumbnail formats') help='Simulate and list all available thumbnail formats')
thumbnail.add_option(
'--write-storyboards',
action='store_true', dest='writestoryboards', default=False,
help='Write all storyboards (grid of video frames) to disk')
thumbnail.add_option(
'--list-storyboards',
action='store_true', dest='list_storyboards', default=False,
help='Simulate and list all available storyboards')
postproc = optparse.OptionGroup(parser, 'Post-processing Options') postproc = optparse.OptionGroup(parser, 'Post-processing Options')
postproc.add_option( postproc.add_option(