A new feature which writes the video's metadata (title, url, description, etc.)

to the file's extended attributes (xattrs).

A 'utils.write_xattr' function was created which uses whatever method of writing
xattrs exists on the current platform. (First it tries the 'xattr' module,
then the 'setfattr' commandline tool.)

If neither of these facilities is found, the function is left undefined,
and the metadata writer does nothing.
This commit is contained in:
epitron 2013-09-27 15:45:18 -04:00
parent 9abb32045a
commit 6bf653e6cf
5 changed files with 116 additions and 21 deletions

Binary file not shown.

View File

@ -573,6 +573,7 @@ class YoutubeDL(object):
if success:
try:
self.post_process(filename, info_dict)
self.set_xattrs(filename, info_dict)
except (PostProcessingError) as err:
self.report_error(u'postprocessing: %s' % str(err))
return
@ -594,6 +595,52 @@ class YoutubeDL(object):
return self._download_retcode
def set_xattrs(self, filename, info_dict):
    """Write selected metadata fields to the downloaded file's extended attributes.

    The work is delegated to a module-level ``write_xattr`` helper. When that
    name is not defined (no xattr writer was found on this machine), nothing
    is written.

    filename  -- path of the file to annotate
    info_dict -- metadata dictionary; only the keys listed in the mapping
                 below are read, and empty/missing values are skipped

    Returns True when every non-empty mapped field was written, False when no
    writer is available or a system-level error interrupted the writes.
    """
    #
    # More info about extended attributes for media:
    #   http://freedesktop.org/wiki/CommonExtendedAttributes/
    #   http://www.freedesktop.org/wiki/PhreedomDraft/
    #   http://dublincore.org/documents/usageguide/elements.shtml
    #
    # TODO:
    #  * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
    #  * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
    #

    # Probe for the helper by name: it is only defined on machines where
    # some way of writing xattrs was detected.
    try:
        write_xattr
    except NameError:  # This box can't set xattrs
        return False

    try:
        self.to_screen('[download] Writing metadata to the file\'s xattrs')

        # xattr name -> key to look up in info_dict
        xattr_mapping = {
            'user.xdg.referrer.url': 'referrer',
            # 'user.xdg.comment': 'description',
            'user.dublincore.title': 'title',
            'user.dublincore.date': 'upload_date',
            'user.dublincore.description': 'description',
            'user.dublincore.contributor': 'uploader',
            'user.dublincore.format': 'format',
        }

        for xattrname, infoname in xattr_mapping.items():
            value = info_dict.get(infoname)
            if not value:
                continue

            if infoname == "upload_date":
                # Stored as YYYYMMDD; the xattr gets the hyphenated form.
                value = hyphenate_date(value)

            write_xattr(filename, xattrname, value)

        return True

    except EnvironmentError:
        # The filesystem doesn't support extended attributes (or the write
        # failed for another system-level reason). EnvironmentError is the
        # common base of OSError and IOError on Python 2 and is the same
        # class as OSError on Python 3, so failures reported as IOError are
        # handled here too instead of aborting the download.
        return False
def post_process(self, filename, ie_info):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)

View File

@ -19,6 +19,7 @@ class VimeoIE(InfoExtractor):
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
_NETRC_MACHINE = 'vimeo'
_REFERRER_URL = 'https://vimeo.com/%s'
IE_NAME = u'vimeo'
_TESTS = [
{
@ -203,15 +204,16 @@ class VimeoIE(InfoExtractor):
%(video_id, sig, timestamp, video_quality, video_codec.upper())
return [{
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'id': video_id,
'url': video_url,
'referrer': self._REFERRER_URL % video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': video_upload_date,
'title': video_title,
'ext': video_extension,
'thumbnail': video_thumbnail,
'description': video_description,
'upload_date': video_upload_date,
'title': video_title,
'ext': video_extension,
'thumbnail': video_thumbnail,
'description': video_description,
}]

View File

@ -40,6 +40,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NETRC_MACHINE = 'youtube'
_REFERRER_URL = 'https://www.youtube.com/watch?v=%s'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
@ -1477,19 +1479,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
results.append({
'id': video_id,
'url': video_real_url,
'uploader': video_uploader,
'id': video_id,
'url': video_real_url,
'referrer': self._REFERRER_URL % video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': upload_date,
'title': video_title,
'ext': video_extension,
'format': video_format,
'thumbnail': video_thumbnail,
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles,
'duration': video_duration
'upload_date': upload_date,
'title': video_title,
'ext': video_extension,
'format': video_format,
'thumbnail': video_thumbnail,
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles,
'duration': video_duration
})
return results

View File

@ -758,7 +758,16 @@ def date_from_str(date_str):
delta = datetime.timedelta(**{unit: time})
return today + delta
return datetime.datetime.strptime(date_str, "%Y%m%d").date()
def hyphenate_date(date_str):
    """Reformat a 'YYYYMMDD' date string as 'YYYY-MM-DD'.

    A string that does not match the eight-digit pattern is returned
    unchanged.
    """
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    year, month, day = parts.groups()
    return '-'.join((year, month, day))
class DateRange(object):
"""Represents a time interval between two dates"""
def __init__(self, start=None, end=None):
@ -824,3 +833,37 @@ def intlist_to_bytes(xs):
return ''.join([chr(x) for x in xs])
else:
return bytes(xs)
# Define write_xattr(path, key, value) using whatever facility this platform
# offers. If none is found, write_xattr is deliberately left undefined and
# callers are expected to probe for the name.
try:
    import xattr

    def write_xattr(path, key, value):
        """Set extended attribute *key* to *value* on *path* via the xattr module."""
        # Two different packages are importable as 'xattr': pyxattr provides
        # set(), the other one provides setxattr(). Resolve at call time so
        # that merely importing this module never fails.
        setter = getattr(xattr, 'set', None) or xattr.setxattr
        return setter(path, key, value)

except ImportError:
    if os.name == 'posix':
        def which(bin):
            """Return the path of the first executable named *bin* on PATH, or None."""
            # Fall back to os.defpath when PATH is unset rather than raising
            # KeyError while this module is being imported.
            search_path = os.environ.get("PATH", os.defpath)
            for directory in search_path.split(os.pathsep):
                candidate = os.path.join(directory, bin)
                # Require a regular, executable file; a directory or a
                # non-executable file of the same name is not usable.
                if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                    return candidate
            return None

        if which("setfattr"):  # wrap the 'setfattr' commandline tool
            import subprocess

            def write_xattr(path, key, value):
                """Set extended attribute *key* to *value* on *path* via 'setfattr'.

                Raises OSError (EOPNOTSUPP or ENOENT) when setfattr's output
                ends with the corresponding message; any other failure
                re-raises the original subprocess.CalledProcessError.
                """
                cmd = ["setfattr", "-n", key, "-v", value, path]
                try:
                    subprocess.check_output(cmd, stderr=subprocess.STDOUT)
                except subprocess.CalledProcessError as e:
                    import errno

                    # Translate setfattr's textual errors into errno values.
                    potential_errors = {
                        # setfattr: /tmp/blah: Operation not supported
                        "Operation not supported": errno.EOPNOTSUPP,
                        # setfattr: ~/blah: No such file or directory
                        "No such file or directory": errno.ENOENT,
                    }
                    errorstr = e.output.strip().decode()
                    for potential_errorstr, potential_errno in potential_errors.items():
                        if errorstr.endswith(potential_errorstr):
                            os_error = OSError(potential_errno, potential_errorstr)
                            # Hide the CalledProcessError from the traceback
                            # chain (py2-compatible spelling of 'from None').
                            os_error.__cause__ = None
                            raise os_error
                    raise  # Reraise unhandled error