# From the youtube_dl/YoutubeDL.py part of this change (method of class
# YoutubeDL).  The same change calls it right after post-processing succeeds:
#
#     self.post_process(filename, info_dict)
#     self.set_xattrs(filename, info_dict)
#
# That call site only handles PostProcessingError, so this method must not
# let ordinary "cannot write xattr" failures escape.
def set_xattrs(self, filename, info_dict):
    """Set extended attributes on the downloaded file (best effort).

    Copies a fixed set of ``info_dict`` fields (referrer, title, upload
    date, description, uploader, format) into ``user.*`` extended
    attributes of ``filename``.  Fields that are missing or empty are
    skipped.

    Returns False when no module-level ``write_xattr`` helper exists or
    when writing fails with OSError/IOError (e.g. the filesystem does not
    support extended attributes); returns True otherwise.
    """
    #
    # More info about extended attributes for media:
    #   http://freedesktop.org/wiki/CommonExtendedAttributes/
    #   http://www.freedesktop.org/wiki/PhreedomDraft/
    #   http://dublincore.org/documents/usageguide/elements.shtml
    #
    # TODO:
    #  * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
    #  * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
    #
    try:
        write_xattr
    except NameError:  # This box can't set xattrs
        return False

    # (xattr name, info_dict key) pairs.  A tuple rather than a dict so the
    # attributes are always written in the same order.
    xattr_mapping = (
        ('user.xdg.referrer.url', 'referrer'),
        # ('user.xdg.comment', 'description'),
        ('user.dublincore.title', 'title'),
        ('user.dublincore.date', 'upload_date'),
        ('user.dublincore.description', 'description'),
        ('user.dublincore.contributor', 'uploader'),
        ('user.dublincore.format', 'format'),
    )

    try:
        self.to_screen('[download] Writing metadata to the file\'s xattrs')

        for xattrname, infoname in xattr_mapping:
            value = info_dict.get(infoname)
            if not value:
                continue

            if infoname == "upload_date":
                value = hyphenate_date(value)

            # Extended attribute values are byte strings.  Encoding here
            # avoids an implicit ASCII encode (UnicodeEncodeError on
            # non-ASCII titles under Python 2) and satisfies xattr
            # bindings that only accept bytes under Python 3.
            if not isinstance(value, bytes):
                value = value.encode('utf-8')

            write_xattr(filename, xattrname, value)

        return True

    except (OSError, IOError):
        # The filesystem doesn't support extended attributes (or the file
        # is gone).  IOError is listed explicitly because on Python 2 it is
        # not a subclass of OSError.
        return False
postprocessors on the given file.""" info = dict(ie_info) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4a7d82b7a..d6ff05a37 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -19,6 +19,7 @@ class VimeoIE(InfoExtractor): # _VALID_URL matches Vimeo URLs _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)(?:[?].*)?$' _NETRC_MACHINE = 'vimeo' + _REFERRER_URL = 'https://vimeo.com/%s' IE_NAME = u'vimeo' _TESTS = [ { @@ -203,15 +204,16 @@ class VimeoIE(InfoExtractor): %(video_id, sig, timestamp, video_quality, video_codec.upper()) return [{ - 'id': video_id, - 'url': video_url, - 'uploader': video_uploader, + 'id': video_id, + 'url': video_url, + 'referrer': self._REFERRER_URL % video_id, + 'uploader': video_uploader, 'uploader_id': video_uploader_id, - 'upload_date': video_upload_date, - 'title': video_title, - 'ext': video_extension, - 'thumbnail': video_thumbnail, - 'description': video_description, + 'upload_date': video_upload_date, + 'title': video_title, + 'ext': video_extension, + 'thumbnail': video_thumbnail, + 'description': video_description, }] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 53f13b516..85022d209 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -40,6 +40,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' + _REFERRER_URL = 'https://www.youtube.com/watch?v=%s' + # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False @@ -1477,19 +1479,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): ' ('+self._special_itags[format_param]+')' if format_param in 
# From the youtube_dl/utils.py part of this change.  The code below uses the
# `re` and `os` modules but does not import them itself.
#
# NOTE: the patch text just before this point also touched
# youtube_dl/extractor/youtube.py, adding
#     'referrer': self._REFERRER_URL % video_id,
# to every result dict.


def hyphenate_date(date_str):
    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Input that does not match the 'YYYYMMDD' pattern (including None) is
    returned unchanged.
    """
    if date_str is None:
        return date_str
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())


# write_xattr(path, key, value) is defined only when some way of setting
# extended attributes is available: a Python `xattr` module, or (on POSIX)
# the `setfattr` command line tool.  When neither exists the name is left
# undefined on purpose, so callers can probe for it with NameError.
try:
    import xattr

    # Two different distributions install a module called `xattr`; one
    # exposes set(), the other setxattr().  Both take (path, name, value).
    if hasattr(xattr, 'set'):
        _setxattr = xattr.set
    elif hasattr(xattr, 'setxattr'):
        _setxattr = xattr.setxattr
    else:
        # Unknown module layout: fall through to the setfattr fallback.
        raise ImportError("unsupported 'xattr' module")

    def write_xattr(path, key, value):
        """Set extended attribute `key` of file `path` to `value`."""
        return _setxattr(path, key, value)

except ImportError:
    if os.name == 'posix':
        def which(bin):
            """Return the path of executable `bin` found on PATH, else None."""
            search_path = os.environ.get("PATH", os.defpath)
            for directory in search_path.split(os.pathsep):
                candidate = os.path.join(directory, bin)
                # Must be a regular, executable file; a directory or a
                # plain data file with the same name does not count.
                if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                    return candidate
            return None

        if which("setfattr"):  # wrap the 'setfattr' commandline tool
            import errno
            import subprocess

            def write_xattr(path, key, value):
                """Set extended attribute `key` of `path` using setfattr.

                Raises OSError with EOPNOTSUPP or ENOENT when setfattr
                reports "Operation not supported" or "No such file or
                directory"; any other setfattr failure is re-raised as
                subprocess.CalledProcessError.
                """
                # '--' stops option parsing so a path starting with '-' is
                # not mistaken for an option.
                cmd = ["setfattr", "-n", key, "-v", value, "--", path]
                # Force untranslated messages: the error text is matched
                # against English strings below.
                env = dict(os.environ, LC_ALL='C')
                try:
                    subprocess.check_output(
                        cmd, stderr=subprocess.STDOUT, env=env)
                except subprocess.CalledProcessError as e:
                    potential_errors = (
                        # setfattr: /tmp/blah: Operation not supported
                        ("Operation not supported", errno.EOPNOTSUPP),
                        # setfattr: ~/blah: No such file or directory
                        ("No such file or directory", errno.ENOENT),
                    )
                    # 'replace' so odd bytes in the message (e.g. from the
                    # file name) cannot raise a decoding error here.
                    errorstr = e.output.strip().decode('utf-8', 'replace')
                    for potential_errorstr, potential_errno in potential_errors:
                        if errorstr.endswith(potential_errorstr):
                            os_error = OSError(
                                potential_errno, potential_errorstr, path)
                            # Assigning __cause__ makes Python 3 hide the
                            # CalledProcessError context in tracebacks.
                            os_error.__cause__ = None
                            raise os_error
                    raise  # Reraise unhandled error