Allow creation of internet shortcut files with new --write-link switch and similar ones.

This commit is contained in:
Henrik Hank 2018-05-14 22:27:23 +02:00
parent 1e4fe5a7cc
commit ebd92e71f2
9 changed files with 284 additions and 15 deletions

View File

@ -176,7 +176,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
age
--download-archive FILE Download only videos not listed in the
archive file. Record the IDs of all
downloaded videos in it.
downloaded videos in it. When the switches
--write-link (or similar) and
--skip-download are used additionally, the
IDs will also be recorded, even though
nothing was actually downloaded.
--include-ads Download advertisements as well
(experimental)
@ -268,12 +272,24 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
--no-cache-dir Disable filesystem caching
--rm-cache-dir Delete all filesystem cache files
## Thumbnail images:
## Thumbnail Images:
--write-thumbnail Write thumbnail image to disk
--write-all-thumbnails Write all thumbnail image formats to disk
--list-thumbnails Simulate and list all available thumbnail
formats
## Internet Shortcut Options:
--write-link Write an internet shortcut file, depending
on the current platform (.url/.webloc/
.desktop). The URL may be cached by the OS.
--write-url-link Write a Windows internet shortcut file
(.url). Note that the OS caches the URL
based on the file path.
--write-webloc-link Write a macOS internet shortcut file
(.webloc)
--write-desktop-link Write a Linux internet shortcut file
(.desktop)
## Verbosity / Simulation Options:
-q, --quiet Activate quiet mode
--no-warnings Ignore warnings
@ -385,7 +401,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
--ap-list-mso List all supported multiple-system
operators
## Post-processing Options:
## Post-Processing Options:
-x, --extract-audio Convert video files to audio-only files
(requires ffmpeg or avconv and ffprobe or
avprobe)
@ -497,7 +513,11 @@ The `-o` option allows users to indicate a template for the output file names.
**tl;dr:** [navigate me to examples](#output-template-examples).
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operation.
You can, e.g., limit the number of characters of the title to prevent errors caused by overly long filenames or file paths: `%(title).100s`. You may want to check the length of the rest of your filename or path and adapt the number accordingly.
Allowed names along with sequence type are:
- `id` (string): Video identifier
- `title` (string): Video title

View File

@ -35,6 +35,11 @@
"verbose": true,
"writedescription": false,
"writeinfojson": true,
"writeannotations": false,
"writelink": false,
"writeurllink": false,
"writewebloclink": false,
"writedesktoplink": false,
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false,

View File

@ -18,6 +18,8 @@ from youtube_dl.compat import (
compat_shlex_split,
compat_str,
compat_struct_unpack,
compat_urllib_parse_quote,
compat_urllib_parse_quote_plus,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlencode,
@ -52,6 +54,29 @@ class TestCompat(unittest.TestCase):
dir(youtube_dl.compat))) - set(['unicode_literals'])
self.assertEqual(all_names, sorted(present_names))
def test_compat_urllib_parse_quote(self):
    """Check the percent-encoding performed by `compat_urllib_parse_quote`.

    Covers the default `safe` value, custom `safe` sets, the empty string,
    literal percent signs and non-ASCII text (encoded as UTF-8).
    """
    self.assertEqual(compat_urllib_parse_quote('abc def'), 'abc%20def')
    # No '~' in the default-`safe` case: whether '~' is escaped depends on the
    # Python version (it is treated as unreserved since Python 3.7), so an
    # input containing it cannot have a single expected value.
    self.assertEqual(compat_urllib_parse_quote('/user/abc+def'), '/user/abc%2Bdef')
    # With '~' listed in `safe`, the result is the same on every version.
    self.assertEqual(compat_urllib_parse_quote('/~user/abc+def', safe='/~+'), '/~user/abc+def')
    self.assertEqual(compat_urllib_parse_quote(''), '')
    self.assertEqual(compat_urllib_parse_quote('%'), '%25')
    self.assertEqual(compat_urllib_parse_quote('%', safe='%'), '%')
    self.assertEqual(compat_urllib_parse_quote('津波'), '%E6%B4%A5%E6%B3%A2')
    # Multi-line markup: everything in `safe` (including the line break and
    # the literal '%') must pass through untouched.
    self.assertEqual(
        compat_urllib_parse_quote(
            '''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''', safe='<>=":%/ \r\n'),
        '''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a''')
    # Stray and already-escaped percent signs are left alone when '%' is safe.
    self.assertEqual(
        compat_urllib_parse_quote(
            '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%25Things%''', safe='% '),
        '''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%''')
def test_compat_urllib_parse_quote_plus(self):
    """Check that `compat_urllib_parse_quote_plus` turns spaces into '+' and escapes '/'."""
    self.assertEqual(compat_urllib_parse_quote_plus('abc def'), 'abc+def')
    # No '~' in the input: whether '~' is escaped depends on the Python version
    # (it is treated as unreserved since Python 3.7).
    self.assertEqual(compat_urllib_parse_quote_plus('/abc def'), '%2Fabc+def')
def test_compat_urllib_parse_unquote(self):
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
@ -63,12 +88,14 @@ class TestCompat(unittest.TestCase):
self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
self.assertEqual(
compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
compat_urllib_parse_unquote(
'''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
'''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
self.assertEqual(
compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
compat_urllib_parse_unquote(
'''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
def test_compat_urllib_parse_unquote_plus(self):

View File

@ -96,6 +96,7 @@ from youtube_dl.utils import (
cli_valueless_option,
cli_bool_option,
parse_codecs,
iri_to_uri,
)
from youtube_dl.compat import (
compat_chr,
@ -1333,6 +1334,32 @@ Line 1
self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
def test_iri_to_uri(self):
    """Check `iri_to_uri` against a table of (IRI, expected URI) pairs."""
    cases = (
        # Same
        ('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b',
         'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
        # German for cheese sauce stirring spoon
        ('https://www.google.com/search?q=Käsesoßenrührlöffel',
         'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel'),
        # Existing percent-escapes must survive; only raw '<' and '>' get escaped.
        ('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#',
         'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'),
        # Non-ASCII host names are expected in their `xn--` form.
        ('http://правозащита38.рф/category/news/',
         'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/'),
        ('http://www.правозащита38.рф/category/news/',
         'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/'),
        ('https://i❤.ws/emojidomain/👍👏🤝💪',
         'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA'),
        ('http://日本語.jp/',
         'http://xn--wgv71a119e.jp/'),
        ('http://导航.中国/',
         'http://xn--fet810g.xn--fiqs8s/'),
    )
    for iri, expected_uri in cases:
        self.assertEqual(iri_to_uri(iri), expected_uri)
if __name__ == '__main__':

View File

@ -49,6 +49,7 @@ from .utils import (
date_from_str,
DateRange,
DEFAULT_OUTTMPL,
DESKTOP_LINK_TEMPLATE,
determine_ext,
determine_protocol,
DownloadError,
@ -61,6 +62,7 @@ from .utils import (
formatSeconds,
GeoRestrictedError,
int_or_none,
iri_to_uri,
ISO3166Utils,
locked_file,
make_HTTPS_handler,
@ -83,9 +85,12 @@ from .utils import (
sanitized_Request,
std_headers,
subtitles_filename,
to_high_limit_path,
UnavailableVideoError,
url_basename,
URL_LINK_TEMPLATE,
version_tuple,
WEBLOC_LINK_TEMPLATE,
write_json_file,
write_string,
YoutubeDLCookieProcessor,
@ -178,6 +183,11 @@ class YoutubeDL(object):
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files
writelink: Write an internet shortcut file, depending on the
current platform (.url/.webloc/.desktop)
writeurllink: Write a Windows internet shortcut file (.url)
writewebloclink: Write a macOS internet shortcut file (.webloc)
writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video
@ -204,7 +214,9 @@ class YoutubeDL(object):
downloaded. None for no limit.
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
again. When 'writelink' (or similar) and
'skip_download' are also present, the videos will be
recorded, too.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
@ -1407,6 +1419,8 @@ class YoutubeDL(object):
raise ExtractorError('Missing "id" field in extractor result')
if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result')
if 'webpage_url' not in info_dict:
raise ExtractorError('Missing "webpage_url" field in extractor result. Should have been augmented with it.')
def report_force_conversion(field, field_not, conversion):
self.report_warning(
@ -1836,7 +1850,56 @@ class YoutubeDL(object):
self._write_thumbnails(info_dict, filename)
if not self.params.get('skip_download', False):
# Write internet shortcut files
url_link = webloc_link = desktop_link = False
if self.params.get('writelink', False):
if sys.platform == "darwin": # macOS.
webloc_link = True
elif sys.platform.startswith("linux"):
desktop_link = True
else: # if sys.platform in ['win32', 'cygwin']:
url_link = True
if self.params.get('writeurllink', False):
url_link = True
if self.params.get('writewebloclink', False):
webloc_link = True
if self.params.get('writedesktoplink', False):
desktop_link = True
if url_link or webloc_link or desktop_link:
ascii_url = iri_to_uri(info_dict['webpage_url'])
def _write_link_file(extension, template, newline, embed_filename):
linkfn = replace_extension(filename, extension, info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
self.to_screen('[info] Internet shortcut is already present')
else:
try:
self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
template_vars = { 'url': ascii_url }
if embed_filename:
template_vars['filename'] = linkfn[:-len(extension) - 1]
linkfile.write(template % template_vars)
except (OSError, IOError):
self.report_error('Cannot write internet shortcut ' + linkfn)
return False
return True
if url_link:
if not _write_link_file('url', URL_LINK_TEMPLATE, '\r\n', embed_filename=False): return
if webloc_link:
if not _write_link_file('webloc', WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False): return
if desktop_link:
if not _write_link_file('desktop', DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True ): return
if self.params.get('skip_download', False):
# Regarding the download archive, consider internet shortcut creation in conjunction with the `--skip-download` switch as everything the user wants. (See also the help for the `--download-archive` switch.)
if url_link or webloc_link or desktop_link:
self.record_download_archive(info_dict)
# Download
else: # No `--skip-download`
try:
def dl(name, info):
fd = get_suitable_downloader(info, self.params)(self, self.params)

View File

@ -369,6 +369,10 @@ def _real_main(argv=None):
'writeinfojson': opts.writeinfojson,
'writethumbnail': opts.writethumbnail,
'write_all_thumbnails': opts.write_all_thumbnails,
'writelink': opts.writelink,
'writeurllink': opts.writeurllink,
'writewebloclink': opts.writewebloclink,
'writedesktoplink': opts.writedesktoplink,
'writesubtitles': opts.writesubtitles,
'writeautomaticsub': opts.writeautomaticsub,
'allsubtitles': opts.allsubtitles,

View File

@ -37,15 +37,20 @@ try:
except ImportError: # Python 2
import urllib as compat_urllib_parse
try:
import urllib.parse as compat_urlparse
except ImportError: # Python 2
import urlparse as compat_urlparse
try:
from urllib.parse import urlparse as compat_urllib_parse_urlparse
except ImportError: # Python 2
from urlparse import urlparse as compat_urllib_parse_urlparse
try:
import urllib.parse as compat_urlparse
from urllib.parse import urlunparse as compat_urllib_parse_urlunparse
except ImportError: # Python 2
import urlparse as compat_urlparse
from urlparse import urlunparse as compat_urllib_parse_urlunparse
try:
import urllib.response as compat_urllib_response
@ -2354,6 +2359,20 @@ try:
except NameError:
compat_str = str
try:
    from urllib.parse import (
        quote as compat_urllib_parse_quote,
        quote_plus as compat_urllib_parse_quote_plus,
    )
except ImportError:  # Python 2
    # Fallback wrappers: the text is UTF-8 encoded and `safe` is coerced with
    # `str()` before delegating to the `compat_urllib_parse` module.
    def compat_urllib_parse_quote(string, safe='/'):
        """Percent-encode `string`, leaving the characters in `safe` untouched."""
        encoded = string.encode('utf-8')
        return compat_urllib_parse.quote(encoded, str(safe))

    def compat_urllib_parse_quote_plus(string, safe=''):
        """Like `compat_urllib_parse_quote`, but delegates to `quote_plus`."""
        encoded = string.encode('utf-8')
        return compat_urllib_parse.quote_plus(encoded, str(safe))
try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
@ -2992,11 +3011,14 @@ __all__ = [
'compat_tokenize_tokenize',
'compat_urllib_error',
'compat_urllib_parse',
'compat_urllib_parse_quote',
'compat_urllib_parse_quote_plus',
'compat_urllib_parse_unquote',
'compat_urllib_parse_unquote_plus',
'compat_urllib_parse_unquote_to_bytes',
'compat_urllib_parse_urlencode',
'compat_urllib_parse_urlparse',
'compat_urllib_parse_urlunparse',
'compat_urllib_request',
'compat_urllib_request_DataHandler',
'compat_urllib_response',

View File

@ -343,7 +343,7 @@ def parseOpts(overrideArguments=None):
selection.add_option(
'--download-archive', metavar='FILE',
dest='download_archive',
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it. When the switches --write-link (or similar) and --skip-download are used additionally, the IDs will also be recorded, even though nothing was actually downloaded.')
selection.add_option(
'--include-ads',
dest='include_ads', action='store_true',
@ -764,7 +764,7 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='rm_cachedir',
help='Delete all filesystem cache files')
thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
thumbnail = optparse.OptionGroup(parser, 'Thumbnail Images')
thumbnail.add_option(
'--write-thumbnail',
action='store_true', dest='writethumbnail', default=False,
@ -778,7 +778,25 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='list_thumbnails', default=False,
help='Simulate and list all available thumbnail formats')
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
link = optparse.OptionGroup(parser, 'Internet Shortcut Options')
link.add_option(
'--write-link',
action='store_true', dest='writelink', default=False,
help='Write an internet shortcut file, depending on the current platform (.url/.webloc/.desktop). The URL may be cached by the OS.')
link.add_option(
'--write-url-link',
action='store_true', dest='writeurllink', default=False,
help='Write a Windows internet shortcut file (.url). Note that the OS caches the URL based on the file path.')
link.add_option(
'--write-webloc-link',
action='store_true', dest='writewebloclink', default=False,
help='Write a macOS internet shortcut file (.webloc)')
link.add_option(
'--write-desktop-link',
action='store_true', dest='writedesktoplink', default=False,
help='Write a Linux internet shortcut file (.desktop)')
postproc = optparse.OptionGroup(parser, 'Post-Processing Options')
postproc.add_option(
'-x', '--extract-audio',
action='store_true', dest='extractaudio', default=False,
@ -866,6 +884,7 @@ def parseOpts(overrideArguments=None):
parser.add_option_group(downloader)
parser.add_option_group(filesystem)
parser.add_option_group(thumbnail)
parser.add_option_group(link)
parser.add_option_group(verbosity)
parser.add_option_group(workarounds)
parser.add_option_group(video_format)

View File

@ -57,6 +57,9 @@ from .compat import (
compat_urllib_parse,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
compat_urllib_parse_urlunparse,
compat_urllib_parse_quote,
compat_urllib_parse_quote_plus,
compat_urllib_parse_unquote_plus,
compat_urllib_request,
compat_urlparse,
@ -3902,3 +3905,82 @@ def random_birthday(year_field, month_field, day_field):
month_field: str(random.randint(1, 12)),
day_field: str(random.randint(1, 31)),
}
# Templates for internet shortcut files, which are plain text files.
# Each one is a `%`-format string with named placeholders: `%(url)s` in all of
# them, plus `%(filename)s` in the .desktop template. Every line ends with '\n'.

# Windows `.url` file.
URL_LINK_TEMPLATE = (
    '[InternetShortcut]\n'
    'URL=%(url)s\n'
)

# macOS `.webloc` file (XML property list).
WEBLOC_LINK_TEMPLATE = (
    '<?xml version="1.0" encoding="UTF-8"?>\n'
    '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
    '<plist version="1.0">\n'
    '<dict>\n'
    '\t<key>URL</key>\n'
    '\t<string>%(url)s</string>\n'
    '</dict>\n'
    '</plist>\n'
)

# Linux `.desktop` file.
DESKTOP_LINK_TEMPLATE = (
    '[Desktop Entry]\n'
    'Encoding=UTF-8\n'
    'Name=Link to %(filename)s\n'
    'Type=Link\n'
    'URL=%(url)s\n'
    'Icon=text-html\n'
)
def iri_to_uri(iri):
    """
    Convert an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    The host name, if any, is converted with the 'idna' codec. A port is kept, except for port 80 on `http` URLs, where it is redundant. IRIs without a host (e.g., relative references) are supported; only their remaining components are escaped.

    Raises ValueError for IRIs whose network location contains a bracket (IPv6 literals).
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    hostname = iri_parts.hostname
    if hostname is not None:
        # Punycode for Unicode hostnames. The 'idna' encoding produces ASCII text.
        net_location += hostname.encode('idna').decode('utf-8')

    port = iri_parts.port
    # Port 80 is only the default for `http`. For any other scheme it is significant, and dropping it would change the target of the URI.
    if port is not None and not (port == 80 and iri_parts.scheme == 'http'):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse((
        iri_parts.scheme,
        net_location,

        compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

        # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
        compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

        # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
        compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

        compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """Return `path` prepared for long file names on Windows; unchanged on other platforms.

    On 'win32' and 'cygwin' the path is made absolute and given the
    extended-length prefix.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    extended_length_prefix = '\\\\?\\'
    return extended_length_prefix + os.path.abspath(path)