Add CLI options to filter usable extractors
Add `--enable-extractors` and `--disable-extractors` options, which make it possible to restrict the set of extractors to be considered when downloading. This is useful for handling URLs that match multiple extractors (although this should be rare), or for using only particular modes of some extractors (for example, only live videos for Twitch, by enabling only `twitch:stream`).

Both options can be specified multiple times, and each argument is interpreted as a comma-separated list of fnmatch patterns, to allow the use of wildcards. Comparisons to extractor names are case-insensitive.

The order of the arguments is not relevant; matching always proceeds as follows:
- Initialize the set of considered extractors to all available extractors.
- If `--enable-extractors` is specified, remove from consideration all extractors that *don't* match those patterns.
- If `--disable-extractors` is specified, remove from consideration all extractors that *do* match those patterns.
- If `--age-limit` is specified, remove from consideration all extractors that are not suitable.

Therefore, disables and the age limit take precedence over enables.
This commit is contained in:
parent
190d2027d0
commit
bb6f776271
@ -69,6 +69,14 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
extractors
|
extractors
|
||||||
--force-generic-extractor Force extraction to use the generic
|
--force-generic-extractor Force extraction to use the generic
|
||||||
extractor
|
extractor
|
||||||
|
--enable-extractors EXTRACTORS Enable only the chosen extractors. Comma-
|
||||||
|
separated list of patterns, wildcards
|
||||||
|
allowed. Example:
|
||||||
|
"twitch:*,youtube:*,vimeo"
|
||||||
|
--disable-extractors EXTRACTORS Disable the chosen extractors. Comma-
|
||||||
|
separated list of patterns, wildcards
|
||||||
|
allowed. Example:
|
||||||
|
"twitch:*,youtube:*,vimeo"
|
||||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||||
example "gvsearch2:" downloads two videos
|
example "gvsearch2:" downloads two videos
|
||||||
from google videos for youtube-dl "large
|
from google videos for youtube-dl "large
|
||||||
|
@ -11,6 +11,7 @@ import subprocess
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from youtube_dl.utils import encodeArgument
|
from youtube_dl.utils import encodeArgument
|
||||||
|
from youtube_dl.extractor import gen_extractors, get_info_extractor
|
||||||
|
|
||||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
@ -39,5 +40,70 @@ class TestExecution(unittest.TestCase):
|
|||||||
_, stderr = p.communicate()
|
_, stderr = p.communicate()
|
||||||
self.assertFalse(stderr)
|
self.assertFalse(stderr)
|
||||||
|
|
||||||
|
ALL_EXTRACTORS = [ie.IE_NAME for ie in gen_extractors() if ie._WORKING]
|
||||||
|
EXTRACTOR_CASES = {
|
||||||
|
'unrestricted': {
|
||||||
|
'result': ALL_EXTRACTORS
|
||||||
|
},
|
||||||
|
'enable_all': {
|
||||||
|
'enable': '*',
|
||||||
|
'result': ALL_EXTRACTORS
|
||||||
|
},
|
||||||
|
'disable_all': {
|
||||||
|
'disable': '*',
|
||||||
|
'result': []
|
||||||
|
},
|
||||||
|
'enable_disable_all': {
|
||||||
|
'enable': '*',
|
||||||
|
'disable': '*',
|
||||||
|
'result': []
|
||||||
|
},
|
||||||
|
'enable_some': {
|
||||||
|
'enable': 'youtube,youporn',
|
||||||
|
'result': ['youtube', 'YouPorn']
|
||||||
|
},
|
||||||
|
'enable_and_filter': {
|
||||||
|
'enable': 'twitch:*',
|
||||||
|
'disable': 'twitch:stream',
|
||||||
|
'result': [ie for ie in ALL_EXTRACTORS if ie.startswith('twitch:') and ie != 'twitch:stream']
|
||||||
|
},
|
||||||
|
'enable_age_restricted': {
|
||||||
|
'enable': 'youporn',
|
||||||
|
'age_limit': 16,
|
||||||
|
'result': []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def gen_extractor_case(case):
    """Build a test method for one extractor-selection scenario.

    `case` is a dict with a mandatory 'result' key (the expected extractor
    names) and optional 'enable', 'disable' and 'age_limit' keys, which are
    forwarded to the command line as --enable-extractors,
    --disable-extractors and --age-limit respectively.

    The returned function takes the TestCase instance as its only argument,
    runs `--list-extractors` in a subprocess and compares the listed names
    (ignoring order) against the expected result.
    """
    enable = case.get('enable')
    disable = case.get('disable')
    age_limit = case.get('age_limit')
    result = case['result']

    def template(self):
        args = [sys.executable, 'youtube_dl/__main__.py', '--list-extractors']
        if enable:
            args.extend(['--enable-extractors', enable])
        if disable:
            args.extend(['--disable-extractors', disable])
        if age_limit:
            args.extend(['--age-limit', str(age_limit)])

        out = subprocess.check_output(args, cwd=rootDir, stderr=_DEV_NULL).decode('utf-8')
        # splitlines() copes with '\r\n' line endings; a real list (rather
        # than a lazy filter object) keeps failure messages readable.
        extractors = [
            line for line in out.splitlines()
            if line and 'BROKEN' not in line]

        # assertItemsEqual only exists on Python 2; Python 3 names the same
        # order-insensitive comparison assertCountEqual.
        assert_same_items = getattr(self, 'assertCountEqual', None) or self.assertItemsEqual
        assert_same_items(extractors, result)

    return template
|
||||||
|
|
||||||
|
class TestExtractorSelection(unittest.TestCase):
|
||||||
|
pass
|
||||||
|
|
||||||
|
for name, case in EXTRACTOR_CASES.items():
|
||||||
|
test_method = gen_extractor_case(case)
|
||||||
|
test_name = str('test_' + name)
|
||||||
|
test_method.__name__ = test_name
|
||||||
|
setattr(TestExtractorSelection, test_name, test_method)
|
||||||
|
del test_method
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -10,7 +10,8 @@ import io
|
|||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import sys
|
import sys
|
||||||
|
import fnmatch
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
from .options import (
|
from .options import (
|
||||||
parseOpts,
|
parseOpts,
|
||||||
@ -40,7 +41,7 @@ from .update import update_self
|
|||||||
from .downloader import (
|
from .downloader import (
|
||||||
FileDownloader,
|
FileDownloader,
|
||||||
)
|
)
|
||||||
from .extractor import gen_extractors, list_extractors
|
from .extractor import gen_extractors, gen_extractor_classes
|
||||||
from .extractor.adobepass import MSO_INFO
|
from .extractor.adobepass import MSO_INFO
|
||||||
from .YoutubeDL import YoutubeDL
|
from .YoutubeDL import YoutubeDL
|
||||||
|
|
||||||
@ -100,15 +101,67 @@ def _real_main(argv=None):
|
|||||||
_enc = preferredencoding()
|
_enc = preferredencoding()
|
||||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||||
|
|
||||||
|
def get_usable_extractors(enable_patterns, disable_patterns, age_limit):
    """Return the list of extractor instances left after applying all filters.

    Filtering always proceeds in the same order, regardless of the order of
    the command-line options:

    1. Start from every extractor produced by gen_extractors().
    2. If `enable_patterns` is non-empty, keep only extractors whose
       lower-cased IE_NAME matches at least one of the fnmatch patterns.
    3. Drop every remaining extractor matching one of `disable_patterns`.
    4. If `age_limit` is truthy, drop every remaining extractor for which
       is_suitable(age_limit) is false.

    Patterns are compared against lower-cased names, so callers are expected
    to pass lower-cased patterns. When `opts.verbose` (from the enclosing
    scope) is set, each decision is reported via write_string().

    A real list is returned (not a dict view) so that callers can sort it
    in place on both Python 2 and Python 3.
    """
    # Unfortunately it's necessary to create instances of all extractors
    # instead of just looking at the classes, because some of them don't
    # override the ie_key() classmethod to the correct value.
    all_extractors = OrderedDict((ie.IE_NAME.lower(), ie) for ie in gen_extractors())

    if enable_patterns:
        extractors = OrderedDict()
        all_names = list(all_extractors)
        for pattern in enable_patterns:
            for name in fnmatch.filter(all_names, pattern):
                if name in extractors:
                    # Already enabled by an earlier pattern
                    continue
                if opts.verbose:
                    write_string('[debug] Enabling extractor %s\n' % name)
                extractors[name] = all_extractors[name]
    else:
        extractors = all_extractors

    for pattern in disable_patterns or ():
        # fnmatch.filter() returns a fresh list, so deleting entries while
        # walking over its result is safe.
        for name in fnmatch.filter(list(extractors), pattern):
            if opts.verbose:
                write_string('[debug] Disabling extractor %s\n' % name)
            del extractors[name]

    if age_limit:
        # Iterate over a snapshot: deleting from a dict while iterating its
        # live items() view raises RuntimeError on Python 3.
        for name, extractor in list(extractors.items()):
            if extractor.is_suitable(age_limit):
                continue
            if opts.verbose:
                write_string('[debug] Extractor %s selected by filter, but ignored due to age limit\n' % name)
            del extractors[name]

    return list(extractors.values())
|
||||||
|
|
||||||
|
def patterns_from_args(args):
    """Yield the individual patterns from repeated comma-separated options.

    `args` is either None/empty (the option was never given) or a list of
    strings, one per occurrence of the option. Each string is split on
    commas; every piece is stripped of surrounding whitespace and
    lower-cased so that it can be matched case-insensitively against
    lower-cased extractor names.

    Empty pieces (e.g. from "a,,b") are yielded unchanged as '' rather than
    dropped, so that an option consisting only of empty patterns still
    counts as "specified" for the caller.
    """
    for arg in args or ():
        for pattern in arg.split(','):
            # Strip so that "twitch:*, youtube" matches "youtube" rather
            # than the never-matching " youtube".
            yield pattern.strip().lower()
|
||||||
|
|
||||||
|
enable_extractors = list(patterns_from_args(opts.enable_extractors))
|
||||||
|
disable_extractors = list(patterns_from_args(opts.disable_extractors))
|
||||||
|
extractors = get_usable_extractors(enable_extractors, disable_extractors, opts.age_limit)
|
||||||
|
|
||||||
if opts.list_extractors:
|
if opts.list_extractors:
|
||||||
for ie in list_extractors(opts.age_limit):
|
extractors.sort(key=lambda ie: ie.IE_NAME.lower())
|
||||||
|
for ie in extractors:
|
||||||
write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
|
write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
|
||||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||||
for mu in matchedUrls:
|
for mu in matchedUrls:
|
||||||
write_string(' ' + mu + '\n', out=sys.stdout)
|
write_string(' ' + mu + '\n', out=sys.stdout)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
if opts.list_extractor_descriptions:
|
if opts.list_extractor_descriptions:
|
||||||
for ie in list_extractors(opts.age_limit):
|
extractors.sort(key=lambda ie: ie.IE_NAME.lower())
|
||||||
|
for ie in extractors:
|
||||||
if not ie._WORKING:
|
if not ie._WORKING:
|
||||||
continue
|
continue
|
||||||
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
||||||
@ -413,7 +466,10 @@ def _real_main(argv=None):
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
if not extractors:
|
||||||
|
parser.error('No usable extractors selected')
|
||||||
|
|
||||||
|
with YoutubeDL(ydl_opts, auto_init=False) as ydl:
|
||||||
# Update version
|
# Update version
|
||||||
if opts.update_self:
|
if opts.update_self:
|
||||||
update_self(ydl.to_screen, opts.verbose, ydl._opener)
|
update_self(ydl.to_screen, opts.verbose, ydl._opener)
|
||||||
@ -422,6 +478,9 @@ def _real_main(argv=None):
|
|||||||
if opts.rm_cachedir:
|
if opts.rm_cachedir:
|
||||||
ydl.cache.remove()
|
ydl.cache.remove()
|
||||||
|
|
||||||
|
for extractor in extractors:
|
||||||
|
ydl.add_info_extractor(extractor)
|
||||||
|
|
||||||
# Maybe do nothing
|
# Maybe do nothing
|
||||||
if (len(all_urls) < 1) and (opts.load_info_filename is None):
|
if (len(all_urls) < 1) and (opts.load_info_filename is None):
|
||||||
if opts.update_self or opts.rm_cachedir:
|
if opts.update_self or opts.rm_cachedir:
|
||||||
|
@ -167,6 +167,14 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--force-generic-extractor',
|
'--force-generic-extractor',
|
||||||
action='store_true', dest='force_generic_extractor', default=False,
|
action='store_true', dest='force_generic_extractor', default=False,
|
||||||
help='Force extraction to use the generic extractor')
|
help='Force extraction to use the generic extractor')
|
||||||
|
general.add_option(
|
||||||
|
'--enable-extractors', metavar='EXTRACTORS',
|
||||||
|
action='append', dest='enable_extractors',
|
||||||
|
help='Enable only the chosen extractors. Comma-separated list of patterns, wildcards allowed. Example: "twitch:*,youtube:*,vimeo"')
|
||||||
|
general.add_option(
|
||||||
|
'--disable-extractors', metavar='EXTRACTORS',
|
||||||
|
action='append', dest='disable_extractors',
|
||||||
|
help='Disable the chosen extractors. Comma-separated list of patterns, wildcards allowed. Example: "twitch:*,youtube:*,vimeo"')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--default-search',
|
'--default-search',
|
||||||
dest='default_search', metavar='PREFIX',
|
dest='default_search', metavar='PREFIX',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user