diff --git a/README.md b/README.md
index 4debe15fe..c7a9f7995 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,14 @@ which means you can modify it, redistribute it or use it however you like.
                                      extractors
     --force-generic-extractor        Force extraction to use the generic
                                      extractor
+    --enable-extractors EXTRACTORS   Enable only the chosen extractors. Comma-
+                                     separated list of patterns, wildcards
+                                     allowed. Example:
+                                     "twitch:*,youtube:*,vimeo"
+    --disable-extractors EXTRACTORS  Disable the chosen extractors. Comma-
+                                     separated list of patterns, wildcards
+                                     allowed. Example:
+                                     "twitch:*,youtube:*,vimeo"
     --default-search PREFIX          Use this prefix for unqualified URLs. For
                                      example "gvsearch2:" downloads two videos
                                      from google videos for youtube-dl "large
diff --git a/test/test_execution.py b/test/test_execution.py
index 620db080e..df13c8cc3 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -11,6 +11,7 @@ import subprocess
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from youtube_dl.utils import encodeArgument
+from youtube_dl.extractor import gen_extractors
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
@@ -39,5 +40,86 @@ class TestExecution(unittest.TestCase):
         _, stderr = p.communicate()
         self.assertFalse(stderr)
 
+
+# IE_NAME of every extractor whose _WORKING flag is set; the tests below drop
+# the "BROKEN" lines from the --list-extractors output before comparing.
+ALL_EXTRACTORS = [ie.IE_NAME for ie in gen_extractors() if ie._WORKING]
+
+# Test name -> command line options ('enable', 'disable', 'age_limit') and the
+# extractor names ('result') expected from --list-extractors.
+EXTRACTOR_CASES = {
+    'unrestricted': {
+        'result': ALL_EXTRACTORS
+    },
+    'enable_all': {
+        'enable': '*',
+        'result': ALL_EXTRACTORS
+    },
+    'disable_all': {
+        'disable': '*',
+        'result': []
+    },
+    'enable_disable_all': {
+        'enable': '*',
+        'disable': '*',
+        'result': []
+    },
+    'enable_some': {
+        'enable': 'youtube,youporn',
+        'result': ['youtube', 'YouPorn']
+    },
+    'enable_and_filter': {
+        'enable': 'twitch:*',
+        'disable': 'twitch:stream',
+        'result': [ie for ie in ALL_EXTRACTORS if ie.startswith('twitch:') and ie != 'twitch:stream']
+    },
+    'enable_age_restricted': {
+        'enable': 'youporn',
+        'age_limit': 16,
+        'result': []
+    }
+}
+
+
+def gen_extractor_case(case):
+    """Build a test method that runs one EXTRACTOR_CASES entry."""
+    enable = case.get('enable')
+    disable = case.get('disable')
+    age_limit = case.get('age_limit')
+    result = case['result']
+
+    def template(self):
+        args = [sys.executable, 'youtube_dl/__main__.py', '--list-extractors']
+        if enable:
+            args.extend(['--enable-extractors', enable])
+        if disable:
+            args.extend(['--disable-extractors', disable])
+        if age_limit:
+            args.extend(['--age-limit', str(age_limit)])
+
+        # Popen rather than check_output(), which Python 2.6 lacks
+        p = subprocess.Popen(args, cwd=rootDir, stdout=subprocess.PIPE, stderr=_DEV_NULL)
+        out, _ = p.communicate()
+        self.assertEqual(p.returncode, 0)
+        lines = out.decode('utf-8').splitlines()
+        extractors = [line for line in lines if line and 'BROKEN' not in line]
+        # assertItemsEqual() only exists on Python 2; comparing sorted lists
+        # is the portable order-insensitive check.
+        self.assertEqual(sorted(extractors), sorted(result))
+
+    return template
+
+
+class TestExtractorSelection(unittest.TestCase):
+    """Populated below with one test_<name> method per EXTRACTOR_CASES entry."""
+
+
+for name, case in EXTRACTOR_CASES.items():
+    test_method = gen_extractor_case(case)
+    test_name = str('test_' + name)
+    test_method.__name__ = test_name
+    setattr(TestExtractorSelection, test_name, test_method)
+    del test_method
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 1cf3140a0..3696751d3 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -10,7 +10,7 @@ import io
 import os
 import random
 import sys
-
+import fnmatch
 
 from .options import (
     parseOpts,
@@ -40,7 +40,7 @@ from .update import update_self
 from .downloader import (
     FileDownloader,
 )
-from .extractor import gen_extractors, list_extractors
+from .extractor import gen_extractors
 from .extractor.adobepass import MSO_INFO
 from .YoutubeDL import YoutubeDL
 
@@ -100,15 +100,71 @@ def _real_main(argv=None):
     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
 
+    def get_usable_extractors(enable_patterns, disable_patterns, age_limit):
+        """Return the list of extractor instances selected on the command line.
+
+        enable_patterns and disable_patterns are sequences of lower-case fnmatch
+        patterns matched against the lower-cased IE_NAME. An empty enable list
+        selects every extractor, a disable match always wins, and extractors
+        that are not suitable for a non-zero age_limit are dropped.
+        """
+        # Unfortunately it's necessary to create instances of all extractors
+        # instead of just looking at the classes, because some of them don't
+        # override the ie_key() classmethod to the correct value.
+
+        def matches_any(name, patterns):
+            # fnmatchcase() matches the same way on every platform; names and
+            # patterns are both lower-cased before they get here.
+            return any(fnmatch.fnmatchcase(name, pattern) for pattern in patterns or ())
+
+        usable = []
+        # Walk gen_extractors() once and keep its order, so that the order in
+        # which the extractors are registered later does not depend on the order
+        # of the patterns, and extractors sharing an IE_NAME are all kept.
+        for ie in gen_extractors():
+            name = ie.IE_NAME.lower()
+            if enable_patterns:
+                if not matches_any(name, enable_patterns):
+                    continue
+                if opts.verbose:
+                    write_string('[debug] Enabling extractor %s\n' % name)
+            if matches_any(name, disable_patterns):
+                if opts.verbose:
+                    write_string('[debug] Disabling extractor %s\n' % name)
+                continue
+            if age_limit and not ie.is_suitable(age_limit):
+                if opts.verbose:
+                    write_string('[debug] Extractor %s selected by filter, but ignored due to age limit\n' % name)
+                continue
+            usable.append(ie)
+
+        # A real list: callers sort it in place and test it for emptiness.
+        return usable
+
+    def patterns_from_args(args):
+        """Yield every non-empty, lower-cased pattern of the given option values."""
+        for arg in args or ():
+            for pattern in arg.split(','):
+                pattern = pattern.strip().lower()
+                # Skip empty items such as the one left by a trailing comma
+                if pattern:
+                    yield pattern
+
+    enable_extractors = list(patterns_from_args(opts.enable_extractors))
+    disable_extractors = list(patterns_from_args(opts.disable_extractors))
+    extractors = get_usable_extractors(enable_extractors, disable_extractors, opts.age_limit)
+
     if opts.list_extractors:
-        for ie in list_extractors(opts.age_limit):
+        extractors.sort(key=lambda ie: ie.IE_NAME.lower())
+        for ie in extractors:
             write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
             matchedUrls = [url for url in all_urls if ie.suitable(url)]
             for mu in matchedUrls:
                 write_string('  ' + mu + '\n', out=sys.stdout)
         sys.exit(0)
     if opts.list_extractor_descriptions:
-        for ie in list_extractors(opts.age_limit):
+        extractors.sort(key=lambda ie: ie.IE_NAME.lower())
+        for ie in extractors:
             if not ie._WORKING:
                 continue
             desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
@@ -413,7 +469,13 @@ def _real_main(argv=None):
     }
 
 
-    with YoutubeDL(ydl_opts) as ydl:
+    if not extractors:
+        parser.error('No usable extractors selected')
+
+    # auto_init=False because the selected extractors are added explicitly
+    # below. NOTE(review): confirm nothing else YoutubeDL does when
+    # auto_init is true needs to be called by hand here.
+    with YoutubeDL(ydl_opts, auto_init=False) as ydl:
         # Update version
         if opts.update_self:
             update_self(ydl.to_screen, opts.verbose, ydl._opener)
@@ -422,6 +484,10 @@ def _real_main(argv=None):
         if opts.rm_cachedir:
             ydl.cache.remove()
 
+        # Register only the extractors that survived the selection above
+        for extractor in extractors:
+            ydl.add_info_extractor(extractor)
+
         # Maybe do nothing
         if (len(all_urls) < 1) and (opts.load_info_filename is None):
             if opts.update_self or opts.rm_cachedir:
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 53497fbc6..6ca7db66d 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -167,6 +167,14 @@ def parseOpts(overrideArguments=None):
         '--force-generic-extractor',
         action='store_true', dest='force_generic_extractor', default=False,
         help='Force extraction to use the generic extractor')
+    general.add_option(
+        '--enable-extractors', metavar='EXTRACTORS',
+        action='append', dest='enable_extractors',
+        help='Enable only the chosen extractors. Comma-separated list of patterns, wildcards allowed. Example: "twitch:*,youtube:*,vimeo"')
+    general.add_option(
+        '--disable-extractors', metavar='EXTRACTORS',
+        action='append', dest='disable_extractors',
+        help='Disable the chosen extractors. Comma-separated list of patterns, wildcards allowed. Example: "twitch:*,youtube:*,vimeo"')
     general.add_option(
         '--default-search',
         dest='default_search', metavar='PREFIX',