From 8d58dafb2e0b118ba73aa00b199b124cfe39fbdc Mon Sep 17 00:00:00 2001
From: remitamine
Date: Sat, 8 Aug 2015 18:05:51 +0100
Subject: [PATCH] add option to make a lite version without porn site extractors

---
 Makefile                  |  3 ++
 devscripts/remove-porn.py | 72 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 devscripts/remove-porn.py

diff --git a/Makefile b/Makefile
index fdb1abb60..a064717c8 100644
--- a/Makefile
+++ b/Makefile
@@ -92,6 +92,9 @@ youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
 
 fish-completion: youtube-dl.fish
 
+remove-porn:
+	python devscripts/remove-porn.py
+
 youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
 	@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
 		--exclude '*.DS_Store' \
diff --git a/devscripts/remove-porn.py b/devscripts/remove-porn.py
new file mode 100644
--- /dev/null
+++ b/devscripts/remove-porn.py
@@ -0,0 +1,72 @@
+"""Make a lite source tree: drop extractors not suitable for age limit 0."""
+
+import io
+import os
+import re
+import sys
+
+# `python devscripts/remove-porn.py` puts devscripts/ rather than the
+# repository root on sys.path, so add the root explicitly in order to import
+# the in-tree youtube_dl package whose files are rewritten below.
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, ROOT_DIR)
+
+from youtube_dl.extractor import gen_extractors
+
+EXTRACTORS_DIR = os.path.join(ROOT_DIR, 'youtube_dl', 'extractor')
+
+
+def list_non_suitable_extractors(age_limit):
+    """
+    Return a list of extractors that are NOT suitable for the given age,
+    sorted by extractor ID.
+    """
+    return sorted(
+        (ie for ie in gen_extractors() if not ie.is_suitable(age_limit)),
+        key=lambda ie: ie.IE_NAME.lower())
+
+
+def module_name(extractor):
+    """Return the last component of the module that defines extractor."""
+    return extractor.__module__.split('.')[-1]
+
+
+def strip_modules(filename, modules, strip_comment_blocks=False):
+    """
+    Rewrite EXTRACTORS_DIR/filename with every `from .<module> import ...`
+    statement for the given module names removed.
+
+    With strip_comment_blocks, also drop each span that starts at a '#',
+    contains a module name and runs up to the next '#'.
+    """
+    path = os.path.join(EXTRACTORS_DIR, filename)
+    # Explicit UTF-8 keeps the result independent of the locale's encoding.
+    with io.open(path, encoding='utf-8') as f:
+        content = f.read()
+    for module in modules:
+        content = re.sub(
+            r'from \.' + module + r' import (?:[A-Za-z0-9,\s]+|\([^\)]+\))\n',
+            '', content)
+        if strip_comment_blocks:
+            # NOTE: deliberately coarse; a '#' inside a string literal also
+            # ends (or starts) a span, so review the result before shipping.
+            content = re.sub(r'#[^#]+' + module + r'[^#]+', '', content)
+    with io.open(path, 'w', encoding='utf-8') as f:
+        f.write(content)
+
+
+def main():
+    """Strip the non-suitable extractors' imports, comment blocks and files."""
+    modules = [module_name(ie) for ie in list_non_suitable_extractors(0)]
+
+    strip_modules('__init__.py', modules)
+    strip_modules('generic.py', modules, strip_comment_blocks=True)
+
+    for module in modules:
+        extractor_filename = os.path.join(EXTRACTORS_DIR, module + '.py')
+        if os.path.isfile(extractor_filename):
+            os.remove(extractor_filename)
+
+
+if __name__ == '__main__':
+    main()