From fe37a2535aa013b12d87c9ac3ea466c4ac350f61 Mon Sep 17 00:00:00 2001 From: Alex Vong Date: Sun, 13 Sep 2015 17:55:15 +0800 Subject: [PATCH] Fix bad formatting of man page (Closes #6510) * Makefile: It now pipes the output of devscripts/prepare_manpage.py to pod2man to produce the youtube-dl.1 man page (previously was using pandoc). * devscripts/prepare_manpage.py: It now converts README.md to Perl Pod format. --- Makefile | 6 +- devscripts/prepare_manpage.py | 542 ++++++++++++++++++++++++++++++++-- 2 files changed, 522 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index fdb1abb60..0b331240f 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,8 @@ BINDIR ?= $(PREFIX)/bin MANDIR ?= $(PREFIX)/man SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python +VERSION ?= $(shell echo 'print(__version__)' | cat youtube_dl/version.py - | python) +DATE ?= $(shell echo '$(VERSION)' | sed -e 's/\./-/g') # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local ifeq ($(PREFIX),/usr) @@ -73,9 +75,7 @@ README.txt: README.md pandoc -f markdown -t plain README.md -o README.txt youtube-dl.1: README.md - python devscripts/prepare_manpage.py >youtube-dl.1.temp.md - pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1 - rm -f youtube-dl.1.temp.md + python devscripts/prepare_manpage.py | uniq | pod2man --center='User Commands' --date=$(DATE) --errors='die' --name='YOUTUBE-DL' --release=$(VERSION) --section=1 > youtube-dl.1 youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in python devscripts/bash-completion.py diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 7ece37754..8389ba773 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -2,31 +2,527 @@ from __future__ import unicode_literals import io import os.path -import sys import re +import scm -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -README_FILE = os.path.join(ROOT_DIR, 'README.md') 
+ROOT_DIR_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +README_PATH = os.path.join(ROOT_DIR_PATH, "README.md") -with io.open(README_FILE, encoding='utf-8') as f: - readme = f.read() +with io.open(README_PATH, encoding="utf-8") as FILE: + README_ORIG = FILE.read() -PREFIX = '''%YOUTUBE-DL(1) - -# NAME - -youtube\-dl \- download videos from youtube.com or other video platforms - -# SYNOPSIS - -**youtube-dl** \[OPTIONS\] URL [URL...] - -''' -readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) -readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) -readme = PREFIX + readme - -if sys.version_info < (3, 0): - print(readme.encode('utf-8')) +if README_ORIG: + README = "=pod\n\n=encoding utf8\n\n# NAME\n\n" + README_ORIG + "\n\n=cut\n" else: - print(readme) + README = "" + +def identity(x): return x + +def _compose(f, g): return lambda x: f(g(x)) + +def compose(*tuple_of_func): + lst_of_func = scm.tuple_to_lst(tuple_of_func) + return scm.fold(_compose, identity, scm.reverse(lst_of_func)) + +def flatten(lst): + """flatten an arbitrarily deep nested lst into a single lst""" + if lst is scm.nil: + return scm.nil + elif not scm.is_list(lst): + return scm.list(lst) + else: + return scm.append(flatten(scm.car(lst)), + flatten(scm.cdr(lst))) + +def list_subtract(lst1, lst2): + return scm.lset_difference(lambda x, y: x == y, lst1, lst2) + +def non_nil_take_while(pred, lst): + return scm.take_while(pred, + scm.drop_while(lambda x: not pred(x), + lst)) + +def string_join(lst_of_string, delimiter): + list_of_string = scm.lst_to_list(lst_of_string) + return delimiter.join(list_of_string) + +def is_not_empty_string(s_exp): + return not scm.is_string(s_exp) or bool(s_exp) + +def regex_split(pattern, string, flags=0): + """split string into lst using pattern""" + return scm.list_to_lst(re.split(pattern, string, 0, flags)) + +def remove_table_of_content(string): + pattern = r"^-[ \t]+\[[- \tA-Z]+\]\(#[-a-z]+\)[ \t]*$" + return 
re.sub(pattern, r"", string, 0, re.MULTILINE) + +def make_lexer(split_pattern, sub_pattern , exp_type, flags=0): + """ + Lexer is a procedure which does the following: + + 1. Split string into lst using split_pattern + 2. Transform matching string in lst using sub_pattern + and attach exp_type to it, forming an expression + + The output is a lst of tokens consisting of string and expression + + """ + def attach_exp_type_to_matching_substring(string): + if re.search(split_pattern, string, flags): + return scm.list(exp_type, + re.sub(sub_pattern, r"\1", string, 1, flags)) + else: + return string + return lambda string: scm.map(attach_exp_type_to_matching_substring, + regex_split(split_pattern, string, flags)) + +def make_parser(exp_type, post_proc): + """ + Parser is a procedure which takes the output of a lexer as input + and does the following: + + 1. Group exp_type expression and the string after it + 2. Apply post_proc to the string after exp_type expression + + The output is a tree-like structure + + """ + def is_exp(lst): return scm.is_list(scm.car(lst)) + def extract_exp(lst): return scm.car(lst) + def extract_string_after_exp(lst): return scm.cadr(lst) + def extract_rest(lst): return scm.cddr(lst) + def parse_loop(lst, accum): + if lst is scm.nil: + return accum + elif is_exp(lst): + return parse_loop(extract_rest(lst), + scm.cons(scm.list(extract_exp(lst), + post_proc(extract_string_after_exp(lst))), + accum)) + else: + return parse_loop(scm.cdr(lst), + scm.cons(post_proc(scm.car(lst)), + accum)) + return lambda lst: scm.reverse(parse_loop(lst, scm.nil)) + +def make_front_end(post_proc, split_pattern, sub_pattern, exp_type, flags=0): + """compose parser and lexer to create a front end""" + return compose(make_parser(exp_type, post_proc), + make_lexer(split_pattern, sub_pattern, exp_type, flags)) + +def connect_front_end(*tuple_of_lst): + """ + Connect front ends together + by making use of post_proc parameter of make_front_end + + """ + lst_of_lst = 
scm.tuple_to_lst(tuple_of_lst) + def extract_func(lst_of_lst): return scm.caar(lst_of_lst) + def extract_arg_lst(lst_of_lst): return scm.cdar(lst_of_lst) + def connect_loop(lst_of_lst, accum): + if lst_of_lst is scm.nil: + return accum + else: + return connect_loop(scm.cdr(lst_of_lst), + scm.apply(extract_func(lst_of_lst), + scm.cons(accum, + extract_arg_lst(lst_of_lst)))) + return connect_loop(lst_of_lst, identity) + +# These are patterns used to determined how to decomposite the text +# into sensible parts + +TITLE_SPLIT_PATTERN = r"^(#[ \t]+[- \tA-Z]+[ \t]*)$" +TITLE_SUB_PATTERN = r"^#[ \t]+([- \tA-Z]+)[ \t]*$" + +SUBTITLE_SPLIT_PATTERN = r"^(#{2,3}[ \t]+[^#\n]+#*[ \t]*)$" +SUBTITLE_SUB_PATTERN = r"^#{2,3}[ \t]+([^#\n]+)#*[ \t]*$" + +HYPHEN_SPLIT_PATTERN = r"^([ \t]*-[ \t]+.+)$" +HYPHEN_SUB_PATTERN = r"^[ \t]*-[ \t]+(.+)$" + +ASTERISK_SPLIT_PATTERN = r"^(\*[ \t]+[^\*\n]+)$" +ASTERISK_SUB_PATTERN = r"^\*[ \t]+([^\*\n]+)$" + +NUMBER_PATTERN = r"^(\d+\.[ \t]+.+)" + +VERBATIM_SPLIT_PATTERN = r"(```[^`]+```)" +VERBATIM_SUB_PATTERN = r"```([^`]+)```" + +# tree representing the structure of README + +AST = compose(connect_front_end(scm.list(make_front_end, + VERBATIM_SPLIT_PATTERN, + VERBATIM_SUB_PATTERN, + scm.string_to_symbol("VERBATIM")), + scm.list(make_front_end, + NUMBER_PATTERN, + NUMBER_PATTERN, + scm.string_to_symbol("NUMBER"), + re.MULTILINE), + scm.list(make_front_end, + ASTERISK_SPLIT_PATTERN, + ASTERISK_SUB_PATTERN, + scm.string_to_symbol("ASTERISK"), + re.MULTILINE), + scm.list(make_front_end, + HYPHEN_SPLIT_PATTERN, + HYPHEN_SUB_PATTERN, + scm.string_to_symbol("HYPHEN"), + re.MULTILINE), + scm.list(make_front_end, + SUBTITLE_SPLIT_PATTERN, + SUBTITLE_SUB_PATTERN, + scm.string_to_symbol("SUBTITLE"), + re.MULTILINE), + scm.list(make_front_end, + TITLE_SPLIT_PATTERN, + TITLE_SUB_PATTERN, + scm.string_to_symbol("TITLE"), + re.MULTILINE)), + remove_table_of_content) \ + (README) + +def fetch_symbol(ast, exp_type_lst): + """ + From ast, fetch symbol which is 
of type listed in exp_type_lst + + Note that the output is a nested lst needed to be flatten in order to be + lst of the form ( ...) + + """ + def is_not_null(s_exp): return s_exp is not scm.nil + def is_exp(s_exp): return scm.is_list(scm.car(s_exp)) + def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type + def extract_exp(s_exp): return scm.car(s_exp) + def extract_rest(s_exp): return scm.cdr(s_exp) + if not scm.is_list(ast): + return scm.nil + elif is_exp(ast) and \ + scm.any(lambda exp_type: is_exp_type(ast, exp_type), exp_type_lst): + return scm.list(extract_exp(ast), + fetch_symbol(extract_rest(ast), exp_type_lst)) + else: + return scm.append(scm.filter(is_not_null, + scm.map(lambda sub_tree: \ + fetch_symbol(sub_tree, + exp_type_lst), + ast))) + +def group_adj_element(lst): + """ + Take output of fetch_symbol as input + + Transform lst of the form + ( ...) + into lst of the form + (( ) ( ) ...) + + """ + def index_to_element(k): return scm.list_ref(lst, k) + lst_of_index_lst = scm.map(scm.list, + scm.filter(scm.is_even, + scm.iota(scm.length(lst))), + scm.filter(scm.is_odd, + scm.iota(scm.length(lst)))) + return scm.map(lambda index_lst: scm.map(index_to_element, index_lst), + lst_of_index_lst) + +EXP_TYPE_LST = scm.list(scm.string_to_symbol("TITLE"), + scm.string_to_symbol("SUBTITLE"), + scm.string_to_symbol("HYPHEN"), + scm.string_to_symbol("ASTERISK"), + scm.string_to_symbol("NUMBER"), + scm.string_to_symbol("VERBATIM")) + +# table recording the expression type of each expression symbol + +SYMBOL_TABLE = compose(group_adj_element, flatten) \ + (fetch_symbol(AST, EXP_TYPE_LST)) + +def is_list_of_string(lst): return scm.every(scm.is_string, lst) + +def is_contain_string_lst(s_exp): + if not scm.is_list(s_exp): + return False + elif is_list_of_string(s_exp): + return True + else: + return scm.any(is_contain_string_lst, s_exp) + +def join_string_lst(s_exp): + if not scm.is_list(s_exp): + return s_exp + elif is_list_of_string(s_exp): + return 
string_join(s_exp, "") + else: + return scm.map(join_string_lst, s_exp) + +def recursively_join_string_lst(s_exp): + if not is_contain_string_lst(s_exp): + return s_exp + else: + return recursively_join_string_lst(join_string_lst(s_exp)) + +def process_ast(proc, exp_type, ast): + """recursively apply proc with exp_type, exp_symbol and rest""" + def is_exp(s_exp): return scm.is_list(scm.car(s_exp)) + def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type + def s_exp_first(s_exp): return scm.car(flatten(s_exp)) + def extract_exp_symbol(s_exp): return scm.cadar(s_exp) + def extract_rest(s_exp): return s_exp_first(scm.cadr(ast)) + if not scm.is_list(ast): + return ast + elif is_exp(ast) and is_exp_type(ast, exp_type): + return proc(exp_type, extract_exp_symbol(ast), extract_rest(ast)) + else: + return scm.map(lambda sub_tree: process_ast(proc, exp_type, sub_tree), + ast) + +def make_back_end(proc, exp_type): + """recursively join processed tree-like structure back to string""" + return lambda ast: recursively_join_string_lst(process_ast(proc, + exp_type, + ast)) + +def verbatim_processor(exp_type, exp_symbol, rest): + """ + Create verbatim paragraph from expression with exp_type VERBATIM + + 1. remove formatter name if exists + 2. 
indent each sentance in the paragraph by 4 spaces + + """ + def remove_formatter_name(string): + pattern = r"^bash[ \t]*$|^python[ \t]*$" + return re.sub(pattern, r"", string, 0, re.MULTILINE) + def indent_by_4_spaces(string): + pattern = r"^(.+)" + return re.sub(pattern, r" \1", string, 0, re.MULTILINE) + return indent_by_4_spaces(remove_formatter_name(exp_symbol)) + rest + +def group_by_exp_type(exp_type, lst): + """group exp_type expression by removing non-exp_type expression""" + def is_exp_type(s_exp): return scm.car(s_exp) is exp_type + sublst = non_nil_take_while(is_exp_type, lst) + if sublst is scm.nil: + return scm.nil + else: + return scm.cons(sublst, + group_by_exp_type(exp_type, + list_subtract(lst, sublst))) + +def make_item_position_decider(func): + """ + Return a procedure which will decide if a given string of exp_type + is in the desired position specified by func + + func take a lst and return element of the desired position + + """ + def extract_exp_symbol(s_exp): return scm.cadr(s_exp) + def is_item_position(string, exp_type, symbol_table): + return scm.any(lambda exp_symbol: exp_symbol == string, + scm.map(compose(extract_exp_symbol, func), + group_by_exp_type(exp_type, symbol_table))) + return is_item_position + +def process_item(exp_type, prefix, rest, exp_symbol, symbol_table): + """process item based on the position of exp_symbol in the symbol_table""" + is_first_item = make_item_position_decider(scm.first) + is_last_item = make_item_position_decider(scm.last) + if is_first_item(exp_symbol, exp_type, symbol_table): + return "=over 7\n\n=item Z<>" + prefix + "\n\n" + rest + "\n" + elif is_last_item(exp_symbol, exp_type, symbol_table): + return "=item Z<>" + prefix + "\n\n" + rest + "\n\n=back" + else: + return "=item Z<>" + prefix + "\n\n" + rest + "\n" + +def make_item_processor(symbol_table): + """ + Return a procedure which does a case dispatch on exp_type of expression + and pass the extracted parts of expression to process_item + + """ 
+ def make_number_item_lst(exp_symbol): + split_pattern = r"^(\d+\.[ \t]+)" + sub_pattern = r"^(\d+\.)[ \t]+" + return scm.filter(is_not_empty_string, + make_lexer(split_pattern, + sub_pattern, + scm.string_to_symbol("NUMBER"), + re.MULTILINE) \ + (exp_symbol)) + def is_exp_type(exp_type, exp_symbol): return exp_type is exp_symbol + def extract_prefix(number_item_lst): return scm.cadar(number_item_lst) + def extract_rest(number_item_lst): return scm.cadr(number_item_lst) + def process_different_items(exp_type, exp_symbol, rest): + if is_exp_type(exp_type, scm.string_to_symbol("HYPHEN")): + return process_item(exp_type, + "-", + exp_symbol, + exp_symbol, + symbol_table) + \ + rest + elif is_exp_type(exp_type, scm.string_to_symbol("ASTERISK")): + return process_item(exp_type, + "*", + exp_symbol, + exp_symbol, + symbol_table) + \ + rest + elif is_exp_type(exp_type, scm.string_to_symbol("NUMBER")): + return process_item(exp_type, + extract_prefix(make_number_item_lst(exp_symbol)), + extract_rest(make_number_item_lst(exp_symbol)), + exp_symbol, + symbol_table) + \ + rest + else: + raise TypeError("unknown exp_type of expression") + return process_different_items + +def installation_section_processor(exp_type, exp_symbol, rest): + return "" + +def append_title(string): + return "=head1 " + string + "\n\n" + +def subtitle_processor(exp_type, exp_symbol, rest): + return "=head2 " + exp_symbol + "\n\n" + rest + +def name_section_processor(exp_type, exp_symbol, rest): + """add a proper SYNOPSIS section after the NAME section""" + synopsis = "\n\n=head1 SYNOPSIS\n\nB<<< youtube-dl >>> [I<<< OPTIONS >>>] I<<< URL >>> [I<<< URL >>>...]\n\n" + return append_title(exp_symbol) + rest + synopsis + +def description_section_processor(exp_type, exp_symbol, rest): + """remove the improper synopsis in the DESCRIPTION section""" + def remove_synopsis_in_description(string): + pattern = r"^ +.+$" + return re.sub(pattern, r"", string, 0, re.MULTILINE) + return append_title(exp_symbol) + 
remove_synopsis_in_description(rest) + +def sentence_per_line_to_word_per_line(string): + def spaces_to_newline(string): + pattern = r" +" + return re.sub(pattern, r"\n", string, 0, re.MULTILINE) + def remove_leading_newlines(string): + pattern = r"^\n+" + return re.sub(pattern, r"", string, 0, re.MULTILINE) + def multiple_newlines_to_single_newline(string): + pattern = r"\n+" + return re.sub(pattern, r"\n", string, 0, re.MULTILINE) + return compose(multiple_newlines_to_single_newline, + remove_leading_newlines, + spaces_to_newline) \ + (string) + +def process_options(string): + """process options in the OPTIONS section""" + def short_long_opt_with_arg_processor(string): + pattern = r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$" + return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\nI<<< \3 >>>\n\n\4", string, 0, re.MULTILINE) + def short_long_opt_without_arg_processor(string): + pattern = r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([A-Z].+)$" + return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\n\n\3", string, 0, re.MULTILINE) + def long_opt_with_arg_processor(string): + pattern = r"^(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$" + return re.sub(pattern, r"\n=item\nB<<< \1 >>>\nI<<< \2 >>>\n\n\3", string, 0, re.MULTILINE) + def long_opt_without_arg_processor(string): + pattern = r"^(--[^\s]+)[\s]+([A-Z].+)$" + return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n\n\2", string, 0, re.MULTILINE) + return compose(long_opt_without_arg_processor, + long_opt_with_arg_processor, + short_long_opt_without_arg_processor, + short_long_opt_with_arg_processor, + sentence_per_line_to_word_per_line) \ + (string) + +def options_section_processor(exp_type, exp_symbol, rest): + """ + Process the OPTIONS section by creating a sub_tree using front_end and + use process_options to process scm.cdr(sub_tree) + + Finally, convert the sub_tree back into string using back_end + + """ + def options_subsections_processor(exp_type, exp_symbol, rest): + return 
"\n=back\n\n=head2 " + \ + exp_symbol + \ + "\n\n=over 7\n\n" + \ + process_options(rest) + subtitle_split_pattern = r"^(=head2 .+)$" + subtitle_sub_pattern = r"^=head2 (.+)$" + sub_tree = connect_front_end(scm.list(make_front_end, + subtitle_split_pattern, + subtitle_sub_pattern, + scm.string_to_symbol("SUBTITLE"), + re.MULTILINE)) \ + (rest) + return append_title(exp_symbol) + \ + "=over 7\n\n" + \ + make_back_end(options_subsections_processor, + scm.string_to_symbol("SUBTITLE")) \ + (scm.cons(process_options(scm.car(sub_tree)), + scm.cdr(sub_tree))) + \ + "\n=back\n\n" + +def title_processor(exp_type, exp_symbol, rest): + """do a case dispatch on exp_type and invoke the appropriate processor""" + if exp_symbol == "INSTALLATION": + return installation_section_processor(exp_type, exp_symbol, rest) + elif exp_symbol == "NAME": + return name_section_processor(exp_type, exp_symbol, rest) + elif exp_symbol == "DESCRIPTION": + return description_section_processor(exp_type, exp_symbol, rest) + elif exp_symbol == "OPTIONS": + return options_section_processor(exp_type, exp_symbol, rest) + else: + return append_title(exp_symbol) + rest + +def bold(string): + """enclose string marked as bold by B<<< >>>""" + pattern = r"\*\*([^\*\n]+)\*\*" + return re.sub(pattern, r"B<<< \1 >>>", string, 0, re.MULTILINE) + +def italic(string): + """enclose string marked as italic by I<<< >>>""" + def asterisk_to_italic(string): + pattern = r"\*([^\*\n]+)\*" + return re.sub(pattern, r"I<<< \1 >>>", string, 0, re.MULTILINE) + def back_quote_to_italic(string): + pattern = r"`{1,2}([^`\n]+)`{1,2}" + return re.sub(pattern, r"I<<< \1 >>>", string, 0, re.MULTILINE) + return compose(back_quote_to_italic, asterisk_to_italic) \ + (string) + +def remove_internal_links(string): + pattern = r"\[([^]|\n]+)\]\(#[^\)|\n]+\)" + return re.sub(pattern, r"\1", string, 0, re.MULTILINE) + +def external_links(string): + """convert external links of the form [foo](bar) into L<<< foo|bar >>>""" + pattern = 
r"\[([^]|\n]+)\]\(([^\)|\n]+)\)" + return re.sub(pattern, r"L<<< \1|\2 >>>", string, 0, re.MULTILINE) + +# First, convert AST back to string using various back_ends +# Finally, postprocess the string and display it +scm.display(compose(external_links, + remove_internal_links, + italic, + bold, + make_back_end(title_processor, + scm.string_to_symbol("TITLE")), + make_back_end(subtitle_processor, + scm.string_to_symbol("SUBTITLE")), + make_back_end(make_item_processor(SYMBOL_TABLE), + scm.string_to_symbol("HYPHEN")), + make_back_end(make_item_processor(SYMBOL_TABLE), + scm.string_to_symbol("ASTERISK")), + make_back_end(make_item_processor(SYMBOL_TABLE), + scm.string_to_symbol("NUMBER")), + make_back_end(verbatim_processor, + scm.string_to_symbol("VERBATIM"))) \ + (AST))