Fix bad formatting of man page (Closes #6510)

* Makefile: It now pipe the output of devscripts/prepare_manpage.py to
 pod2man to produce youtube-dl.1 man page (previously was using pandoc).
* devscripts/prepare_manpage.py: It now convert README.md
 to Perl Pod format.
This commit is contained in:
Alex Vong 2015-09-13 17:55:15 +08:00
parent 92f580152f
commit fe37a2535a
2 changed files with 522 additions and 26 deletions

View File

@ -9,6 +9,8 @@ BINDIR ?= $(PREFIX)/bin
MANDIR ?= $(PREFIX)/man
SHAREDIR ?= $(PREFIX)/share
PYTHON ?= /usr/bin/env python
VERSION ?= $(shell echo 'print(__version__)' | cat youtube_dl/version.py - | python)
DATE ?= $(shell echo '$(VERSION)' | sed -e 's/\./-/g')
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
ifeq ($(PREFIX),/usr)
@ -73,9 +75,7 @@ README.txt: README.md
pandoc -f markdown -t plain README.md -o README.txt
youtube-dl.1: README.md
python devscripts/prepare_manpage.py >youtube-dl.1.temp.md
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
rm -f youtube-dl.1.temp.md
python devscripts/prepare_manpage.py | uniq | pod2man --center='User Commands' --date=$(DATE) --errors='die' --name='YOUTUBE-DL' --release=$(VERSION) --section=1 > youtube-dl.1
youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
python devscripts/bash-completion.py

View File

@ -2,31 +2,527 @@ from __future__ import unicode_literals
import io
import os.path
import sys
import re
import scm
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')
ROOT_DIR_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_PATH = os.path.join(ROOT_DIR_PATH, "README.md")
with io.open(README_FILE, encoding='utf-8') as f:
readme = f.read()
with io.open(README_PATH, encoding="utf-8") as FILE:
README_ORIG = FILE.read()
PREFIX = '''%YOUTUBE-DL(1)
# NAME
youtube\-dl \- download videos from youtube.com or other video platforms
# SYNOPSIS
**youtube-dl** \[OPTIONS\] URL [URL...]
'''
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
readme = PREFIX + readme
if sys.version_info < (3, 0):
print(readme.encode('utf-8'))
if README_ORIG:
README = "=pod\n\n=encoding utf8\n\n# NAME\n\n" + README_ORIG + "\n\n=cut\n"
else:
print(readme)
README = ""
def identity(x): return x
def _compose(f, g): return lambda x: f(g(x))
def compose(*tuple_of_func):
lst_of_func = scm.tuple_to_lst(tuple_of_func)
return scm.fold(_compose, identity, scm.reverse(lst_of_func))
def flatten(lst):
"""flatten an arbitrarily deep nested lst into a single lst"""
if lst is scm.nil:
return scm.nil
elif not scm.is_list(lst):
return scm.list(lst)
else:
return scm.append(flatten(scm.car(lst)),
flatten(scm.cdr(lst)))
def list_subtract(lst1, lst2):
return scm.lset_difference(lambda x, y: x == y, lst1, lst2)
def non_nil_take_while(pred, lst):
return scm.take_while(pred,
scm.drop_while(lambda x: not pred(x),
lst))
def string_join(lst_of_string, delimiter):
list_of_string = scm.lst_to_list(lst_of_string)
return delimiter.join(list_of_string)
def is_not_empty_string(s_exp):
return not scm.is_string(s_exp) or bool(s_exp)
def regex_split(pattern, string, flags=0):
"""split string into lst using pattern"""
return scm.list_to_lst(re.split(pattern, string, 0, flags))
def remove_table_of_content(string):
pattern = r"^-[ \t]+\[[- \tA-Z]+\]\(#[-a-z]+\)[ \t]*$"
return re.sub(pattern, r"", string, 0, re.MULTILINE)
def make_lexer(split_pattern, sub_pattern , exp_type, flags=0):
"""
Lexer is a procedure which does the following:
1. Split string into lst using split_pattern
2. Transform matching string in lst using sub_pattern
and attach exp_type to it, forming an expression
The output is a lst of tokens consisting of string and expression
"""
def attach_exp_type_to_matching_substring(string):
if re.search(split_pattern, string, flags):
return scm.list(exp_type,
re.sub(sub_pattern, r"\1", string, 1, flags))
else:
return string
return lambda string: scm.map(attach_exp_type_to_matching_substring,
regex_split(split_pattern, string, flags))
def make_parser(exp_type, post_proc):
"""
Parser is a procedure which takes the output of a lexer as input
and does the following:
1. Group exp_type expression and the string after it
2. Apply post_proc to the string after exp_type expression
The output is a tree-like structure
"""
def is_exp(lst): return scm.is_list(scm.car(lst))
def extract_exp(lst): return scm.car(lst)
def extract_string_after_exp(lst): return scm.cadr(lst)
def extract_rest(lst): return scm.cddr(lst)
def parse_loop(lst, accum):
if lst is scm.nil:
return accum
elif is_exp(lst):
return parse_loop(extract_rest(lst),
scm.cons(scm.list(extract_exp(lst),
post_proc(extract_string_after_exp(lst))),
accum))
else:
return parse_loop(scm.cdr(lst),
scm.cons(post_proc(scm.car(lst)),
accum))
return lambda lst: scm.reverse(parse_loop(lst, scm.nil))
def make_front_end(post_proc, split_pattern, sub_pattern, exp_type, flags=0):
"""compose parser and lexer to create a front end"""
return compose(make_parser(exp_type, post_proc),
make_lexer(split_pattern, sub_pattern, exp_type, flags))
def connect_front_end(*tuple_of_lst):
"""
Connect front ends together
by making use of post_proc parameter of make_front_end
"""
lst_of_lst = scm.tuple_to_lst(tuple_of_lst)
def extract_func(lst_of_lst): return scm.caar(lst_of_lst)
def extract_arg_lst(lst_of_lst): return scm.cdar(lst_of_lst)
def connect_loop(lst_of_lst, accum):
if lst_of_lst is scm.nil:
return accum
else:
return connect_loop(scm.cdr(lst_of_lst),
scm.apply(extract_func(lst_of_lst),
scm.cons(accum,
extract_arg_lst(lst_of_lst))))
return connect_loop(lst_of_lst, identity)
# These are patterns used to determined how to decomposite the text
# into sensible parts
TITLE_SPLIT_PATTERN = r"^(#[ \t]+[- \tA-Z]+[ \t]*)$"
TITLE_SUB_PATTERN = r"^#[ \t]+([- \tA-Z]+)[ \t]*$"
SUBTITLE_SPLIT_PATTERN = r"^(#{2,3}[ \t]+[^#\n]+#*[ \t]*)$"
SUBTITLE_SUB_PATTERN = r"^#{2,3}[ \t]+([^#\n]+)#*[ \t]*$"
HYPHEN_SPLIT_PATTERN = r"^([ \t]*-[ \t]+.+)$"
HYPHEN_SUB_PATTERN = r"^[ \t]*-[ \t]+(.+)$"
ASTERISK_SPLIT_PATTERN = r"^(\*[ \t]+[^\*\n]+)$"
ASTERISK_SUB_PATTERN = r"^\*[ \t]+([^\*\n]+)$"
NUMBER_PATTERN = r"^(\d+\.[ \t]+.+)"
VERBATIM_SPLIT_PATTERN = r"(```[^`]+```)"
VERBATIM_SUB_PATTERN = r"```([^`]+)```"
# tree representing the structure of README
AST = compose(connect_front_end(scm.list(make_front_end,
VERBATIM_SPLIT_PATTERN,
VERBATIM_SUB_PATTERN,
scm.string_to_symbol("VERBATIM")),
scm.list(make_front_end,
NUMBER_PATTERN,
NUMBER_PATTERN,
scm.string_to_symbol("NUMBER"),
re.MULTILINE),
scm.list(make_front_end,
ASTERISK_SPLIT_PATTERN,
ASTERISK_SUB_PATTERN,
scm.string_to_symbol("ASTERISK"),
re.MULTILINE),
scm.list(make_front_end,
HYPHEN_SPLIT_PATTERN,
HYPHEN_SUB_PATTERN,
scm.string_to_symbol("HYPHEN"),
re.MULTILINE),
scm.list(make_front_end,
SUBTITLE_SPLIT_PATTERN,
SUBTITLE_SUB_PATTERN,
scm.string_to_symbol("SUBTITLE"),
re.MULTILINE),
scm.list(make_front_end,
TITLE_SPLIT_PATTERN,
TITLE_SUB_PATTERN,
scm.string_to_symbol("TITLE"),
re.MULTILINE)),
remove_table_of_content) \
(README)
def fetch_symbol(ast, exp_type_lst):
"""
From ast, fetch symbol which is of type listed in exp_type_lst
Note that the output is a nested lst needed to be flatten in order to be
lst of the form (<exp_type> <exp_symbol> <exp_type> <exp_symbol> ...)
"""
def is_not_null(s_exp): return s_exp is not scm.nil
def is_exp(s_exp): return scm.is_list(scm.car(s_exp))
def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type
def extract_exp(s_exp): return scm.car(s_exp)
def extract_rest(s_exp): return scm.cdr(s_exp)
if not scm.is_list(ast):
return scm.nil
elif is_exp(ast) and \
scm.any(lambda exp_type: is_exp_type(ast, exp_type), exp_type_lst):
return scm.list(extract_exp(ast),
fetch_symbol(extract_rest(ast), exp_type_lst))
else:
return scm.append(scm.filter(is_not_null,
scm.map(lambda sub_tree: \
fetch_symbol(sub_tree,
exp_type_lst),
ast)))
def group_adj_element(lst):
"""
Take output of fetch_symbol as input
Transform lst of the form
(<exp_type> <exp_symbol> <exp_type> <exp_symbol> ...)
into lst of the form
((<exp_type> <exp_symbol>) (<exp_type> <exp_symbol>) ...)
"""
def index_to_element(k): return scm.list_ref(lst, k)
lst_of_index_lst = scm.map(scm.list,
scm.filter(scm.is_even,
scm.iota(scm.length(lst))),
scm.filter(scm.is_odd,
scm.iota(scm.length(lst))))
return scm.map(lambda index_lst: scm.map(index_to_element, index_lst),
lst_of_index_lst)
EXP_TYPE_LST = scm.list(scm.string_to_symbol("TITLE"),
scm.string_to_symbol("SUBTITLE"),
scm.string_to_symbol("HYPHEN"),
scm.string_to_symbol("ASTERISK"),
scm.string_to_symbol("NUMBER"),
scm.string_to_symbol("VERBATIM"))
# table recording the expression type of each expression symbol
SYMBOL_TABLE = compose(group_adj_element, flatten) \
(fetch_symbol(AST, EXP_TYPE_LST))
def is_list_of_string(lst): return scm.every(scm.is_string, lst)
def is_contain_string_lst(s_exp):
if not scm.is_list(s_exp):
return False
elif is_list_of_string(s_exp):
return True
else:
return scm.any(is_contain_string_lst, s_exp)
def join_string_lst(s_exp):
if not scm.is_list(s_exp):
return s_exp
elif is_list_of_string(s_exp):
return string_join(s_exp, "")
else:
return scm.map(join_string_lst, s_exp)
def recursively_join_string_lst(s_exp):
if not is_contain_string_lst(s_exp):
return s_exp
else:
return recursively_join_string_lst(join_string_lst(s_exp))
def process_ast(proc, exp_type, ast):
"""recursively apply proc with exp_type, exp_symbol and rest"""
def is_exp(s_exp): return scm.is_list(scm.car(s_exp))
def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type
def s_exp_first(s_exp): return scm.car(flatten(s_exp))
def extract_exp_symbol(s_exp): return scm.cadar(s_exp)
def extract_rest(s_exp): return s_exp_first(scm.cadr(ast))
if not scm.is_list(ast):
return ast
elif is_exp(ast) and is_exp_type(ast, exp_type):
return proc(exp_type, extract_exp_symbol(ast), extract_rest(ast))
else:
return scm.map(lambda sub_tree: process_ast(proc, exp_type, sub_tree),
ast)
def make_back_end(proc, exp_type):
"""recursively join processed tree-like structure back to string"""
return lambda ast: recursively_join_string_lst(process_ast(proc,
exp_type,
ast))
def verbatim_processor(exp_type, exp_symbol, rest):
"""
Create verbatim paragraph from expression with exp_type VERBATIM
1. remove formatter name if exists
2. indent each sentance in the paragraph by 4 spaces
"""
def remove_formatter_name(string):
pattern = r"^bash[ \t]*$|^python[ \t]*$"
return re.sub(pattern, r"", string, 0, re.MULTILINE)
def indent_by_4_spaces(string):
pattern = r"^(.+)"
return re.sub(pattern, r" \1", string, 0, re.MULTILINE)
return indent_by_4_spaces(remove_formatter_name(exp_symbol)) + rest
def group_by_exp_type(exp_type, lst):
"""group exp_type expression by removing non-exp_type expression"""
def is_exp_type(s_exp): return scm.car(s_exp) is exp_type
sublst = non_nil_take_while(is_exp_type, lst)
if sublst is scm.nil:
return scm.nil
else:
return scm.cons(sublst,
group_by_exp_type(exp_type,
list_subtract(lst, sublst)))
def make_item_position_decider(func):
"""
Return a procedure which will decide if a given string of exp_type
is in the desired position specified by func
func take a lst and return element of the desired position
"""
def extract_exp_symbol(s_exp): return scm.cadr(s_exp)
def is_item_position(string, exp_type, symbol_table):
return scm.any(lambda exp_symbol: exp_symbol == string,
scm.map(compose(extract_exp_symbol, func),
group_by_exp_type(exp_type, symbol_table)))
return is_item_position
def process_item(exp_type, prefix, rest, exp_symbol, symbol_table):
"""process item based on the position of exp_symbol in the symbol_table"""
is_first_item = make_item_position_decider(scm.first)
is_last_item = make_item_position_decider(scm.last)
if is_first_item(exp_symbol, exp_type, symbol_table):
return "=over 7\n\n=item Z<>" + prefix + "\n\n" + rest + "\n"
elif is_last_item(exp_symbol, exp_type, symbol_table):
return "=item Z<>" + prefix + "\n\n" + rest + "\n\n=back"
else:
return "=item Z<>" + prefix + "\n\n" + rest + "\n"
def make_item_processor(symbol_table):
"""
Return a procedure which does a case dispatch on exp_type of expression
and pass the extracted parts of expression to process_item
"""
def make_number_item_lst(exp_symbol):
split_pattern = r"^(\d+\.[ \t]+)"
sub_pattern = r"^(\d+\.)[ \t]+"
return scm.filter(is_not_empty_string,
make_lexer(split_pattern,
sub_pattern,
scm.string_to_symbol("NUMBER"),
re.MULTILINE) \
(exp_symbol))
def is_exp_type(exp_type, exp_symbol): return exp_type is exp_symbol
def extract_prefix(number_item_lst): return scm.cadar(number_item_lst)
def extract_rest(number_item_lst): return scm.cadr(number_item_lst)
def process_different_items(exp_type, exp_symbol, rest):
if is_exp_type(exp_type, scm.string_to_symbol("HYPHEN")):
return process_item(exp_type,
"-",
exp_symbol,
exp_symbol,
symbol_table) + \
rest
elif is_exp_type(exp_type, scm.string_to_symbol("ASTERISK")):
return process_item(exp_type,
"*",
exp_symbol,
exp_symbol,
symbol_table) + \
rest
elif is_exp_type(exp_type, scm.string_to_symbol("NUMBER")):
return process_item(exp_type,
extract_prefix(make_number_item_lst(exp_symbol)),
extract_rest(make_number_item_lst(exp_symbol)),
exp_symbol,
symbol_table) + \
rest
else:
raise TypeError("unknown exp_type of expression")
return process_different_items
def installation_section_processor(exp_type, exp_symbol, rest):
return ""
def append_title(string):
return "=head1 " + string + "\n\n"
def subtitle_processor(exp_type, exp_symbol, rest):
return "=head2 " + exp_symbol + "\n\n" + rest
def name_section_processor(exp_type, exp_symbol, rest):
"""add a proper SYNOPSIS section after the NAME section"""
synopsis = "\n\n=head1 SYNOPSIS\n\nB<<< youtube-dl >>> [I<<< OPTIONS >>>] I<<< URL >>> [I<<< URL >>>...]\n\n"
return append_title(exp_symbol) + rest + synopsis
def description_section_processor(exp_type, exp_symbol, rest):
"""remove the improper synopsis in the DESCRIPTION section"""
def remove_synopsis_in_description(string):
pattern = r"^ +.+$"
return re.sub(pattern, r"", string, 0, re.MULTILINE)
return append_title(exp_symbol) + remove_synopsis_in_description(rest)
def sentence_per_line_to_word_per_line(string):
def spaces_to_newline(string):
pattern = r" +"
return re.sub(pattern, r"\n", string, 0, re.MULTILINE)
def remove_leading_newlines(string):
pattern = r"^\n+"
return re.sub(pattern, r"", string, 0, re.MULTILINE)
def multiple_newlines_to_single_newline(string):
pattern = r"\n+"
return re.sub(pattern, r"\n", string, 0, re.MULTILINE)
return compose(multiple_newlines_to_single_newline,
remove_leading_newlines,
spaces_to_newline) \
(string)
def process_options(string):
"""process options in the OPTIONS section"""
def short_long_opt_with_arg_processor(string):
pattern = r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$"
return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\nI<<< \3 >>>\n\n\4", string, 0, re.MULTILINE)
def short_long_opt_without_arg_processor(string):
pattern = r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([A-Z].+)$"
return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\n\n\3", string, 0, re.MULTILINE)
def long_opt_with_arg_processor(string):
pattern = r"^(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$"
return re.sub(pattern, r"\n=item\nB<<< \1 >>>\nI<<< \2 >>>\n\n\3", string, 0, re.MULTILINE)
def long_opt_without_arg_processor(string):
pattern = r"^(--[^\s]+)[\s]+([A-Z].+)$"
return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n\n\2", string, 0, re.MULTILINE)
return compose(long_opt_without_arg_processor,
long_opt_with_arg_processor,
short_long_opt_without_arg_processor,
short_long_opt_with_arg_processor,
sentence_per_line_to_word_per_line) \
(string)
def options_section_processor(exp_type, exp_symbol, rest):
"""
Process the OPTIONS section by creating a sub_tree using front_end and
use process_options to process scm.cdr(sub_tree)
Finally, convert the sub_tree back into string using back_end
"""
def options_subsections_processor(exp_type, exp_symbol, rest):
return "\n=back\n\n=head2 " + \
exp_symbol + \
"\n\n=over 7\n\n" + \
process_options(rest)
subtitle_split_pattern = r"^(=head2 .+)$"
subtitle_sub_pattern = r"^=head2 (.+)$"
sub_tree = connect_front_end(scm.list(make_front_end,
subtitle_split_pattern,
subtitle_sub_pattern,
scm.string_to_symbol("SUBTITLE"),
re.MULTILINE)) \
(rest)
return append_title(exp_symbol) + \
"=over 7\n\n" + \
make_back_end(options_subsections_processor,
scm.string_to_symbol("SUBTITLE")) \
(scm.cons(process_options(scm.car(sub_tree)),
scm.cdr(sub_tree))) + \
"\n=back\n\n"
def title_processor(exp_type, exp_symbol, rest):
"""do a case dispatch on exp_type and invoke the appropriate processor"""
if exp_symbol == "INSTALLATION":
return installation_section_processor(exp_type, exp_symbol, rest)
elif exp_symbol == "NAME":
return name_section_processor(exp_type, exp_symbol, rest)
elif exp_symbol == "DESCRIPTION":
return description_section_processor(exp_type, exp_symbol, rest)
elif exp_symbol == "OPTIONS":
return options_section_processor(exp_type, exp_symbol, rest)
else:
return append_title(exp_symbol) + rest
def bold(string):
"""enclose string marked as bold by B<<< >>>"""
pattern = r"\*\*([^\*\n]+)\*\*"
return re.sub(pattern, r"B<<< \1 >>>", string, 0, re.MULTILINE)
def italic(string):
"""enclose string marked as italic by I<<< >>>"""
def asterisk_to_italic(string):
pattern = r"\*([^\*\n]+)\*"
return re.sub(pattern, r"I<<< \1 >>>", string, 0, re.MULTILINE)
def back_quote_to_italic(string):
pattern = r"`{1,2}([^`\n]+)`{1,2}"
return re.sub(pattern, r"I<<< \1 >>>", string, 0, re.MULTILINE)
return compose(back_quote_to_italic, asterisk_to_italic) \
(string)
def remove_internal_links(string):
pattern = r"\[([^]|\n]+)\]\(#[^\)|\n]+\)"
return re.sub(pattern, r"\1", string, 0, re.MULTILINE)
def external_links(string):
"""convert external links of the form [foo](bar) into L<<< foo|bar >>>"""
pattern = r"\[([^]|\n]+)\]\(([^\)|\n]+)\)"
return re.sub(pattern, r"L<<< \1|\2 >>>", string, 0, re.MULTILINE)
# First, convert AST back to string using various back_ends
# Finally, postprocess the string and display it
scm.display(compose(external_links,
remove_internal_links,
italic,
bold,
make_back_end(title_processor,
scm.string_to_symbol("TITLE")),
make_back_end(subtitle_processor,
scm.string_to_symbol("SUBTITLE")),
make_back_end(make_item_processor(SYMBOL_TABLE),
scm.string_to_symbol("HYPHEN")),
make_back_end(make_item_processor(SYMBOL_TABLE),
scm.string_to_symbol("ASTERISK")),
make_back_end(make_item_processor(SYMBOL_TABLE),
scm.string_to_symbol("NUMBER")),
make_back_end(verbatim_processor,
scm.string_to_symbol("VERBATIM"))) \
(AST))