Fix bad formatting of man page (Closes #6510)

* Makefile: It now pipes the output of devscripts/prepare_manpage.py to pod2man to produce the youtube-dl.1 man page (previously it used pandoc). * devscripts/prepare_manpage.py: It now converts README.md to Perl POD format.
2015-09-13 17:55:15 +08:00 · 2015-09-13 17:55:15 +08:00 · fe37a2535a
commit fe37a2535a
parent 92f580152f
2 changed files with 522 additions and 26 deletions
--- a/6
+++ b/6
@ -9,6 +9,8 @@ BINDIR ?= $(PREFIX)/bin
 MANDIR ?= $(PREFIX)/man
 SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python
+# Release version: runs youtube_dl/version.py followed by a print of
+# __version__, using the configurable $(PYTHON) interpreter defined above.
+VERSION ?= $(shell echo 'print(__version__)' | cat youtube_dl/version.py - | $(PYTHON))
+# Man page date: the version string with every dot replaced by a dash.
+DATE ?= $(shell echo '$(VERSION)' | sed -e 's/\./-/g')

 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
 ifeq ($(PREFIX),/usr)
@ -73,9 +75,7 @@ README.txt: README.md
 	pandoc -f markdown -t plain README.md -o README.txt

+# Man page: prepare_manpage.py writes Perl POD to stdout, uniq collapses
+# adjacent duplicate lines and pod2man renders the result as a section 1
+# page; --errors='die' makes pod2man fail on malformed POD.
 youtube-dl.1: README.md
-	python devscripts/prepare_manpage.py >youtube-dl.1.temp.md
-	pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
-	rm -f youtube-dl.1.temp.md
+	python devscripts/prepare_manpage.py | uniq | pod2man --center='User Commands' --date=$(DATE) --errors='die' --name='YOUTUBE-DL' --release=$(VERSION) --section=1 > youtube-dl.1

 youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
 	python devscripts/bash-completion.py
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@ -2,31 +2,527 @@ from __future__ import unicode_literals

 import io
 import os.path
-import sys
 import re
+import scm

-ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-README_FILE = os.path.join(ROOT_DIR, 'README.md')
+# Repository root: the parent of the directory that contains this script.
+ROOT_DIR_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# README.md in the repository root is the only input of this script.
+README_PATH = os.path.join(ROOT_DIR_PATH, "README.md")

-with io.open(README_FILE, encoding='utf-8') as f:
-    readme = f.read()
+# Read the whole README as UTF-8 text.
+with io.open(README_PATH, encoding="utf-8") as FILE:
+    README_ORIG = FILE.read()

-PREFIX = '''%YOUTUBE-DL(1)
-
-# NAME
-
-youtube\-dl \- download videos from youtube.com or other video platforms
-
-# SYNOPSIS
-
-**youtube-dl** \[OPTIONS\] URL [URL...]
-
-'''
-readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
-readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
-readme = PREFIX + readme
-
-if sys.version_info < (3, 0):
-    print(readme.encode('utf-8'))
+# Wrap a non-empty README in a POD envelope (=pod ... =cut) and put a
+# "# NAME" title in front of it, so that whatever the README starts with
+# falls under a NAME title.  An empty README yields an empty document.
+if README_ORIG:
+    README = "=pod\n\n=encoding utf8\n\n# NAME\n\n" + README_ORIG + "\n\n=cut\n"
 else:
-    print(readme)
+    README = ""
+
+# Return the argument unchanged; used as the seed of compose() and as the
+# innermost post_proc in connect_front_end().
+def identity(x): return x
+
+# Compose two one-argument functions: the result applies g first, then f.
+def _compose(f, g): return lambda x: f(g(x))
+
+def compose(*tuple_of_func):
+    """
+    Compose any number of one-argument functions into a single function.
+
+    The call sites in this file rely on right-to-left application, i.e.
+    compose(f, g, h)(x) == f(g(h(x))).
+
+    NOTE(review): that order assumes scm.fold calls _compose with
+    (element, accumulator), as SRFI-1 fold does -- confirm against scm.
+
+    """
+    lst_of_func = scm.tuple_to_lst(tuple_of_func)
+    return scm.fold(_compose, identity, scm.reverse(lst_of_func))
+
+def flatten(lst):
+    """
+    Flatten an arbitrarily deep nested lst into a single flat lst.
+
+    scm.nil stays scm.nil, a non-lst value becomes a one-element lst, and
+    any other lst is the concatenation of its flattened head and its
+    flattened tail.
+
+    """
+    if lst is scm.nil:
+        return scm.nil
+    elif not scm.is_list(lst):
+        return scm.list(lst)
+    else:
+        return scm.append(flatten(scm.car(lst)),
+                          flatten(scm.cdr(lst)))
+
+def list_subtract(lst1, lst2):
+    """
+    Return the elements of lst1 that have no == match in lst2.
+
+    NOTE(review): presumes scm.lset_difference behaves like SRFI-1
+    lset-difference -- confirm against scm.
+
+    """
+    return scm.lset_difference(lambda x, y: x == y, lst1, lst2)
+
+def non_nil_take_while(pred, lst):
+    """
+    Return the first run of consecutive elements of lst satisfying pred.
+
+    Leading elements that fail pred are dropped first, so the result is
+    only empty when no element satisfies pred.
+
+    NOTE(review): presumes scm.take_while / scm.drop_while follow their
+    SRFI-1 namesakes -- confirm against scm.
+
+    """
+    return scm.take_while(pred,
+                          scm.drop_while(lambda x: not pred(x),
+                                         lst))
+
+def string_join(lst_of_string, delimiter):
+    """Join a lst of strings into one string separated by delimiter."""
+    # str.join needs a Python sequence, so convert the lst first.
+    list_of_string = scm.lst_to_list(lst_of_string)
+    return delimiter.join(list_of_string)
+
+def is_not_empty_string(s_exp):
+    """
+    Return False only for the empty string.
+
+    Anything that scm.is_string does not recognise as a string, and every
+    non-empty string, yields True.
+
+    """
+    return not scm.is_string(s_exp) or bool(s_exp)
+
+def regex_split(pattern, string, flags=0):
+    """
+    Split string with re.split and return the pieces as a lst.
+
+    The third positional argument of re.split is maxsplit; 0 means no
+    limit.  Text matched by capturing groups in pattern is kept among the
+    pieces, as re.split always does.
+
+    """
+    return scm.list_to_lst(re.split(pattern, string, 0, flags))
+
+def remove_table_of_content(string):
+    """
+    Blank out table-of-contents entries of the README.
+
+    An entry is a whole line of the form "- [UPPER CASE TITLE](#anchor)".
+    Only the line content is removed; the line break itself stays.
+
+    """
+    pattern = r"^-[ \t]+\[[- \tA-Z]+\]\(#[-a-z]+\)[ \t]*$"
+    return re.sub(pattern, r"", string, 0, re.MULTILINE)
+
+def make_lexer(split_pattern, sub_pattern , exp_type, flags=0):
+    """
+    Build a lexer for one kind of expression.
+
+    A lexer is a procedure which does the following:
+
+    1. Split string into lst using split_pattern
+    2. Transform matching string in lst using sub_pattern
+    and attach exp_type to it, forming an expression
+
+    The output is a lst of tokens consisting of string and expression,
+    where an expression is the two-element lst (exp_type symbol).
+
+    split_pattern is expected to wrap the construct in a capturing group
+    so that re.split keeps the matched text; sub_pattern must capture the
+    part to keep as group 1.  flags are passed to every re call.
+
+    """
+    def attach_exp_type_to_matching_substring(string):
+        # Pieces that match split_pattern are the constructs themselves;
+        # everything else is plain text and is passed through untouched.
+        if re.search(split_pattern, string, flags):
+            return scm.list(exp_type,
+                            re.sub(sub_pattern, r"\1", string, 1, flags))
+        else:
+            return string
+    return lambda string: scm.map(attach_exp_type_to_matching_substring,
+                                  regex_split(split_pattern, string, flags))
+
+def make_parser(exp_type, post_proc):
+    """
+    Build a parser for the token lst produced by a lexer.
+
+    A parser is a procedure which takes the output of a lexer as input
+    and does the following:
+
+    1. Group exp_type expression and the string after it
+    2. Apply post_proc to the string after exp_type expression
+
+    Strings that do not follow an expression are passed through post_proc
+    as well.  The output is a tree-like structure.
+
+    Note that exp_type itself is not consulted here: any token that is a
+    lst counts as an expression.  The token right after an expression is
+    taken unconditionally, which relies on the lexer always emitting a
+    string after each expression.
+
+    """
+    def is_exp(lst): return scm.is_list(scm.car(lst))
+    def extract_exp(lst): return scm.car(lst)
+    def extract_string_after_exp(lst): return scm.cadr(lst)
+    def extract_rest(lst): return scm.cddr(lst)
+    def parse_loop(lst, accum):
+        # accum is built back to front and reversed once at the end.
+        if lst is scm.nil:
+            return accum
+        elif is_exp(lst):
+            return parse_loop(extract_rest(lst),
+                              scm.cons(scm.list(extract_exp(lst),
+                                                post_proc(extract_string_after_exp(lst))),
+                                       accum))
+        else:
+            return parse_loop(scm.cdr(lst),
+                              scm.cons(post_proc(scm.car(lst)),
+                                       accum))
+    return lambda lst: scm.reverse(parse_loop(lst, scm.nil))
+
+def make_front_end(post_proc, split_pattern, sub_pattern, exp_type, flags=0):
+    """
+    Compose parser and lexer to create a front end.
+
+    The resulting procedure takes a string, runs the lexer built from
+    split_pattern/sub_pattern/exp_type/flags on it and hands the tokens to
+    the parser, which applies post_proc to the plain-text parts.
+
+    """
+    return compose(make_parser(exp_type, post_proc),
+                   make_lexer(split_pattern, sub_pattern, exp_type, flags))
+
+def connect_front_end(*tuple_of_lst):
+    """
+    Connect front ends together
+    by making use of post_proc parameter of make_front_end
+
+    Each argument is a lst (func arg ...).  Starting from identity, every
+    func is applied to the result so far followed by its own args, so the
+    front end built from the first argument becomes the innermost
+    post_proc and the one built from the last argument is the procedure
+    returned, i.e. the first to see the text.
+
+    NOTE(review): assumes scm.apply(f, lst) calls f with the elements of
+    lst as positional arguments -- confirm against scm.
+
+    """
+    lst_of_lst = scm.tuple_to_lst(tuple_of_lst)
+    def extract_func(lst_of_lst): return scm.caar(lst_of_lst)
+    def extract_arg_lst(lst_of_lst): return scm.cdar(lst_of_lst)
+    def connect_loop(lst_of_lst, accum):
+        if lst_of_lst is scm.nil:
+            return accum
+        else:
+            return connect_loop(scm.cdr(lst_of_lst),
+                                scm.apply(extract_func(lst_of_lst),
+                                          scm.cons(accum,
+                                                   extract_arg_lst(lst_of_lst))))
+    return connect_loop(lst_of_lst, identity)
+
+# These patterns determine how the text is decomposed
+# into sensible parts
+
+# Each *_SPLIT_PATTERN wraps the whole construct in one capturing group so
+# that re.split keeps the matched text; the matching *_SUB_PATTERN captures
+# only the part that is kept as the symbol of the expression.
+# Title: a line made of one "#" and upper-case letters, blanks or hyphens.
+TITLE_SPLIT_PATTERN = r"^(#[ \t]+[- \tA-Z]+[ \t]*)$"
+TITLE_SUB_PATTERN = r"^#[ \t]+([- \tA-Z]+)[ \t]*$"
+
+# Subtitle: a line starting with "##" or "###"; trailing "#" are dropped.
+SUBTITLE_SPLIT_PATTERN = r"^(#{2,3}[ \t]+[^#\n]+#*[ \t]*)$"
+SUBTITLE_SUB_PATTERN = r"^#{2,3}[ \t]+([^#\n]+)#*[ \t]*$"
+
+# Hyphen item: an optionally indented line starting with "- ".
+HYPHEN_SPLIT_PATTERN = r"^([ \t]*-[ \t]+.+)$"
+HYPHEN_SUB_PATTERN = r"^[ \t]*-[ \t]+(.+)$"
+
+# Asterisk item: a line starting with "* " that contains no further "*".
+ASTERISK_SPLIT_PATTERN = r"^(\*[ \t]+[^\*\n]+)$"
+ASTERISK_SUB_PATTERN = r"^\*[ \t]+([^\*\n]+)$"
+
+# Numbered item: used below as both split and substitution pattern, so the
+# symbol keeps the whole line including its number.
+NUMBER_PATTERN = r"^(\d+\.[ \t]+.+)"
+
+# Verbatim block: text between two triple-backtick fences, containing no
+# backtick itself.
+VERBATIM_SPLIT_PATTERN = r"(```[^`]+```)"
+VERBATIM_SUB_PATTERN = r"```([^`]+)```"
+
+# tree representing the structure of README
+
+# The table of contents is removed first.  The front ends are listed from
+# innermost to outermost: the text is split by TITLE first, and the text
+# between matches is then split by SUBTITLE, HYPHEN, ASTERISK, NUMBER and
+# finally VERBATIM.  All but VERBATIM match line by line (re.MULTILINE).
+AST = compose(connect_front_end(scm.list(make_front_end,
+                                         VERBATIM_SPLIT_PATTERN,
+                                         VERBATIM_SUB_PATTERN,
+                                         scm.string_to_symbol("VERBATIM")),
+                                scm.list(make_front_end,
+                                         NUMBER_PATTERN,
+                                         NUMBER_PATTERN,
+                                         scm.string_to_symbol("NUMBER"),
+                                         re.MULTILINE),
+                                scm.list(make_front_end,
+                                         ASTERISK_SPLIT_PATTERN,
+                                         ASTERISK_SUB_PATTERN,
+                                         scm.string_to_symbol("ASTERISK"),
+                                         re.MULTILINE),
+                                scm.list(make_front_end,
+                                         HYPHEN_SPLIT_PATTERN,
+                                         HYPHEN_SUB_PATTERN,
+                                         scm.string_to_symbol("HYPHEN"),
+                                         re.MULTILINE),
+                                scm.list(make_front_end,
+                                         SUBTITLE_SPLIT_PATTERN,
+                                         SUBTITLE_SUB_PATTERN,
+                                         scm.string_to_symbol("SUBTITLE"),
+                                         re.MULTILINE),
+                                scm.list(make_front_end,
+                                         TITLE_SPLIT_PATTERN,
+                                         TITLE_SUB_PATTERN,
+                                         scm.string_to_symbol("TITLE"),
+                                         re.MULTILINE)),
+              remove_table_of_content) \
+              (README)
+
+def fetch_symbol(ast, exp_type_lst):
+    """
+    From ast, fetch symbol which is of type listed in exp_type_lst
+
+    A non-lst value yields scm.nil.  A node whose first element is an
+    expression of one of the listed types yields that expression together
+    with whatever is fetched from the remainder of the node.  Any other
+    lst is searched element by element and empty results are discarded.
+
+    Note that the output is a nested lst which needs to be flattened in
+    order to be a lst of the form
+    (<exp_type> <exp_symbol> <exp_type> <exp_symbol> ...)
+
+    """
+    def is_not_null(s_exp): return s_exp is not scm.nil
+    def is_exp(s_exp): return scm.is_list(scm.car(s_exp))
+    # Expression types are compared by identity, not equality.
+    def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type
+    def extract_exp(s_exp): return scm.car(s_exp)
+    def extract_rest(s_exp): return scm.cdr(s_exp)
+    if not scm.is_list(ast):
+        return scm.nil
+    elif is_exp(ast) and \
+         scm.any(lambda exp_type: is_exp_type(ast, exp_type), exp_type_lst):
+        return scm.list(extract_exp(ast),
+                        fetch_symbol(extract_rest(ast), exp_type_lst))
+    else:
+        return scm.append(scm.filter(is_not_null,
+                                     scm.map(lambda sub_tree: \
+                                             fetch_symbol(sub_tree,
+                                                          exp_type_lst),
+                                             ast)))
+
+def group_adj_element(lst):
+    """
+    Pair up adjacent elements of a flat lst.
+
+    Take the flattened output of fetch_symbol as input
+
+    Transform lst of the form
+    (<exp_type> <exp_symbol> <exp_type> <exp_symbol> ...)
+    into lst of the form
+    ((<exp_type> <exp_symbol>) (<exp_type> <exp_symbol>) ...)
+
+    """
+    def index_to_element(k): return scm.list_ref(lst, k)
+    # Pair every even index with the odd index that follows it, then
+    # replace the indices by the elements they refer to.
+    lst_of_index_lst = scm.map(scm.list,
+                               scm.filter(scm.is_even,
+                                          scm.iota(scm.length(lst))),
+                               scm.filter(scm.is_odd,
+                                          scm.iota(scm.length(lst))))
+    return scm.map(lambda index_lst: scm.map(index_to_element, index_lst),
+                   lst_of_index_lst)
+
+# Every expression type produced by the front ends that build AST.
+EXP_TYPE_LST = scm.list(scm.string_to_symbol("TITLE"),
+                        scm.string_to_symbol("SUBTITLE"),
+                        scm.string_to_symbol("HYPHEN"),
+                        scm.string_to_symbol("ASTERISK"),
+                        scm.string_to_symbol("NUMBER"),
+                        scm.string_to_symbol("VERBATIM"))
+
+# table recording the expression type of each expression symbol
+
+# Built by flattening the nested result of fetch_symbol and pairing the
+# elements up again, giving a lst of (<exp_type> <exp_symbol>) entries.
+SYMBOL_TABLE = compose(group_adj_element, flatten) \
+               (fetch_symbol(AST, EXP_TYPE_LST))
+
+# True when every element of lst is a string according to scm.is_string.
+def is_list_of_string(lst): return scm.every(scm.is_string, lst)
+
+def is_contain_string_lst(s_exp):
+    """
+    Tell whether s_exp is, or contains at any depth, a lst of only strings.
+
+    Non-lst values never qualify.
+
+    """
+    if not scm.is_list(s_exp):
+        return False
+    elif is_list_of_string(s_exp):
+        return True
+    else:
+        return scm.any(is_contain_string_lst, s_exp)
+
+def join_string_lst(s_exp):
+    """
+    Collapse the outermost all-string lsts found in s_exp.
+
+    Non-lst values are returned unchanged, a lst made only of strings is
+    concatenated without separator, and any other lst is processed
+    element by element.
+
+    """
+    if not scm.is_list(s_exp):
+        return s_exp
+    elif is_list_of_string(s_exp):
+        return string_join(s_exp, "")
+    else:
+        return scm.map(join_string_lst, s_exp)
+
+def recursively_join_string_lst(s_exp):
+    """
+    Apply join_string_lst repeatedly until no all-string lst is left.
+
+    Each pass can turn a lst of mixed content into a lst of strings, which
+    the next pass joins in turn.
+
+    """
+    if not is_contain_string_lst(s_exp):
+        return s_exp
+    else:
+        return recursively_join_string_lst(join_string_lst(s_exp))
+
+def process_ast(proc, exp_type, ast):
+    """
+    Recursively apply proc with exp_type, exp_symbol and rest.
+
+    A node whose first element is an expression of the requested exp_type
+    is replaced by proc(exp_type, exp_symbol, rest), where exp_symbol is
+    the second element of the expression and rest is the first atom found
+    in the second element of the node.  Non-lst values are returned
+    unchanged; any other lst is processed element by element.
+
+    """
+    def is_exp(s_exp): return scm.is_list(scm.car(s_exp))
+    # Expression types are compared by identity, not equality.
+    def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type
+    def s_exp_first(s_exp): return scm.car(flatten(s_exp))
+    def extract_exp_symbol(s_exp): return scm.cadar(s_exp)
+    # Reads its own argument rather than the enclosing ast, so the helper
+    # stays correct if it is ever called on another node.
+    def extract_rest(s_exp): return s_exp_first(scm.cadr(s_exp))
+    if not scm.is_list(ast):
+        return ast
+    elif is_exp(ast) and is_exp_type(ast, exp_type):
+        return proc(exp_type, extract_exp_symbol(ast), extract_rest(ast))
+    else:
+        return scm.map(lambda sub_tree: process_ast(proc, exp_type, sub_tree),
+                       ast)
+
+def make_back_end(proc, exp_type):
+    """
+    Build a back end for one expression type.
+
+    The returned procedure rewrites every exp_type expression of a tree
+    with proc (see process_ast) and then recursively joins the processed
+    tree-like structure back to string wherever only strings remain.
+
+    """
+    return lambda ast: recursively_join_string_lst(process_ast(proc,
+                                                               exp_type,
+                                                               ast))
+
+def verbatim_processor(exp_type, exp_symbol, rest):
+    """
+    Create verbatim paragraph from expression with exp_type VERBATIM
+
+    1. remove the formatter name (a line holding only "bash" or "python")
+       if it exists
+    2. indent each non-empty line in the paragraph by 4 spaces
+
+    The text following the verbatim block (rest) is appended unchanged.
+
+    """
+    def remove_formatter_name(string):
+        pattern = r"^bash[ \t]*$|^python[ \t]*$"
+        return re.sub(pattern, r"", string, 0, re.MULTILINE)
+    def indent_by_4_spaces(string):
+        pattern = r"^(.+)"
+        return re.sub(pattern, r"    \1", string, 0, re.MULTILINE)
+    return indent_by_4_spaces(remove_formatter_name(exp_symbol)) + rest
+
+def group_by_exp_type(exp_type, lst):
+    """
+    Group exp_type expression by removing non-exp_type expression
+
+    lst holds (<exp_type> <exp_symbol>) entries.  The result is a lst of
+    runs, each run being consecutive entries of the requested exp_type.
+
+    NOTE(review): the entries of a run are removed from lst by equality
+    before recursing, so an equal entry elsewhere in lst is presumably
+    removed too -- confirm against scm.lset_difference.
+
+    """
+    def is_exp_type(s_exp): return scm.car(s_exp) is exp_type
+    sublst = non_nil_take_while(is_exp_type, lst)
+    if sublst is scm.nil:
+        return scm.nil
+    else:
+        return scm.cons(sublst,
+                        group_by_exp_type(exp_type,
+                                          list_subtract(lst, sublst)))
+
+def make_item_position_decider(func):
+    """
+    Return a procedure which will decide if a given string of exp_type
+    is in the desired position specified by func
+
+    func takes a lst and returns the element at the desired position
+    (for example scm.first or scm.last).
+
+    The returned procedure takes (string, exp_type, symbol_table) and is
+    true when string equals the symbol that func selects from any group
+    of exp_type entries in symbol_table.
+
+    """
+    def extract_exp_symbol(s_exp): return scm.cadr(s_exp)
+    def is_item_position(string, exp_type, symbol_table):
+        return scm.any(lambda exp_symbol: exp_symbol == string,
+                       scm.map(compose(extract_exp_symbol, func),
+                               group_by_exp_type(exp_type, symbol_table)))
+    return is_item_position
+
+def process_item(exp_type, prefix, rest, exp_symbol, symbol_table):
+    """
+    Process item based on the position of exp_symbol in the symbol_table.
+
+    Every item becomes "=item Z<>" + prefix followed by rest.  The first
+    item of a list is preceded by "=over 7" and the last one is followed
+    by "=back".  An item that is both first and last (a one-item list)
+    gets both, so every "=over" is always closed.
+
+    """
+    is_first_item = make_item_position_decider(scm.first)
+    is_last_item = make_item_position_decider(scm.last)
+    # Decide both positions independently: they are not mutually exclusive.
+    opening = ""
+    if is_first_item(exp_symbol, exp_type, symbol_table):
+        opening = "=over 7\n\n"
+    closing = "\n"
+    if is_last_item(exp_symbol, exp_type, symbol_table):
+        closing = "\n\n=back"
+    return opening + "=item Z<>" + prefix + "\n\n" + rest + closing
+
+def make_item_processor(symbol_table):
+    """
+    Return a procedure which does a case dispatch on exp_type of expression
+    and pass the extracted parts of expression to process_item
+
+    The returned procedure takes (exp_type, exp_symbol, rest), handles the
+    HYPHEN, ASTERISK and NUMBER types and raises TypeError for any other
+    type.  rest is appended unchanged to the processed item.
+
+    """
+    def make_number_item_lst(exp_symbol):
+        # Split "1. text" into a (NUMBER "1.") expression and the text,
+        # dropping the empty string re.split leaves in front of the match.
+        split_pattern = r"^(\d+\.[ \t]+)"
+        sub_pattern = r"^(\d+\.)[ \t]+"
+        return scm.filter(is_not_empty_string,
+                          make_lexer(split_pattern,
+                                     sub_pattern,
+                                     scm.string_to_symbol("NUMBER"),
+                                     re.MULTILINE) \
+                          (exp_symbol))
+    # Both arguments are expression types here; they are compared by
+    # identity.
+    def is_exp_type(exp_type, exp_symbol): return exp_type is exp_symbol
+    def extract_prefix(number_item_lst): return scm.cadar(number_item_lst)
+    def extract_rest(number_item_lst): return scm.cadr(number_item_lst)
+    def process_different_items(exp_type, exp_symbol, rest):
+        if is_exp_type(exp_type, scm.string_to_symbol("HYPHEN")):
+            return process_item(exp_type,
+                                "-",
+                                exp_symbol,
+                                exp_symbol,
+                                symbol_table) + \
+                                rest
+        elif is_exp_type(exp_type, scm.string_to_symbol("ASTERISK")):
+            return process_item(exp_type,
+                                "*",
+                                exp_symbol,
+                                exp_symbol,
+                                symbol_table) + \
+                                rest
+        elif is_exp_type(exp_type, scm.string_to_symbol("NUMBER")):
+            # The number itself becomes the item prefix and the remaining
+            # text the item body.
+            return process_item(exp_type,
+                                extract_prefix(make_number_item_lst(exp_symbol)),
+                                extract_rest(make_number_item_lst(exp_symbol)),
+                                exp_symbol,
+                                symbol_table) + \
+                                rest
+        else:
+            raise TypeError("unknown exp_type of expression")
+    return process_different_items
+
+def installation_section_processor(exp_type, exp_symbol, rest):
+    """Drop the section: both its title and its content are discarded."""
+    return ""
+
+def append_title(string):
+    """Return string as a POD =head1 heading followed by a blank line."""
+    return "=head1 " + string + "\n\n"
+
+def subtitle_processor(exp_type, exp_symbol, rest):
+    """Emit exp_symbol as a POD =head2 heading followed by rest."""
+    return "=head2 " + exp_symbol + "\n\n" + rest
+
+def name_section_processor(exp_type, exp_symbol, rest):
+    """
+    Add a proper SYNOPSIS section after the NAME section.
+
+    The NAME heading and its content are kept; a fixed SYNOPSIS heading
+    and usage line are appended after them.
+
+    """
+    synopsis = "\n\n=head1 SYNOPSIS\n\nB<<< youtube-dl >>> [I<<< OPTIONS >>>] I<<< URL >>> [I<<< URL >>>...]\n\n"
+    return append_title(exp_symbol) + rest + synopsis
+
+def description_section_processor(exp_type, exp_symbol, rest):
+    """
+    Remove the improper synopsis in the DESCRIPTION section.
+
+    Every line of the section that starts with a space is blanked out;
+    the remaining text follows the DESCRIPTION heading.
+
+    """
+    def remove_synopsis_in_description(string):
+        pattern = r"^ +.+$"
+        return re.sub(pattern, r"", string, 0, re.MULTILINE)
+    return append_title(exp_symbol) + remove_synopsis_in_description(rest)
+
+def sentence_per_line_to_word_per_line(string):
+    """
+    Reflow string so that every space-separated word is on its own line.
+
+    Runs of spaces become line breaks first; line breaks found at the
+    start of a line are then removed and repeated line breaks are
+    collapsed into one.
+
+    """
+    def spaces_to_newline(string):
+        pattern = r" +"
+        return re.sub(pattern, r"\n", string, 0, re.MULTILINE)
+    def remove_leading_newlines(string):
+        pattern = r"^\n+"
+        return re.sub(pattern, r"", string, 0, re.MULTILINE)
+    def multiple_newlines_to_single_newline(string):
+        pattern = r"\n+"
+        return re.sub(pattern, r"\n", string, 0, re.MULTILINE)
+    # compose is used right to left here: spaces_to_newline runs first.
+    return compose(multiple_newlines_to_single_newline,
+                   remove_leading_newlines,
+                   spaces_to_newline) \
+                   (string)
+
+def process_options(string):
+    """
+    Process options in the OPTIONS section.
+
+    The text is first reflowed to one word per line.  Four substitutions
+    then turn option entries into POD =item entries, with option names in
+    B<<< >>> and arguments in I<<< >>>.  They run from the most specific
+    form (short and long option with argument) to the least specific one
+    (long option without argument).
+
+    An argument is recognised as a word without lower-case letters, and
+    the description must start with an upper-case letter.
+
+    """
+    def short_long_opt_with_arg_processor(string):
+        pattern = r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$"
+        return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\nI<<< \3 >>>\n\n\4", string, 0, re.MULTILINE)
+    def short_long_opt_without_arg_processor(string):
+        pattern = r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([A-Z].+)$"
+        return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\n\n\3", string, 0, re.MULTILINE)
+    def long_opt_with_arg_processor(string):
+        pattern = r"^(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$"
+        return re.sub(pattern, r"\n=item\nB<<< \1 >>>\nI<<< \2 >>>\n\n\3", string, 0, re.MULTILINE)
+    def long_opt_without_arg_processor(string):
+        pattern = r"^(--[^\s]+)[\s]+([A-Z].+)$"
+        return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n\n\2", string, 0, re.MULTILINE)
+    # compose is used right to left here: the reflow step runs first.
+    return compose(long_opt_without_arg_processor,
+                   long_opt_with_arg_processor,
+                   short_long_opt_without_arg_processor,
+                   short_long_opt_with_arg_processor,
+                   sentence_per_line_to_word_per_line) \
+                   (string)
+
+def options_section_processor(exp_type, exp_symbol, rest):
+    """
+    Process the OPTIONS section by creating a sub_tree using front_end and
+    use process_options to process scm.cdr(sub_tree)
+
+    The section text is split on "=head2" lines.  The text before the
+    first one is run through process_options directly; each subsection is
+    handled by options_subsections_processor, which closes the previous
+    =over list, emits the =head2 heading and opens a new list.
+
+    Finally, convert the sub_tree back into string using back_end
+
+    """
+    def options_subsections_processor(exp_type, exp_symbol, rest):
+        return "\n=back\n\n=head2 " + \
+            exp_symbol + \
+            "\n\n=over 7\n\n" + \
+            process_options(rest)
+    # The patterns match POD headings rather than Markdown ones, so the
+    # subtitles are expected to have been converted already.
+    subtitle_split_pattern = r"^(=head2 .+)$"
+    subtitle_sub_pattern = r"^=head2 (.+)$"
+    sub_tree = connect_front_end(scm.list(make_front_end,
+                                          subtitle_split_pattern,
+                                          subtitle_sub_pattern,
+                                          scm.string_to_symbol("SUBTITLE"),
+                                          re.MULTILINE)) \
+                                          (rest)
+    return append_title(exp_symbol) + \
+        "=over 7\n\n" + \
+        make_back_end(options_subsections_processor,
+                      scm.string_to_symbol("SUBTITLE")) \
+                      (scm.cons(process_options(scm.car(sub_tree)),
+                                scm.cdr(sub_tree))) + \
+                                "\n=back\n\n"
+
+def title_processor(exp_type, exp_symbol, rest):
+    """
+    Do a case dispatch on exp_symbol and invoke the appropriate processor.
+
+    INSTALLATION, NAME, DESCRIPTION and OPTIONS have dedicated processors;
+    any other title becomes a =head1 heading followed by rest unchanged.
+
+    """
+    if exp_symbol == "INSTALLATION":
+        return installation_section_processor(exp_type, exp_symbol, rest)
+    elif exp_symbol == "NAME":
+        return name_section_processor(exp_type, exp_symbol, rest)
+    elif exp_symbol == "DESCRIPTION":
+        return description_section_processor(exp_type, exp_symbol, rest)
+    elif exp_symbol == "OPTIONS":
+        return options_section_processor(exp_type, exp_symbol, rest)
+    else:
+        return append_title(exp_symbol) + rest
+
+def bold(string):
+    """
+    Enclose string marked as bold by B<<< >>>.
+
+    Bold text is a run without asterisks or line breaks between a pair of
+    double asterisks.
+
+    """
+    pattern = r"\*\*([^\*\n]+)\*\*"
+    return re.sub(pattern, r"B<<< \1 >>>", string, 0, re.MULTILINE)
+
+def italic(string):
+    """
+    Enclose string marked as italic by I<<< >>>.
+
+    Both text between single asterisks and text between one or two back
+    quotes is treated as italic; neither may span a line break.
+
+    """
+    def asterisk_to_italic(string):
+        pattern = r"\*([^\*\n]+)\*"
+        return re.sub(pattern, r"I<<< \1 >>>", string, 0, re.MULTILINE)
+    def back_quote_to_italic(string):
+        pattern = r"`{1,2}([^`\n]+)`{1,2}"
+        return re.sub(pattern, r"I<<< \1 >>>", string, 0, re.MULTILINE)
+    return compose(back_quote_to_italic, asterisk_to_italic) \
+        (string)
+
+def remove_internal_links(string):
+    """
+    Replace links of the form [foo](#bar) by their text foo.
+
+    NOTE(review): inside the character classes "|" is a literal, so a
+    link whose text or target contains "|" is left untouched -- confirm
+    that this is intended.
+
+    """
+    pattern = r"\[([^]|\n]+)\]\(#[^\)|\n]+\)"
+    return re.sub(pattern, r"\1", string, 0, re.MULTILINE)
+
+def external_links(string):
+    """
+    Convert external links of the form [foo](bar) into L<<< foo|bar >>>.
+
+    The pattern does not tell internal "#anchor" targets apart, so links
+    of that kind have to be removed beforehand.
+
+    """
+    pattern = r"\[([^]|\n]+)\]\(([^\)|\n]+)\)"
+    return re.sub(pattern, r"L<<< \1|\2 >>>", string, 0, re.MULTILINE)
+
+# First, convert AST back to string using the various back ends; compose
+# is used right to left here, so VERBATIM is processed first and TITLE last.
+# Finally, postprocess the string (bold, italic, then links) and display it
+scm.display(compose(external_links,
+                    remove_internal_links,
+                    italic,
+                    bold,
+                    make_back_end(title_processor,
+                                  scm.string_to_symbol("TITLE")),
+                    make_back_end(subtitle_processor,
+                                  scm.string_to_symbol("SUBTITLE")),
+                    make_back_end(make_item_processor(SYMBOL_TABLE),
+                                  scm.string_to_symbol("HYPHEN")),
+                    make_back_end(make_item_processor(SYMBOL_TABLE),
+                                  scm.string_to_symbol("ASTERISK")),
+                    make_back_end(make_item_processor(SYMBOL_TABLE),
+                                  scm.string_to_symbol("NUMBER")),
+                    make_back_end(verbatim_processor,
+                                  scm.string_to_symbol("VERBATIM"))) \
+            (AST))