From 229ecdde27ece9f2edf769ba57b6157af1dcf2a1 Mon Sep 17 00:00:00 2001
From: Alex Vong <alexvong1995@gmail.com>
Date: Mon, 10 Aug 2015 12:55:29 +0800
Subject: [PATCH 1/4] Add the missing tag `EMBEDDING YOUTUBE-DL` in the content
 table.

* README.md: Add missing tag.
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 24bfe38a2..2bef26d57 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@ youtube-dl - download videos from youtube.com or other video platforms
 - [VIDEO SELECTION](#video-selection)
 - [FAQ](#faq)
 - [DEVELOPER INSTRUCTIONS](#developer-instructions)
+- [EMBEDDING YOUTUBE-DL](#embedding-youtube-dl)
 - [BUGS](#bugs)
 - [COPYRIGHT](#copyright)
 

From 92f580152f21b1ed92e397a49bf8c05bbbeae580 Mon Sep 17 00:00:00 2001
From: Alex Vong <alexvong1995@gmail.com>
Date: Sat, 12 Sep 2015 16:36:39 +0800
Subject: [PATCH 2/4] Add a minimal scm module to provide Scheme-like data
 types and procedures. primitives.py implements a minimal subset of Scheme
 primitives. srfi_1.py implements a minimal subset of SRFI-1: List Library.

* devscripts/scm/__init__.py: New file.
* devscripts/scm/primitives.py: New file.
* devscripts/scm/srfi_1.py: New file.
---
 devscripts/scm/__init__.py   |   2 +
 devscripts/scm/primitives.py | 196 +++++++++++++++++++
 devscripts/scm/srfi_1.py     | 355 +++++++++++++++++++++++++++++++++++
 3 files changed, 553 insertions(+)
 create mode 100644 devscripts/scm/__init__.py
 create mode 100644 devscripts/scm/primitives.py
 create mode 100644 devscripts/scm/srfi_1.py

diff --git a/devscripts/scm/__init__.py b/devscripts/scm/__init__.py
new file mode 100644
index 000000000..81d45a6ab
--- /dev/null
+++ b/devscripts/scm/__init__.py
@@ -0,0 +1,2 @@
+from .primitives import *
+from .srfi_1 import *
diff --git a/devscripts/scm/primitives.py b/devscripts/scm/primitives.py
new file mode 100644
index 000000000..224a471cb
--- /dev/null
+++ b/devscripts/scm/primitives.py
@@ -0,0 +1,196 @@
+from __future__ import unicode_literals
+
+import re
+import sys
+
+
+"""
+Module implementing a minimal subset of Scheme primitives in terms of Python
+
+In Scheme:
+    apply car cdr cons display even? length list list->lst lst->list lst->tuple
+    list? null? object->string odd? pair? string? string->symbol symbol->string
+    symbol? tuple->lst
+
+In Python:
+    apply car cdr cons display is_even length list list_to_lst lst_to_list
+    lst_to_tuple is_list is_null object_to_string is_odd is_pair is_string
+    string_to_symbol symbol_to_string is_symbol tuple_to_lst
+
+"""
+
+# standalone primitives
+
+if sys.version_info < (3, 0):
+    def display(obj): print(obj.encode("utf-8"))
+else:
+    def display(obj): print(obj)
+
+def is_even(x): return x % 2 == 0
+
+def is_odd(x): return x % 2 != 0
+
+if sys.version_info < (3, 0):
+    is_string = lambda obj: isinstance(obj, (str, basestring))
+else:
+    is_string = lambda obj: isinstance(obj, str)
+
+if sys.version_info < (3, 0):
+    object_to_string = lambda obj: unicode(obj)
+else:
+    object_to_string = lambda obj: str(obj)
+
+# nil-related primitives
+
+class _Nil:
+    """internal class implementing the empty lst type"""
+    def __repr__(self): return "()"
+
+    def __str__(self): return "()"
+
+# Many Scheme implementations don't have nil and use '() instead,
+# but we can't do that as we don't know how to make quoting work in Python...
+nil = _Nil()
+
+def is_null(x): return x is nil
+
+
+# pair/lst-related primitives
+
+class _Pair:
+    """internal class implementing the pair type"""
+    def __init__(self, car, cdr):
+        self.car = car
+        self.cdr = cdr
+        # is_list is already True for nil and proper lsts, False otherwise
+        self.is_list = is_list(cdr)
+
+    def __repr__(self):
+        """
+        Simulate representation of pair and list in Scheme REPL
+
+        In general, cons(x, y) is called a pair and is represented by (x . y)
+
+        However, if y is the empty lst, then cons(x, y) is called a lst
+        (avoid confusing with the procedure list,
+        treat list as a verb and lst as a noun)
+        and is represented by (x)
+
+        Moreover, if y is a lst and represented by (foo),
+        then cons(x,y) is also a lst and is represented by (x foo)
+
+        """
+        pattern = r"^\(|\)$"
+        if self.cdr is nil:
+            return "(" + repr(self.car) + ")"
+        elif self.is_list:
+            return "(" + repr(self.car) + " " + \
+                re.sub(pattern, "", repr(self.cdr)) + ")"
+        else:
+            return "(" + repr(self.car) + " . " + repr(self.cdr) + ")"
+
+    def __str__(self):
+        """
+        Same as __repr__.
+
+        Except repr(self.car) and repr(self.cdr) are replaced by
+        object_to_string(self.car) and object_to_string(self.cdr) respectively.
+
+        """
+        pattern = r"^\(|\)$"
+        if self.cdr is nil:
+            return "(" + object_to_string(self.car) + ")"
+        elif self.is_list:
+            return "(" + object_to_string(self.car) + " " + \
+                re.sub(pattern, "", object_to_string(self.cdr)) + ")"
+        else:
+            return "(" + object_to_string(self.car) + " . " + \
+                object_to_string(self.cdr) + ")"
+
+    def __eq__(self, x):
+        return isinstance(x, _Pair) and self.car == x.car and self.cdr == x.cdr
+
+def cons(a, b): return _Pair(a, b)
+
+def car(pair): return pair.car
+
+def cdr(pair): return pair.cdr
+
+def is_pair(x): return isinstance(x, _Pair)
+
+def is_list(x):
+    if x is nil:
+        return True
+    elif isinstance(x, _Pair):
+        return x.is_list
+    else:
+        return False
+
+def list(*arg_tup):
+    """build a lst from any number of elements (nil when given none)"""
+    if not arg_tup:
+        return nil
+    else:
+        return cons(arg_tup[0], list(*arg_tup[1:]))
+
+def tuple_to_lst(tup):
+    """convert Python tuple to Scheme lst"""
+    if not tup:
+        return nil
+    else:
+        return cons(tup[0], tuple_to_lst(tup[1:]))
+
+def lst_to_tuple(lst):
+    """convert Scheme lst to Python tuple"""
+    if lst is nil:
+        return ()
+    else:
+        return (lst.car,) + lst_to_tuple(lst.cdr)
+
+def apply(proc, lst):
+    """apply procedure proc to a Scheme lst"""
+    return proc(*lst_to_tuple(lst))
+
+def list_to_lst(list_):
+    """convert Python list to Scheme lst"""
+    if not list_:
+        return nil
+    else:
+        return cons(list_[0], list_to_lst(list_[1:]))
+
+def lst_to_list(lst):
+    """convert Scheme lst to Python list"""
+    if lst is nil:
+        return []
+    else:
+        return [lst.car,] + lst_to_list(lst.cdr)
+
+
+# symbol-related primitives
+
+# maintain a dictionary of symbol, to avoid duplication of _Symbol object
+_symbol_dict = {}
+
+class _Symbol:
+    """internal class implementing the symbol type"""
+    def __init__(self, string):
+        self.string = string
+        _symbol_dict[string] = self
+
+    def __repr__(self):
+        """remove leading and trailing quote from repr(self.string)"""
+        pattern = r"^'|^\"|\"$|'$"
+        return re.sub(pattern, "", repr(self.string))
+
+    def __str__(self):
+        return object_to_string(self.string)
+
+def string_to_symbol(string):
+    """convert Python string to Scheme symbol"""
+    if string not in _symbol_dict:
+        _symbol_dict[string] = _Symbol(string)
+    return _symbol_dict[string]
+
+def symbol_to_string(symbol): return object_to_string(symbol)
+
+def is_symbol(x): return isinstance(x, _Symbol)  # True iff x is a symbol
diff --git a/devscripts/scm/srfi_1.py b/devscripts/scm/srfi_1.py
new file mode 100644
index 000000000..806929b38
--- /dev/null
+++ b/devscripts/scm/srfi_1.py
@@ -0,0 +1,355 @@
+from __future__ import unicode_literals
+
+from .primitives import *
+
+
+"""
+Module implementing a minimal subset of SRFI-1: List Library
+
+In Scheme:
+    append caar cadr cdar cddr caaar caadr cadar caddr cdaar cdadr cddar cdddr
+    caaaar caaadr caadar caaddr cadaar cadadr caddar cadddr cdaaar cdaadr
+    cdadar cdaddr cddaar cddadr cdddar cddddr concatenate drop-while every
+    filter first fold iota last length list list-ref lset-difference map reduce
+    reverse take-while
+
+In Python:
+    append caar cadr cdar cddr caaar caadr cadar caddr cdaar cdadr cddar cdddr
+    caaaar caaadr caadar caaddr cadaar cadadr caddar cadddr cdaaar cdaadr
+    cdadar cdaddr cddaar cddadr cdddar cddddr concatenate drop_while every
+    filter first fold iota last length list list_ref lset_difference map reduce
+    reverse take_while
+
+"""
+
+# Use these procedures with caution,
+# as too much car/cdr-ing may hinder readability
+
+def caar(obj): return car(car(obj))
+
+def cadr(obj): return car(cdr(obj))
+
+def cdar(obj): return cdr(car(obj))
+
+def cddr(obj): return cdr(cdr(obj))
+
+def caaar(obj): return car(car(car(obj)))
+
+def caadr(obj): return car(car(cdr(obj)))
+
+def cadar(obj): return car(cdr(car(obj)))
+
+def caddr(obj): return car(cdr(cdr(obj)))
+
+def cdaar(obj): return cdr(car(car(obj)))
+
+def cdadr(obj): return cdr(car(cdr(obj)))
+
+def cddar(obj): return cdr(cdr(car(obj)))
+
+def cdddr(obj): return cdr(cdr(cdr(obj)))
+
+def caaaar(obj): return car(car(car(car(obj))))
+
+def caaadr(obj): return car(car(car(cdr(obj))))
+
+def caadar(obj): return car(car(cdr(car(obj))))
+
+def caaddr(obj): return car(car(cdr(cdr(obj))))
+
+def cadaar(obj): return car(cdr(car(car(obj))))
+
+def cadadr(obj): return car(cdr(car(cdr(obj))))
+
+def caddar(obj): return car(cdr(cdr(car(obj))))
+
+def cadddr(obj): return car(cdr(cdr(cdr(obj))))
+
+def cdaaar(obj): return cdr(car(car(car(obj))))
+
+def cdaadr(obj): return cdr(car(car(cdr(obj))))
+
+def cdadar(obj): return cdr(car(cdr(car(obj))))
+
+def cdaddr(obj): return cdr(car(cdr(cdr(obj))))
+
+def cddaar(obj): return cdr(cdr(car(car(obj))))
+
+def cddadr(obj): return cdr(cdr(car(cdr(obj))))
+
+def cdddar(obj): return cdr(cdr(cdr(car(obj))))
+
+def cddddr(obj): return cdr(cdr(cdr(cdr(obj))))
+
+def length(lst):
+    """compute length of lst"""
+    def length_loop(lst, count):
+        if lst is nil:
+            return count
+        else:
+            return length_loop(cdr(lst),
+                               count + 1)
+    return length_loop(lst, 0)
+
+def list_ref(lst, k):
+    """return the k^th element of lst"""
+    if k == 0:
+        return car(lst)
+    else:
+        return list_ref(cdr(lst), k - 1)
+
+def iota(count):
+    """return lst from 0 to (count - 1)"""
+    def iota_loop(loop_count):
+        if loop_count == count:
+            return nil
+        else:
+            return cons(loop_count,
+                        iota_loop(loop_count + 1))
+    return iota_loop(0)
+
+def _any(proc, arg_lst):
+    """
+    any for procedures that take a single argument
+
+    Apply proc to every element in arg_lst
+    Return True if any of the results is true
+    Otherwise, return False
+
+    """
+    if arg_lst is nil:
+        return False
+    elif proc(car(arg_lst)):
+        return True
+    else:
+        return _any(proc, cdr(arg_lst))
+
+def _every(proc, arg_lst):
+    """
+    every for procedures that take a single argument
+
+    Apply proc to every element in arg_lst
+    Return True if every result is true
+    Otherwise, return False
+
+    """
+    if arg_lst is nil:
+        return True
+    elif not proc(car(arg_lst)):
+        return False
+    else:
+        return _every(proc, cdr(arg_lst))
+
+def _map(proc, lst):
+    """
+    map for procedures that take a single argument
+
+    Apply proc to every element in arg_lst and return the resulting lst
+
+    """
+    if lst is nil:
+        return nil
+    else:
+        return cons(proc(car(lst)),
+                    _map(proc, cdr(lst)))
+
+def map(proc, *tuple_of_lst):
+    """
+    map for procedures that take any number of arguments, including 1
+
+    Apply proc to the n^th element in lst from lst_of_lst
+    and return the resulting lst
+
+    """
+    lst_of_lst = tuple_to_lst(tuple_of_lst)
+    if _every(is_null, lst_of_lst):
+        return nil
+    elif _any(is_null, lst_of_lst):
+        raise IndexError("some of the lists are differed in length!")
+    else:
+        return cons(apply(proc,
+                          _map(car, lst_of_lst)),
+                    apply(map, cons(proc,
+                                    _map(cdr, lst_of_lst))))
+
+def _fold(proc, init, lst):
+    """
+    fold for procedures that take a single argument
+
+    If lst is the empty lst, return init
+    Otherwise, apply proc to the first element of lst and init in this order
+    Now, the result becomes the new init
+
+    """
+
+    if lst is nil:
+        return init
+    else:
+        return _fold(proc, proc(car(lst), init), cdr(lst))
+
+def reduce(proc, default, lst):
+    """
+    If lst is the empty lst, return default
+    Otherwise, apply proc to the second element in lst
+    and the first element from lst in this order
+    Now, the result becomes the element after the remaining first element
+
+    """
+    if lst is nil:
+        return default
+    elif cdr(lst) is nil:
+        return car(lst)
+    else:
+        return _fold(proc, car(lst), cdr(lst))
+
+def any(proc, *tuple_of_lst):
+    """
+    any for procedures that take any number of arguments, including 1
+
+    Apply proc to the n^th element in lst from lst_of_lst
+    Return True if any of the results is True
+    Otherwise, return False
+    """
+    # use the n-ary map (not the unary _map) so several lsts can be passed
+    lst_of_lst = tuple_to_lst(tuple_of_lst)
+    return reduce(lambda x, y: x or y,
+                  False,
+                  apply(map, cons(proc,
+                                  lst_of_lst)))
+
+def every(proc, *tuple_of_lst):
+    """
+    every for procedures that take any number of arguments, including 1
+
+    Apply proc to the n^th element in lst from lst_of_lst
+    Return True if every result is True
+    Otherwise, return False
+    """
+    # use the n-ary map (not the unary _map) so several lsts can be passed
+    lst_of_lst = tuple_to_lst(tuple_of_lst)
+    return reduce(lambda x, y: x and y,
+                  True,
+                  apply(map, cons(proc,
+                                  lst_of_lst)))
+
+def reverse(lst):
+    """reverse a given lst"""
+    return _fold(cons, nil, lst)
+
+def filter(proc, lst):
+    """
+    Apply proc to elements in lst
+    Remove those evaluated to False and return the resulting lst
+
+    """
+    def filter_loop(proc, lst, accum):
+        if lst is nil:
+            return accum
+        elif proc(car(lst)):
+            return filter_loop(proc, cdr(lst), cons(car(lst), accum))
+        else:
+            return filter_loop(proc,
+                               cdr(lst),
+                               accum)
+    return reverse(filter_loop(proc, lst, nil))
+
+def first(lst):
+    """return the first element of lst, usually used with last"""
+    return car(lst)
+
+def last(lst):
+    """return the last element of lst, usually used with first"""
+    return car(reverse(lst))
+
+def _append(lst1, lst2):
+    """
+    append for procedure that takes a single argument
+
+    Append 2 lst into a single lst
+
+    """
+
+    return _fold(cons, lst2, reverse(lst1))
+
+def append(*tuple_of_lst):
+    """
+    append for procedures that take any number of arguments, including 1
+
+    Append any number of lst into a single lst
+
+    """
+
+    lst_of_lst = tuple_to_lst(tuple_of_lst)
+    return reduce(_append, nil, reverse(lst_of_lst))
+
+def concatenate(lst_of_lst):
+    """concatenate lst_of_lst into a single lst"""
+    return apply(append, lst_of_lst)
+
+def fold(proc, init, *tuple_of_lst):
+    """
+    fold for procedures that take any number of arguments, including 1
+
+    If every element in lst_of_lst is the empty lst, return init
+    Otherwise, apply proc to the first element of every element in lst_of_lst
+    and init in this order
+    Now, the result becomes the new init
+
+    """
+
+    lst_of_lst = tuple_to_lst(tuple_of_lst)
+    if _every(is_null, lst_of_lst):
+        return init
+    elif _any(is_null, lst_of_lst):
+        raise IndexError("some of the lists are differed in length!")
+    else:
+        return apply(fold, cons(proc,
+                                cons(apply(proc,
+                                           append(_map(car, lst_of_lst),
+                                                  list(init))),
+                                     _map(cdr, lst_of_lst))))
+
+def lset_difference(comparator, lst, *tuple_of_lst):
+    """remove from lst every element that comparator matches in the other lsts"""
+    def _lset_difference(comparator, lst1, lst2):
+        """treat lst1 and lst2 as sets and compute lst1 - lst2"""
+        return filter(lambda x: _every(lambda y: not comparator(x, y), lst2),
+                      lst1)
+    lst_of_lst = tuple_to_lst(tuple_of_lst)
+    if lst_of_lst is nil:
+        return lst
+    else:
+        return apply(lset_difference,
+                     cons(comparator,
+                          cons(_lset_difference(comparator,
+                                                lst,
+                                                car(lst_of_lst)),
+                               cdr(lst_of_lst))))
+
+def drop_while(pred, lst):
+    """
+    While predicate evaluates to True, drops the element
+
+    Return the lst if predicate evaluates to False or if lst is empty
+
+    """
+    if lst is nil:
+        return nil
+    elif not pred(car(lst)):
+        return lst
+    else:
+        return drop_while(pred, cdr(lst))
+
+def take_while(pred, lst):
+    """
+    While predicate evaluates to True, takes the element
+
+    Return the empty lst if predicate evaluates to False or if lst is empty
+
+    """
+    if lst is nil:
+        return nil
+    elif not pred(car(lst)):
+        return nil
+    else:
+        return cons(car(lst), take_while(pred, cdr(lst)))

From fe37a2535aa013b12d87c9ac3ea466c4ac350f61 Mon Sep 17 00:00:00 2001
From: Alex Vong <alexvong1995@gmail.com>
Date: Sun, 13 Sep 2015 17:55:15 +0800
Subject: [PATCH 3/4] Fix bad formatting of man page (Closes #6510)

* Makefile: Pipe the output of devscripts/prepare_manpage.py to
 pod2man to produce the youtube-dl.1 man page (previously used pandoc).
* devscripts/prepare_manpage.py: Convert README.md
 to Perl Pod format.
---
 Makefile                      |   6 +-
 devscripts/prepare_manpage.py | 542 ++++++++++++++++++++++++++++++++--
 2 files changed, 522 insertions(+), 26 deletions(-)

diff --git a/Makefile b/Makefile
index fdb1abb60..0b331240f 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,8 @@ BINDIR ?= $(PREFIX)/bin
 MANDIR ?= $(PREFIX)/man
 SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python
+VERSION ?= $(shell echo 'print(__version__)' | cat youtube_dl/version.py - | $(PYTHON))
+DATE ?= $(shell echo '$(VERSION)' | sed -e 's/\./-/g')
 
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
 ifeq ($(PREFIX),/usr)
@@ -73,9 +75,7 @@ README.txt: README.md
 	pandoc -f markdown -t plain README.md -o README.txt
 
 youtube-dl.1: README.md
-	python devscripts/prepare_manpage.py >youtube-dl.1.temp.md
-	pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
-	rm -f youtube-dl.1.temp.md
+	python devscripts/prepare_manpage.py | uniq | pod2man --center='User Commands' --date=$(DATE) --errors='die' --name='YOUTUBE-DL' --release=$(VERSION) --section=1 > youtube-dl.1
 
 youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
 	python devscripts/bash-completion.py
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 7ece37754..8389ba773 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -2,31 +2,527 @@ from __future__ import unicode_literals
 
 import io
 import os.path
-import sys
 import re
+import scm
 
-ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-README_FILE = os.path.join(ROOT_DIR, 'README.md')
+ROOT_DIR_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+README_PATH = os.path.join(ROOT_DIR_PATH, "README.md")
 
-with io.open(README_FILE, encoding='utf-8') as f:
-    readme = f.read()
+with io.open(README_PATH, encoding="utf-8") as FILE:
+    README_ORIG = FILE.read()
 
-PREFIX = '''%YOUTUBE-DL(1)
-
-# NAME
-
-youtube\-dl \- download videos from youtube.com or other video platforms
-
-# SYNOPSIS
-
-**youtube-dl** \[OPTIONS\] URL [URL...]
-
-'''
-readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
-readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
-readme = PREFIX + readme
-
-if sys.version_info < (3, 0):
-    print(readme.encode('utf-8'))
+if README_ORIG:
+    README = "=pod\n\n=encoding utf8\n\n# NAME\n\n" + README_ORIG + "\n\n=cut\n"
 else:
-    print(readme)
+    README = ""
+
+def identity(x): return x
+
+def _compose(f, g): return lambda x: f(g(x))
+
+def compose(*tuple_of_func):
+    lst_of_func = scm.tuple_to_lst(tuple_of_func)
+    return scm.fold(_compose, identity, scm.reverse(lst_of_func))
+
+def flatten(lst):
+    """flatten an arbitrarily deep nested lst into a single lst"""
+    if lst is scm.nil:
+        return scm.nil
+    elif not scm.is_list(lst):
+        return scm.list(lst)
+    else:
+        return scm.append(flatten(scm.car(lst)),
+                          flatten(scm.cdr(lst)))
+
+def list_subtract(lst1, lst2):
+    return scm.lset_difference(lambda x, y: x == y, lst1, lst2)
+
+def non_nil_take_while(pred, lst):
+    return scm.take_while(pred,
+                          scm.drop_while(lambda x: not pred(x),
+                                         lst))
+
+def string_join(lst_of_string, delimiter):
+    list_of_string = scm.lst_to_list(lst_of_string)
+    return delimiter.join(list_of_string)
+
+def is_not_empty_string(s_exp):
+    return not scm.is_string(s_exp) or bool(s_exp)
+
+def regex_split(pattern, string, flags=0):
+    """split string into lst using pattern"""
+    return scm.list_to_lst(re.split(pattern, string, maxsplit=0, flags=flags))
+
+def remove_table_of_content(string):
+    pattern = r"^-[ \t]+\[[- \tA-Z]+\]\(#[-a-z]+\)[ \t]*$"  # - [TITLE](#anchor)
+    return re.sub(pattern, r"", string, count=0, flags=re.MULTILINE)
+
+def make_lexer(split_pattern, sub_pattern, exp_type, flags=0):
+    """
+    Lexer is a procedure which does the following:
+
+    1. Split string into lst using split_pattern
+    2. Transform matching string in lst using sub_pattern
+    and attach exp_type to it, forming an expression
+
+    The output is a lst of tokens consisting of string and expression
+
+    """
+    def attach_exp_type_to_matching_substring(string):
+        if re.search(split_pattern, string, flags):
+            return scm.list(exp_type, re.sub(sub_pattern, r"\1", string,
+                                             count=1, flags=flags))
+        else:
+            return string
+    return lambda string: scm.map(attach_exp_type_to_matching_substring,
+                                  regex_split(split_pattern, string, flags))
+
+def make_parser(exp_type, post_proc):
+    """
+    Parser is a procedure which takes the output of a lexer as input
+    and does the following:
+
+    1. Group exp_type expression and the string after it
+    2. Apply post_proc to the string after exp_type expression
+
+    The output is a tree-like structure
+
+    """
+    def is_exp(lst): return scm.is_list(scm.car(lst))
+    def extract_exp(lst): return scm.car(lst)
+    def extract_string_after_exp(lst): return scm.cadr(lst)
+    def extract_rest(lst): return scm.cddr(lst)
+    def parse_loop(lst, accum):
+        if lst is scm.nil:
+            return accum
+        elif is_exp(lst):
+            return parse_loop(extract_rest(lst),
+                              scm.cons(scm.list(extract_exp(lst),
+                                                post_proc(extract_string_after_exp(lst))),
+                                       accum))
+        else:
+            return parse_loop(scm.cdr(lst),
+                              scm.cons(post_proc(scm.car(lst)),
+                                       accum))
+    return lambda lst: scm.reverse(parse_loop(lst, scm.nil))
+
+def make_front_end(post_proc, split_pattern, sub_pattern, exp_type, flags=0):
+    """compose parser and lexer to create a front end"""
+    return compose(make_parser(exp_type, post_proc),
+                   make_lexer(split_pattern, sub_pattern, exp_type, flags))
+
+def connect_front_end(*tuple_of_lst):
+    """
+    Connect front ends together
+    by making use of post_proc parameter of make_front_end
+
+    """
+    lst_of_lst = scm.tuple_to_lst(tuple_of_lst)
+    def extract_func(lst_of_lst): return scm.caar(lst_of_lst)
+    def extract_arg_lst(lst_of_lst): return scm.cdar(lst_of_lst)
+    def connect_loop(lst_of_lst, accum):
+        if lst_of_lst is scm.nil:
+            return accum
+        else:
+            return connect_loop(scm.cdr(lst_of_lst),
+                                scm.apply(extract_func(lst_of_lst),
+                                          scm.cons(accum,
+                                                   extract_arg_lst(lst_of_lst))))
+    return connect_loop(lst_of_lst, identity)
+
+# These are patterns used to determine how to decompose the text
+# into sensible parts
+
+TITLE_SPLIT_PATTERN = r"^(#[ \t]+[- \tA-Z]+[ \t]*)$"
+TITLE_SUB_PATTERN = r"^#[ \t]+([- \tA-Z]+)[ \t]*$"
+
+SUBTITLE_SPLIT_PATTERN = r"^(#{2,3}[ \t]+[^#\n]+#*[ \t]*)$"
+SUBTITLE_SUB_PATTERN = r"^#{2,3}[ \t]+([^#\n]+)#*[ \t]*$"
+
+HYPHEN_SPLIT_PATTERN = r"^([ \t]*-[ \t]+.+)$"
+HYPHEN_SUB_PATTERN = r"^[ \t]*-[ \t]+(.+)$"
+
+ASTERISK_SPLIT_PATTERN = r"^(\*[ \t]+[^\*\n]+)$"
+ASTERISK_SUB_PATTERN = r"^\*[ \t]+([^\*\n]+)$"
+
+NUMBER_PATTERN = r"^(\d+\.[ \t]+.+)"
+
+VERBATIM_SPLIT_PATTERN = r"(```[^`]+```)"
+VERBATIM_SUB_PATTERN = r"```([^`]+)```"
+
+# tree representing the structure of README
+
+AST = compose(connect_front_end(scm.list(make_front_end,
+                                         VERBATIM_SPLIT_PATTERN,
+                                         VERBATIM_SUB_PATTERN,
+                                         scm.string_to_symbol("VERBATIM")),
+                                scm.list(make_front_end,
+                                         NUMBER_PATTERN,
+                                         NUMBER_PATTERN,
+                                         scm.string_to_symbol("NUMBER"),
+                                         re.MULTILINE),
+                                scm.list(make_front_end,
+                                         ASTERISK_SPLIT_PATTERN,
+                                         ASTERISK_SUB_PATTERN,
+                                         scm.string_to_symbol("ASTERISK"),
+                                         re.MULTILINE),
+                                scm.list(make_front_end,
+                                         HYPHEN_SPLIT_PATTERN,
+                                         HYPHEN_SUB_PATTERN,
+                                         scm.string_to_symbol("HYPHEN"),
+                                         re.MULTILINE),
+                                scm.list(make_front_end,
+                                         SUBTITLE_SPLIT_PATTERN,
+                                         SUBTITLE_SUB_PATTERN,
+                                         scm.string_to_symbol("SUBTITLE"),
+                                         re.MULTILINE),
+                                scm.list(make_front_end,
+                                         TITLE_SPLIT_PATTERN,
+                                         TITLE_SUB_PATTERN,
+                                         scm.string_to_symbol("TITLE"),
+                                         re.MULTILINE)),
+              remove_table_of_content) \
+              (README)
+
+def fetch_symbol(ast, exp_type_lst):
+    """
+    From ast, fetch symbol which is of type listed in exp_type_lst
+
+    Note that the output is a nested lst needed to be flatten in order to be
+    lst of the form (<exp_type> <exp_symbol> <exp_type> <exp_symbol> ...)
+
+    """
+    def is_not_null(s_exp): return s_exp is not scm.nil
+    def is_exp(s_exp): return scm.is_list(scm.car(s_exp))
+    def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type
+    def extract_exp(s_exp): return scm.car(s_exp)
+    def extract_rest(s_exp): return scm.cdr(s_exp)
+    if not scm.is_list(ast):
+        return scm.nil
+    elif is_exp(ast) and \
+         scm.any(lambda exp_type: is_exp_type(ast, exp_type), exp_type_lst):
+        return scm.list(extract_exp(ast),
+                        fetch_symbol(extract_rest(ast), exp_type_lst))
+    else:
+        return scm.append(scm.filter(is_not_null,
+                                     scm.map(lambda sub_tree: \
+                                             fetch_symbol(sub_tree,
+                                                          exp_type_lst),
+                                             ast)))
+
+def group_adj_element(lst):
+    """
+    Take output of fetch_symbol as input
+
+    Transform lst of the form
+    (<exp_type> <exp_symbol> <exp_type> <exp_symbol> ...)
+    into lst of the form
+    ((<exp_type> <exp_symbol>) (<exp_type> <exp_symbol>) ...)
+
+    """
+    def index_to_element(k): return scm.list_ref(lst, k)
+    lst_of_index_lst = scm.map(scm.list,
+                               scm.filter(scm.is_even,
+                                          scm.iota(scm.length(lst))),
+                               scm.filter(scm.is_odd,
+                                          scm.iota(scm.length(lst))))
+    return scm.map(lambda index_lst: scm.map(index_to_element, index_lst),
+                   lst_of_index_lst)
+
+EXP_TYPE_LST = scm.list(scm.string_to_symbol("TITLE"),
+                        scm.string_to_symbol("SUBTITLE"),
+                        scm.string_to_symbol("HYPHEN"),
+                        scm.string_to_symbol("ASTERISK"),
+                        scm.string_to_symbol("NUMBER"),
+                        scm.string_to_symbol("VERBATIM"))
+
+# table recording the expression type of each expression symbol
+
+SYMBOL_TABLE = compose(group_adj_element, flatten) \
+               (fetch_symbol(AST, EXP_TYPE_LST))
+
+def is_list_of_string(lst): return scm.every(scm.is_string, lst)
+
+def is_contain_string_lst(s_exp):
+    if not scm.is_list(s_exp):
+        return False
+    elif is_list_of_string(s_exp):
+        return True
+    else:
+        return scm.any(is_contain_string_lst, s_exp)
+
+def join_string_lst(s_exp):
+    if not scm.is_list(s_exp):
+        return s_exp
+    elif is_list_of_string(s_exp):
+        return string_join(s_exp, "")
+    else:
+        return scm.map(join_string_lst, s_exp)
+
+def recursively_join_string_lst(s_exp):
+    if not is_contain_string_lst(s_exp):
+        return s_exp
+    else:
+        return recursively_join_string_lst(join_string_lst(s_exp))
+
+def process_ast(proc, exp_type, ast):
+    """recursively apply proc with exp_type, exp_symbol and rest"""
+    def is_exp(s_exp): return scm.is_list(scm.car(s_exp))
+    def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type
+    def s_exp_first(s_exp): return scm.car(flatten(s_exp))
+    def extract_exp_symbol(s_exp): return scm.cadar(s_exp)
+    def extract_rest(s_exp): return s_exp_first(scm.cadr(s_exp))
+    if not scm.is_list(ast):
+        return ast
+    elif is_exp(ast) and is_exp_type(ast, exp_type):
+        return proc(exp_type, extract_exp_symbol(ast), extract_rest(ast))
+    else:
+        return scm.map(lambda sub_tree: process_ast(proc, exp_type, sub_tree),
+                       ast)
+
+def make_back_end(proc, exp_type):
+    """recursively join processed tree-like structure back to string"""
+    return lambda ast: recursively_join_string_lst(process_ast(proc,
+                                                               exp_type,
+                                                               ast))
+
+def verbatim_processor(exp_type, exp_symbol, rest):
+    """
+    Create verbatim paragraph from expression with exp_type VERBATIM
+
+    1. remove formatter name if exists
+    2. indent each line in the paragraph by 4 spaces
+
+    """
+    def remove_formatter_name(string):
+        pattern = r"^bash[ \t]*$|^python[ \t]*$"
+        return re.sub(pattern, r"", string, count=0, flags=re.MULTILINE)
+    def indent_by_4_spaces(string):
+        pattern = r"^(.+)"
+        return re.sub(pattern, r"    \1", string, count=0, flags=re.MULTILINE)
+    return indent_by_4_spaces(remove_formatter_name(exp_symbol)) + rest
+
+def group_by_exp_type(exp_type, lst):
+    """group exp_type expression by removing non-exp_type expression"""
+    def has_exp_type(s_exp): return scm.car(s_exp) is exp_type
+    # NOTE(review): presumably the next run of exp_type entries -- confirm
+    group = non_nil_take_while(has_exp_type, lst)
+    if group is scm.nil:
+        return scm.nil
+    # recurse on whatever list_subtract leaves once group is taken out
+    remaining = list_subtract(lst, group)
+    return scm.cons(group, group_by_exp_type(exp_type, remaining))
+
+def make_item_position_decider(func):
+    """
+    Return a procedure which will decide if a given string of exp_type
+    is in the desired position specified by func
+
+    func takes a group (list) and returns the element at the desired position
+
+    """
+    pick_symbol = compose(scm.cadr, func)
+    def is_item_position(string, exp_type, symbol_table):
+        groups = group_by_exp_type(exp_type, symbol_table)
+        candidates = scm.map(pick_symbol, groups)
+        return scm.any(lambda exp_symbol: exp_symbol == string, candidates)
+    return is_item_position
+
+def process_item(exp_type, prefix, rest, exp_symbol, symbol_table):
+    """render one POD list item, opening the list (=over) on a group's
+    first item and closing it (=back) on its last, even if both coincide"""
+    args = (exp_symbol, exp_type, symbol_table)
+    is_first = make_item_position_decider(scm.first)(*args)
+    is_last = make_item_position_decider(scm.last)(*args)
+    opening = "=over 7\n\n" if is_first else ""
+    # a sole item is first and last at once: it needs =over and =back both
+    closing = "\n\n=back" if is_last else "\n"
+    return opening + "=item Z<>" + prefix + "\n\n" + rest + closing
+
+def make_item_processor(symbol_table):
+    """
+    Return a procedure which does a case dispatch on exp_type of expression
+    and pass the extracted parts of expression to process_item
+
+    HYPHEN and ASTERISK items use a fixed bullet ("-" or "*") as prefix
+    and the whole exp_symbol as item text; NUMBER items are run through a
+    lexer and both prefix and item text are taken from its result
+
+    The returned procedure takes (exp_type, exp_symbol, rest), appends rest
+    to the processed item and raises TypeError for any other exp_type
+
+    """
+    # symbols are compared by identity below; look each one up only once
+    hyphen = scm.string_to_symbol("HYPHEN")
+    asterisk = scm.string_to_symbol("ASTERISK")
+    number = scm.string_to_symbol("NUMBER")
+    def make_number_item_lst(exp_symbol):
+        split_pattern = r"^(\d+\.[ \t]+)"
+        sub_pattern = r"^(\d+\.)[ \t]+"
+        lexer = make_lexer(split_pattern,
+                           sub_pattern,
+                           number,
+                           re.MULTILINE)
+        return scm.filter(is_not_empty_string, lexer(exp_symbol))
+    def process_different_items(exp_type, exp_symbol, rest):
+        if exp_type is hyphen:
+            prefix, text = "-", exp_symbol
+        elif exp_type is asterisk:
+            prefix, text = "*", exp_symbol
+        elif exp_type is number:
+            # lex once and reuse the result for both prefix and text
+            number_item_lst = make_number_item_lst(exp_symbol)
+            prefix = scm.cadar(number_item_lst)
+            text = scm.cadr(number_item_lst)
+        else:
+            raise TypeError("unknown exp_type of expression")
+        return process_item(exp_type,
+                            prefix,
+                            text,
+                            exp_symbol,
+                            symbol_table) + rest
+    return process_different_items
+
+def installation_section_processor(exp_type, exp_symbol, rest):
+    return ""  # this section produces no output at all
+
+def append_title(string):
+    return "".join(["=head1 ", string, "\n\n"])  # =head1 line, then a blank line
+
+def subtitle_processor(exp_type, exp_symbol, rest):
+    return "".join(["=head2 ", exp_symbol, "\n\n", rest])  # =head2 line, blank line, body
+
+def name_section_processor(exp_type, exp_symbol, rest):
+    """emit the NAME section followed by a hand-written SYNOPSIS section"""
+    usage = "B<<< youtube-dl >>> [I<<< OPTIONS >>>] I<<< URL >>> [I<<< URL >>>...]"
+    return append_title(exp_symbol) + rest + "\n\n=head1 SYNOPSIS\n\n" + usage + "\n\n"
+
+def description_section_processor(exp_type, exp_symbol, rest):
+    """remove the improper synopsis in the DESCRIPTION section"""
+    def remove_synopsis_in_description(string):
+        # every line starting with a space is emptied (its newline is kept)
+        return re.sub(r"^ +.+$", r"", string, flags=re.MULTILINE)
+    return append_title(exp_symbol) + remove_synopsis_in_description(rest)
+
+def sentence_per_line_to_word_per_line(string):
+    """
+    Replace runs of spaces by a newline, delete newlines found at a line
+    start and collapse repeated newlines (the steps are chained by compose)
+    """
+    def spaces_to_newline(string):
+        return re.sub(r" +", r"\n", string, flags=re.MULTILINE)
+    def remove_leading_newlines(string):
+        return re.sub(r"^\n+", r"", string, flags=re.MULTILINE)
+    def multiple_newlines_to_single_newline(string):
+        return re.sub(r"\n+", r"\n", string, flags=re.MULTILINE)
+    return compose(multiple_newlines_to_single_newline,
+                   remove_leading_newlines,
+                   spaces_to_newline)(string)
+
+def process_options(string):
+    """
+    process options in the OPTIONS section: each recognised option becomes
+    an =item whose flags are wrapped in B<<< >>> and whose argument (if
+    any) in I<<< >>>, followed by a blank line and the description text
+    """
+    def rule(pattern, replacement):
+        return lambda text: re.sub(pattern, replacement, text,
+                                   flags=re.MULTILINE)
+    # NOTE(review): order matters; presumably compose applies right-to-left
+    return compose(
+        rule(r"^(--[^\s]+)[\s]+([A-Z].+)$",
+             r"\n=item\nB<<< \1 >>>\n\n\2"),
+        rule(r"^(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$",
+             r"\n=item\nB<<< \1 >>>\nI<<< \2 >>>\n\n\3"),
+        rule(r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([A-Z].+)$",
+             r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\n\n\3"),
+        rule(r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$",
+             r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\nI<<< \3 >>>\n\n\4"),
+        sentence_per_line_to_word_per_line)(string)
+
+def options_section_processor(exp_type, exp_symbol, rest):
+    """
+    Process the OPTIONS section by creating a sub_tree from rest using a
+    front_end built on the "=head2" patterns, then running process_options
+    on scm.car(sub_tree) and on the rest of every SUBTITLE expression
+
+    Finally, convert the sub_tree back into string using back_end
+
+    """
+    subtitle = scm.string_to_symbol("SUBTITLE")
+    def options_subsections_processor(exp_type, exp_symbol, rest):
+        # close the open =over list, emit the subtitle, open a new list
+        heading = "\n=back\n\n=head2 " + exp_symbol + "\n\n=over 7\n\n"
+        return heading + process_options(rest)
+    front_end = connect_front_end(scm.list(make_front_end,
+                                           r"^(=head2 .+)$",
+                                           r"^=head2 (.+)$",
+                                           subtitle,
+                                           re.MULTILINE))
+    back_end = make_back_end(options_subsections_processor, subtitle)
+    sub_tree = front_end(rest)
+    # the head of the tree is processed directly, the tail by back_end
+    processed_tree = scm.cons(process_options(scm.car(sub_tree)),
+                              scm.cdr(sub_tree))
+    return append_title(exp_symbol) + \
+        "=over 7\n\n" + \
+        back_end(processed_tree) + \
+        "\n=back\n\n"
+
+def title_processor(exp_type, exp_symbol, rest):
+    """dispatch on exp_symbol (the section title) to the matching section
+    processor; any other section just gets a =head1 title before rest"""
+    section_processors = {
+        "INSTALLATION": installation_section_processor,
+        "NAME": name_section_processor,
+        "DESCRIPTION": description_section_processor,
+        "OPTIONS": options_section_processor,
+    }
+    if exp_symbol in section_processors:
+        return section_processors[exp_symbol](exp_type, exp_symbol, rest)
+    return append_title(exp_symbol) + rest
+
+def bold(string):
+    """enclose string marked as bold (**text**, on one line) by B<<< >>>"""
+    pattern = r"\*\*([^\*\n]+)\*\*"
+    return re.sub(pattern, r"B<<< \1 >>>", string, flags=re.MULTILINE)
+
+def italic(string):
+    """enclose string marked as italic (*text* or back-quoted) by I<<< >>>"""
+    def asterisk_to_italic(string):
+        pattern = r"\*([^\*\n]+)\*"
+        return re.sub(pattern, r"I<<< \1 >>>", string, flags=re.MULTILINE)
+    def back_quote_to_italic(string):
+        pattern = r"`{1,2}([^`\n]+)`{1,2}"
+        return re.sub(pattern, r"I<<< \1 >>>", string, flags=re.MULTILINE)
+    return compose(back_quote_to_italic, asterisk_to_italic) \
+        (string)
+
+def remove_internal_links(string):
+    """replace internal links of the form [foo](#bar) by their text foo"""
+    return re.sub(r"\[([^]|\n]+)\]\(#[^\)|\n]+\)", r"\1", string, flags=re.MULTILINE)
+
+def external_links(string):
+    """convert external links of the form [foo](bar) into L<<< foo|bar >>>"""
+    pattern = r"\[([^]|\n]+)\]\(([^\)|\n]+)\)"
+    return re.sub(pattern, r"L<<< \1|\2 >>>", string, flags=re.MULTILINE)
+
+# First, convert AST back to string using one back_end per exp_type
+# Finally, postprocess the string (bold, italic, links) and display it
+scm.display(compose(external_links,
+                    remove_internal_links,
+                    italic,
+                    bold,
+                    make_back_end(title_processor,
+                                  scm.string_to_symbol("TITLE")),
+                    make_back_end(subtitle_processor,
+                                  scm.string_to_symbol("SUBTITLE")),
+                    make_back_end(make_item_processor(SYMBOL_TABLE),
+                                  scm.string_to_symbol("HYPHEN")),
+                    make_back_end(make_item_processor(SYMBOL_TABLE),
+                                  scm.string_to_symbol("ASTERISK")),
+                    make_back_end(make_item_processor(SYMBOL_TABLE),
+                                  scm.string_to_symbol("NUMBER")),
+                    make_back_end(verbatim_processor,
+                                  scm.string_to_symbol("VERBATIM"))) \
+            (AST))

From ec39092443ca009efa5291ec858c0898874c9546 Mon Sep 17 00:00:00 2001
From: Alex Vong <alexvong1995@gmail.com>
Date: Sun, 13 Sep 2015 19:15:42 +0800
Subject: [PATCH 4/4] Add pod2man as build dependency in `DEVELOPER
 INSTRUCTIONS` section.

* README.md: Add pod2man as build dependency.
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 2bef26d57..dc69a77c8 100644
--- a/README.md
+++ b/README.md
@@ -501,6 +501,7 @@ If you want to create a build of youtube-dl yourself, you'll need
 * python
 * make
 * pandoc
+* pod2man
 * zip
 * nosetests