From 229ecdde27ece9f2edf769ba57b6157af1dcf2a1 Mon Sep 17 00:00:00 2001 From: Alex Vong Date: Mon, 10 Aug 2015 12:55:29 +0800 Subject: [PATCH 1/4] Add the missing tag `EMBEDDING YOUTUBE-DL` in the content table. * README.md: Add missing tag. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 24bfe38a2..2bef26d57 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ youtube-dl - download videos from youtube.com or other video platforms - [VIDEO SELECTION](#video-selection) - [FAQ](#faq) - [DEVELOPER INSTRUCTIONS](#developer-instructions) +- [EMBEDDING YOUTUBE-DL](#embedding-youtube-dl) - [BUGS](#bugs) - [COPYRIGHT](#copyright) From 92f580152f21b1ed92e397a49bf8c05bbbeae580 Mon Sep 17 00:00:00 2001 From: Alex Vong Date: Sat, 12 Sep 2015 16:36:39 +0800 Subject: [PATCH 2/4] Add a minimal scm module to provide Scheme-like data types and procedures. primitives.py implements a minimal subset of Scheme primitives. srfi_1.py implements a minimal subset of SRFI-1: List Library. * devscripts/scm/__init__.py: New file. * devscripts/scm/primitives.py: New file. * devscripts/scm/srfi_1.py: New file. 
--- devscripts/scm/__init__.py | 2 + devscripts/scm/primitives.py | 196 +++++++++++++++++++ devscripts/scm/srfi_1.py | 355 +++++++++++++++++++++++++++++++++++ 3 files changed, 553 insertions(+) create mode 100644 devscripts/scm/__init__.py create mode 100644 devscripts/scm/primitives.py create mode 100644 devscripts/scm/srfi_1.py diff --git a/devscripts/scm/__init__.py b/devscripts/scm/__init__.py new file mode 100644 index 000000000..81d45a6ab --- /dev/null +++ b/devscripts/scm/__init__.py @@ -0,0 +1,2 @@ +from .primitives import * +from .srfi_1 import * diff --git a/devscripts/scm/primitives.py b/devscripts/scm/primitives.py new file mode 100644 index 000000000..224a471cb --- /dev/null +++ b/devscripts/scm/primitives.py @@ -0,0 +1,196 @@ +from __future__ import unicode_literals + +import re +import sys + + +""" +Module implemeting a minimal subset of Scheme primitives in term of Python + +In Scheme: + apply car cdr cons display even? length list list->lst lst->list lst->tuple + list? null? object->string odd? pair? string? string->symbol symbol->string + symbol? 
tuple->lst + +In Python: + apply car cdr cons display is_even length list list_to_lst lst_to_list + lst_to_tuple is_list is_null object_to_string is_odd is_pair is_string + string_to_symbol symbol_to_string is_symbol tuple_to_lst + +""" + +# standalone primitives + +if sys.version_info < (3, 0): + def display(obj): print(obj.encode("utf-8")) +else: + def display(obj): print(obj) + +def is_even(x): return x % 2 == 0 + +def is_odd(x): return x % 2 != 0 + +if sys.version_info < (3, 0): + is_string = lambda obj: isinstance(obj, (str, basestring)) +else: + is_string = lambda obj: isinstance(obj, str) + +if sys.version_info < (3, 0): + object_to_string = lambda obj: unicode(obj) +else: + object_to_string = lambda obj: str(obj) + +# nil-related primitives + +class _Nil: + """internal class implementing the empty lst type""" + def __repr__(self): return "()" + + def __str__(self): return "()" + +# Many Scheme implementations don't have nil and use '() instead, +# but we can't do that as we don't know how to get quoting works in Python... +nil = _Nil() + +def is_null(x): return x is nil + + +# pair/lst-related primitives + +class _Pair: + """internal class implementing the pair type""" + def __init__(self, car, cdr): + self.car = car + self.cdr = cdr + if cdr is nil or is_list(cdr): + self.is_list = True + else: + self.is_list = False + + def __repr__(self): + """ + Simulate representation of pair and list in Scheme REPL + + In general, cons(x, y) is called a pair and is represented by (x . 
y) + + However, if y is the empty lst, then cons(x, y) is called a lst + (avoid confusing with the procedure list, + treat list as a verb and lst as a noun) + and is represented by (x) + + Moreover, if y is a lst and represented by (foo), + then cons(x,y) is also a lst and is represented by (x foo) + + """ + pattern = r"^\(|\)$" + if self.cdr is nil: + return "(" + repr(self.car) + ")" + elif self.is_list: + return "(" + repr(self.car) + " " + \ + re.sub(pattern, "", repr(self.cdr)) + ")" + else: + return "(" + repr(self.car) + " . " + repr(self.cdr) + ")" + + def __str__(self): + """ + Same as __repr__. + + Except repr(self.car) and repr(self.cdr) are replaced by + object_to_string(self.car) and object_to_string(self.cdr) respectively. + + """ + pattern = r"^\(|\)$" + if self.is_list: + return "(" + object_to_string(self.car) + " " + \ + re.sub(pattern, "", object_to_string(self.cdr)) + ")" + else: + return "(" + object_to_string(self.car) + " . " + \ + object_to_string(self.cdr) + ")" + + def __eq__(self, x): + return isinstance(x, _Pair) and self.car == x.car and self.cdr == x.cdr + +def cons(a, b): return _Pair(a, b) + +def car(pair): return pair.car + +def cdr(pair): return pair.cdr + +def is_pair(x): return isinstance(x, _Pair) + +def is_list(x): + if x is nil: + return True + elif isinstance(x, _Pair): + return x.is_list + else: + return False + +def list(*arg_tup): + """build a lst from any number of elements""" + if not arg_tup: + return nil + else: + return cons(arg_tup[0], list(*arg_tup[1:])) + +def tuple_to_lst(tup): + """convert Python tuple to Scheme lst""" + if not tup: + return nil + else: + return cons(tup[0], tuple_to_lst(tup[1:])) + +def lst_to_tuple(lst): + """convert Scheme lst to Python tuple""" + if lst is nil: + return () + else: + return (lst.car,) + lst_to_tuple(lst.cdr) + +def apply(proc, lst): + """apply procedure proc to a Scheme lst""" + return proc(*lst_to_tuple(lst)) + +def list_to_lst(list_): + """convert Python list to Scheme 
lst""" + if not list_: + return nil + else: + return cons(list_[0], list_to_lst(list_[1:])) + +def lst_to_list(lst): + """convert Scheme lst to Python list""" + if lst is nil: + return [] + else: + return [lst.car,] + lst_to_list(lst.cdr) + + +# symbol-related primitives + +# maintain a dictionary of symbol, to avoid duplication of _Symbol object +_symbol_dict = {} + +class _Symbol: + """internal class implementing the symbol type""" + def __init__(self, string): + self.string = string + _symbol_dict[string] = self + + def __repr__(self): + """remove leading and trailing quote from repr(self.string)""" + pattern = r"^'|^\"|\"$|'$" + return re.sub(pattern, "", repr(self.string)) + + def __str__(self): + return object_to_string(self.string) + +def string_to_symbol(string): + """convert Python string to Scheme symbol""" + if string not in _symbol_dict: + _symbol_dict[string] = _Symbol(string) + return _symbol_dict[string] + +def symbol_to_string(symbol): return object_to_string(symbol) + +def is_symbol(x): return isinstance(x, _Symbol) diff --git a/devscripts/scm/srfi_1.py b/devscripts/scm/srfi_1.py new file mode 100644 index 000000000..806929b38 --- /dev/null +++ b/devscripts/scm/srfi_1.py @@ -0,0 +1,355 @@ +from __future__ import unicode_literals + +from .primitives import * + + +""" +Module implementing a minimal subset of SRFI-1: List Library + +In Scheme: + append caar cadr cdar cddr caaar caadr cadar caddr cdaar cdadr cddar cdddr + caaaar caaadr caadar caaddr cadaar cadadr caddar cadddr cdaaar cdaadr + cdadar cdaddr cddaar cddadr cdddar cddddr concatenate drop-while every + filter first fold iota last length list list_ref lset-difference map reduce + reverse take-while + +In Python: + append caar cadr cdar cddr caaar caadr cadar caddr cdaar cdadr cddar cdddr + caaaar caaadr caadar caaddr cadaar cadadr caddar cadddr cdaaar cdaadr + cdadar cdaddr cddaar cddadr cdddar cddddr concatenate drop_while every + filter first fold iota last length list list_ref lset_difference 
map reduce + reverse take_while + +""" + +# Use these procedures with caution, +# as too much car/cdr-ing may hinder readability + +def caar(obj): return car(car(obj)) + +def cadr(obj): return car(cdr(obj)) + +def cdar(obj): return cdr(car(obj)) + +def cddr(obj): return cdr(cdr(obj)) + +def caaar(obj): return car(car(car(obj))) + +def caadr(obj): return car(car(cdr(obj))) + +def cadar(obj): return car(cdr(car(obj))) + +def caddr(obj): return car(cdr(cdr(obj))) + +def cdaar(obj): return cdr(car(car(obj))) + +def cdadr(obj): return cdr(car(cdr(obj))) + +def cddar(obj): return cdr(cdr(car(obj))) + +def cdddr(obj): return cdr(cdr(cdr(obj))) + +def caaaar(obj): return car(car(car(car(obj)))) + +def caaadr(obj): return car(car(car(cdr(obj)))) + +def caadar(obj): return car(car(cdr(car(obj)))) + +def caaddr(obj): return car(car(cdr(cdr(obj)))) + +def cadaar(obj): return car(cdr(car(car(obj)))) + +def cadadr(obj): return car(cdr(car(cdr(obj)))) + +def caddar(obj): return car(cdr(cdr(car(obj)))) + +def cadddr(obj): return car(cdr(cdr(cdr(obj)))) + +def cdaaar(obj): return cdr(car(car(car(obj)))) + +def cdaadr(obj): return cdr(car(car(cdr(obj)))) + +def cdadar(obj): return cdr(car(cdr(car(obj)))) + +def cdaddr(obj): return cdr(car(cdr(cdr(obj)))) + +def cddaar(obj): return cdr(cdr(car(car(obj)))) + +def cddadr(obj): return cdr(cdr(car(cdr(obj)))) + +def cdddar(obj): return cdr(cdr(cdr(car(obj)))) + +def cddddr(obj): return cdr(cdr(cdr(cdr(obj)))) + +def length(lst): + """compute length of lst""" + def length_loop(lst, count): + if lst is nil: + return count + else: + return length_loop(cdr(lst), + count + 1) + return length_loop(lst, 0) + +def list_ref(lst, k): + """return the k^th element of lst""" + if k == 0: + return car(lst) + else: + return list_ref(cdr(lst), k - 1) + +def iota(count): + """return lst from 0 to (count - 1)""" + def iota_loop(loop_count): + if loop_count == count: + return nil + else: + return cons(loop_count, + iota_loop(loop_count + 1)) + return 
iota_loop(0) + +def _any(proc, arg_lst): + """ + any for procedures that take a single argument + + Apply proc to every element in arg_lst + Return True is any of the result is True + Otherwise, return False + + """ + if arg_lst is nil: + return False + elif proc(car(arg_lst)): + return True + else: + return _any(proc, cdr(arg_lst)) + +def _every(proc, arg_lst): + """ + every for procedures that take a single argument + + Apply proc to every element in arg_lst + Return True is every result is True + Otherwise, return False + + """ + if arg_lst is nil: + return True + elif not proc(car(arg_lst)): + return False + else: + return _every(proc, cdr(arg_lst)) + +def _map(proc, lst): + """ + map for procedures that take a single argument + + Apply proc to every element in arg_lst and return the resulting lst + + """ + if lst is nil: + return nil + else: + return cons(proc(car(lst)), + _map(proc, cdr(lst))) + +def map(proc, *tuple_of_lst): + """ + map for procedures that take any number of arguments, including 1 + + Apply proc to the n^th element in lst from lst_of_lst + and return the resulting lst + + """ + lst_of_lst = tuple_to_lst(tuple_of_lst) + if _every(is_null, lst_of_lst): + return nil + elif _any(is_null, lst_of_lst): + raise IndexError("some of the lists are differed in length!") + else: + return cons(apply(proc, + _map(car, lst_of_lst)), + apply(map, cons(proc, + _map(cdr, lst_of_lst)))) + +def _fold(proc, init, lst): + """ + fold for procedures that take a single argument + + If lst is the empty lst, return init + Otherwise, apply proc to the first element of lst and init in this order + Now, the result becomes the new init + + """ + + if lst is nil: + return init + else: + return _fold(proc, proc(car(lst), init), cdr(lst)) + +def reduce(proc, default, lst): + """ + If lst is the empty lst, return default + Otherwise, apply proc to the second element in lst + and the first element from lst in this order + Now, the result becomes the element after the remaining 
first element + + """ + if lst is nil: + return default + elif cdr(lst) is nil: + return car(lst) + else: + return _fold(proc, car(lst), cdr(lst)) + +def any(proc, *tuple_of_lst): + """ + any for procedures that take any number of arguments, including 1 + + Apply proc to the n^th element in lst from lst_of_lst + Return True if any of the result is True + Otherwise, return False + + """ + lst_of_lst = tuple_to_lst(tuple_of_lst) + return reduce(lambda x, y: x or y, + False, + apply(map, cons(proc, + lst_of_lst))) + +def every(proc, *tuple_of_lst): + """ + every for procedures that take any number of arguments, including 1 + + Apply proc to the n^th element in lst from lst_of_lst + Return True if every result is True + Otherwise, return False + + """ + lst_of_lst = tuple_to_lst(tuple_of_lst) + return reduce(lambda x, y: x and y, + True, + apply(map, cons(proc, + lst_of_lst))) + +def reverse(lst): + """reverse a given lst""" + return _fold(cons, nil, lst) + +def filter(proc, lst): + """ + Apply proc to elements in lst + Remove those evaluated to False and return the resulting lst + + """ + def filter_loop(proc, lst, accum): + if lst is nil: + return accum + elif proc(car(lst)): + return filter_loop(proc, cdr(lst), cons(car(lst), accum)) + else: + return filter_loop(proc, + cdr(lst), + accum) + return reverse(filter_loop(proc, lst, nil)) + +def first(lst): + """return the first element of lst, usually used with last""" + return car(lst) + +def last(lst): + """return the last element of lst, usually used with first""" + return car(reverse(lst)) + +def _append(lst1, lst2): + """ + append for procedure that takes a single argument + + Append 2 lst into a single lst + + """ + + return _fold(cons, lst2, reverse(lst1)) + +def append(*tuple_of_lst): + """ + append for procedures that take any number of arguments, including 1 + + Append any number of lst into a single lst + + """ + + lst_of_lst = tuple_to_lst(tuple_of_lst) + return reduce(_append, nil, 
reverse(lst_of_lst)) + +def concatenate(lst_of_lst): + """concatenate lst_of_lst into a single lst""" + return apply(append, lst_of_lst) + +def fold(proc, init, *tuple_of_lst): + """ + fold for procedures that take any number of arguments, including 1 + + If every element in lst_of_lst is the empty lst, return init + Otherwise, apply proc to the first element of every element in lst_of_lst + and init in this order + Now, the result becomes the new init + + """ + + lst_of_lst = tuple_to_lst(tuple_of_lst) + if _every(is_null, lst_of_lst): + return init + elif _any(is_null, lst_of_lst): + raise IndexError("some of the lists are differed in length!") + else: + return apply(fold, cons(proc, + cons(apply(proc, + append(_map(car, lst_of_lst), + list(init))), + _map(cdr, lst_of_lst)))) + +def lset_difference(comparator, lst, *tuple_of_lst): + def _lset_difference(comparator, lst1, lst2): + """treat lst1 and lst2 as sets and compute lst1 \ lst2""" + return filter(lambda x: _every(lambda y: not comparator(x, y), + lst2), + lst1) + lst_of_lst = tuple_to_lst(tuple_of_lst) + if lst_of_lst is nil: + return lst + else: + return apply(lset_difference, + cons(comparator, + cons(_lset_difference(comparator, + lst, + car(lst_of_lst)), + cdr(lst_of_lst)))) + +def drop_while(pred, lst): + """ + While predicate evaluates to True, drops the element + + Return the lst if predicate evaluates to False or if lst is empty + + """ + if lst is nil: + return nil + elif not pred(car(lst)): + return lst + else: + return drop_while(pred, cdr(lst)) + +def take_while(pred, lst): + """ + While predicate evaluates to True, takes the element + + Return the empty lst if predicate evaluates to False or if lst is empty + + """ + if lst is nil: + return nil + elif not pred(car(lst)): + return nil + else: + return cons(car(lst), take_while(pred, cdr(lst))) From fe37a2535aa013b12d87c9ac3ea466c4ac350f61 Mon Sep 17 00:00:00 2001 From: Alex Vong Date: Sun, 13 Sep 2015 17:55:15 +0800 Subject: [PATCH 3/4] Fix bad 
formatting of man page (Closes #6510) * Makefile: It now pipe the output of devscripts/prepare_manpage.py to pod2man to produce youtube-dl.1 man page (previously was using pandoc). * devscripts/prepare_manpage.py: It now convert README.md to Perl Pod format. --- Makefile | 6 +- devscripts/prepare_manpage.py | 542 ++++++++++++++++++++++++++++++++-- 2 files changed, 522 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index fdb1abb60..0b331240f 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,8 @@ BINDIR ?= $(PREFIX)/bin MANDIR ?= $(PREFIX)/man SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python +VERSION ?= $(shell echo 'print(__version__)' | cat youtube_dl/version.py - | python) +DATE ?= $(shell echo '$(VERSION)' | sed -e 's/\./-/g') # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local ifeq ($(PREFIX),/usr) @@ -73,9 +75,7 @@ README.txt: README.md pandoc -f markdown -t plain README.md -o README.txt youtube-dl.1: README.md - python devscripts/prepare_manpage.py >youtube-dl.1.temp.md - pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1 - rm -f youtube-dl.1.temp.md + python devscripts/prepare_manpage.py | uniq | pod2man --center='User Commands' --date=$(DATE) --errors='die' --name='YOUTUBE-DL' --release=$(VERSION) --section=1 > youtube-dl.1 youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in python devscripts/bash-completion.py diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 7ece37754..8389ba773 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -2,31 +2,527 @@ from __future__ import unicode_literals import io import os.path -import sys import re +import scm -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -README_FILE = os.path.join(ROOT_DIR, 'README.md') +ROOT_DIR_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +README_PATH = os.path.join(ROOT_DIR_PATH, "README.md") -with 
io.open(README_FILE, encoding='utf-8') as f: - readme = f.read() +with io.open(README_PATH, encoding="utf-8") as FILE: + README_ORIG = FILE.read() -PREFIX = '''%YOUTUBE-DL(1) - -# NAME - -youtube\-dl \- download videos from youtube.com or other video platforms - -# SYNOPSIS - -**youtube-dl** \[OPTIONS\] URL [URL...] - -''' -readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) -readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) -readme = PREFIX + readme - -if sys.version_info < (3, 0): - print(readme.encode('utf-8')) +if README_ORIG: + README = "=pod\n\n=encoding utf8\n\n# NAME\n\n" + README_ORIG + "\n\n=cut\n" else: - print(readme) + README = "" + +def identity(x): return x + +def _compose(f, g): return lambda x: f(g(x)) + +def compose(*tuple_of_func): + lst_of_func = scm.tuple_to_lst(tuple_of_func) + return scm.fold(_compose, identity, scm.reverse(lst_of_func)) + +def flatten(lst): + """flatten an arbitrarily deep nested lst into a single lst""" + if lst is scm.nil: + return scm.nil + elif not scm.is_list(lst): + return scm.list(lst) + else: + return scm.append(flatten(scm.car(lst)), + flatten(scm.cdr(lst))) + +def list_subtract(lst1, lst2): + return scm.lset_difference(lambda x, y: x == y, lst1, lst2) + +def non_nil_take_while(pred, lst): + return scm.take_while(pred, + scm.drop_while(lambda x: not pred(x), + lst)) + +def string_join(lst_of_string, delimiter): + list_of_string = scm.lst_to_list(lst_of_string) + return delimiter.join(list_of_string) + +def is_not_empty_string(s_exp): + return not scm.is_string(s_exp) or bool(s_exp) + +def regex_split(pattern, string, flags=0): + """split string into lst using pattern""" + return scm.list_to_lst(re.split(pattern, string, 0, flags)) + +def remove_table_of_content(string): + pattern = r"^-[ \t]+\[[- \tA-Z]+\]\(#[-a-z]+\)[ \t]*$" + return re.sub(pattern, r"", string, 0, re.MULTILINE) + +def make_lexer(split_pattern, sub_pattern , exp_type, flags=0): + """ + Lexer is a procedure which does 
the following: + + 1. Split string into lst using split_pattern + 2. Transform matching string in lst using sub_pattern + and attach exp_type to it, forming an expression + + The output is a lst of tokens consisting of string and expression + + """ + def attach_exp_type_to_matching_substring(string): + if re.search(split_pattern, string, flags): + return scm.list(exp_type, + re.sub(sub_pattern, r"\1", string, 1, flags)) + else: + return string + return lambda string: scm.map(attach_exp_type_to_matching_substring, + regex_split(split_pattern, string, flags)) + +def make_parser(exp_type, post_proc): + """ + Parser is a procedure which takes the output of a lexer as input + and does the following: + + 1. Group exp_type expression and the string after it + 2. Apply post_proc to the string after exp_type expression + + The output is a tree-like structure + + """ + def is_exp(lst): return scm.is_list(scm.car(lst)) + def extract_exp(lst): return scm.car(lst) + def extract_string_after_exp(lst): return scm.cadr(lst) + def extract_rest(lst): return scm.cddr(lst) + def parse_loop(lst, accum): + if lst is scm.nil: + return accum + elif is_exp(lst): + return parse_loop(extract_rest(lst), + scm.cons(scm.list(extract_exp(lst), + post_proc(extract_string_after_exp(lst))), + accum)) + else: + return parse_loop(scm.cdr(lst), + scm.cons(post_proc(scm.car(lst)), + accum)) + return lambda lst: scm.reverse(parse_loop(lst, scm.nil)) + +def make_front_end(post_proc, split_pattern, sub_pattern, exp_type, flags=0): + """compose parser and lexer to create a front end""" + return compose(make_parser(exp_type, post_proc), + make_lexer(split_pattern, sub_pattern, exp_type, flags)) + +def connect_front_end(*tuple_of_lst): + """ + Connect front ends together + by making use of post_proc parameter of make_front_end + + """ + lst_of_lst = scm.tuple_to_lst(tuple_of_lst) + def extract_func(lst_of_lst): return scm.caar(lst_of_lst) + def extract_arg_lst(lst_of_lst): return scm.cdar(lst_of_lst) + def 
connect_loop(lst_of_lst, accum): + if lst_of_lst is scm.nil: + return accum + else: + return connect_loop(scm.cdr(lst_of_lst), + scm.apply(extract_func(lst_of_lst), + scm.cons(accum, + extract_arg_lst(lst_of_lst)))) + return connect_loop(lst_of_lst, identity) + +# These are patterns used to determined how to decomposite the text +# into sensible parts + +TITLE_SPLIT_PATTERN = r"^(#[ \t]+[- \tA-Z]+[ \t]*)$" +TITLE_SUB_PATTERN = r"^#[ \t]+([- \tA-Z]+)[ \t]*$" + +SUBTITLE_SPLIT_PATTERN = r"^(#{2,3}[ \t]+[^#\n]+#*[ \t]*)$" +SUBTITLE_SUB_PATTERN = r"^#{2,3}[ \t]+([^#\n]+)#*[ \t]*$" + +HYPHEN_SPLIT_PATTERN = r"^([ \t]*-[ \t]+.+)$" +HYPHEN_SUB_PATTERN = r"^[ \t]*-[ \t]+(.+)$" + +ASTERISK_SPLIT_PATTERN = r"^(\*[ \t]+[^\*\n]+)$" +ASTERISK_SUB_PATTERN = r"^\*[ \t]+([^\*\n]+)$" + +NUMBER_PATTERN = r"^(\d+\.[ \t]+.+)" + +VERBATIM_SPLIT_PATTERN = r"(```[^`]+```)" +VERBATIM_SUB_PATTERN = r"```([^`]+)```" + +# tree representing the structure of README + +AST = compose(connect_front_end(scm.list(make_front_end, + VERBATIM_SPLIT_PATTERN, + VERBATIM_SUB_PATTERN, + scm.string_to_symbol("VERBATIM")), + scm.list(make_front_end, + NUMBER_PATTERN, + NUMBER_PATTERN, + scm.string_to_symbol("NUMBER"), + re.MULTILINE), + scm.list(make_front_end, + ASTERISK_SPLIT_PATTERN, + ASTERISK_SUB_PATTERN, + scm.string_to_symbol("ASTERISK"), + re.MULTILINE), + scm.list(make_front_end, + HYPHEN_SPLIT_PATTERN, + HYPHEN_SUB_PATTERN, + scm.string_to_symbol("HYPHEN"), + re.MULTILINE), + scm.list(make_front_end, + SUBTITLE_SPLIT_PATTERN, + SUBTITLE_SUB_PATTERN, + scm.string_to_symbol("SUBTITLE"), + re.MULTILINE), + scm.list(make_front_end, + TITLE_SPLIT_PATTERN, + TITLE_SUB_PATTERN, + scm.string_to_symbol("TITLE"), + re.MULTILINE)), + remove_table_of_content) \ + (README) + +def fetch_symbol(ast, exp_type_lst): + """ + From ast, fetch symbol which is of type listed in exp_type_lst + + Note that the output is a nested lst needed to be flatten in order to be + lst of the form ( ...) 
+ + """ + def is_not_null(s_exp): return s_exp is not scm.nil + def is_exp(s_exp): return scm.is_list(scm.car(s_exp)) + def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type + def extract_exp(s_exp): return scm.car(s_exp) + def extract_rest(s_exp): return scm.cdr(s_exp) + if not scm.is_list(ast): + return scm.nil + elif is_exp(ast) and \ + scm.any(lambda exp_type: is_exp_type(ast, exp_type), exp_type_lst): + return scm.list(extract_exp(ast), + fetch_symbol(extract_rest(ast), exp_type_lst)) + else: + return scm.append(scm.filter(is_not_null, + scm.map(lambda sub_tree: \ + fetch_symbol(sub_tree, + exp_type_lst), + ast))) + +def group_adj_element(lst): + """ + Take output of fetch_symbol as input + + Transform lst of the form + ( ...) + into lst of the form + (( ) ( ) ...) + + """ + def index_to_element(k): return scm.list_ref(lst, k) + lst_of_index_lst = scm.map(scm.list, + scm.filter(scm.is_even, + scm.iota(scm.length(lst))), + scm.filter(scm.is_odd, + scm.iota(scm.length(lst)))) + return scm.map(lambda index_lst: scm.map(index_to_element, index_lst), + lst_of_index_lst) + +EXP_TYPE_LST = scm.list(scm.string_to_symbol("TITLE"), + scm.string_to_symbol("SUBTITLE"), + scm.string_to_symbol("HYPHEN"), + scm.string_to_symbol("ASTERISK"), + scm.string_to_symbol("NUMBER"), + scm.string_to_symbol("VERBATIM")) + +# table recording the expression type of each expression symbol + +SYMBOL_TABLE = compose(group_adj_element, flatten) \ + (fetch_symbol(AST, EXP_TYPE_LST)) + +def is_list_of_string(lst): return scm.every(scm.is_string, lst) + +def is_contain_string_lst(s_exp): + if not scm.is_list(s_exp): + return False + elif is_list_of_string(s_exp): + return True + else: + return scm.any(is_contain_string_lst, s_exp) + +def join_string_lst(s_exp): + if not scm.is_list(s_exp): + return s_exp + elif is_list_of_string(s_exp): + return string_join(s_exp, "") + else: + return scm.map(join_string_lst, s_exp) + +def recursively_join_string_lst(s_exp): + if not 
is_contain_string_lst(s_exp): + return s_exp + else: + return recursively_join_string_lst(join_string_lst(s_exp)) + +def process_ast(proc, exp_type, ast): + """recursively apply proc with exp_type, exp_symbol and rest""" + def is_exp(s_exp): return scm.is_list(scm.car(s_exp)) + def is_exp_type(s_exp, exp_type): return scm.caar(s_exp) is exp_type + def s_exp_first(s_exp): return scm.car(flatten(s_exp)) + def extract_exp_symbol(s_exp): return scm.cadar(s_exp) + def extract_rest(s_exp): return s_exp_first(scm.cadr(ast)) + if not scm.is_list(ast): + return ast + elif is_exp(ast) and is_exp_type(ast, exp_type): + return proc(exp_type, extract_exp_symbol(ast), extract_rest(ast)) + else: + return scm.map(lambda sub_tree: process_ast(proc, exp_type, sub_tree), + ast) + +def make_back_end(proc, exp_type): + """recursively join processed tree-like structure back to string""" + return lambda ast: recursively_join_string_lst(process_ast(proc, + exp_type, + ast)) + +def verbatim_processor(exp_type, exp_symbol, rest): + """ + Create verbatim paragraph from expression with exp_type VERBATIM + + 1. remove formatter name if exists + 2. 
indent each sentance in the paragraph by 4 spaces + + """ + def remove_formatter_name(string): + pattern = r"^bash[ \t]*$|^python[ \t]*$" + return re.sub(pattern, r"", string, 0, re.MULTILINE) + def indent_by_4_spaces(string): + pattern = r"^(.+)" + return re.sub(pattern, r" \1", string, 0, re.MULTILINE) + return indent_by_4_spaces(remove_formatter_name(exp_symbol)) + rest + +def group_by_exp_type(exp_type, lst): + """group exp_type expression by removing non-exp_type expression""" + def is_exp_type(s_exp): return scm.car(s_exp) is exp_type + sublst = non_nil_take_while(is_exp_type, lst) + if sublst is scm.nil: + return scm.nil + else: + return scm.cons(sublst, + group_by_exp_type(exp_type, + list_subtract(lst, sublst))) + +def make_item_position_decider(func): + """ + Return a procedure which will decide if a given string of exp_type + is in the desired position specified by func + + func take a lst and return element of the desired position + + """ + def extract_exp_symbol(s_exp): return scm.cadr(s_exp) + def is_item_position(string, exp_type, symbol_table): + return scm.any(lambda exp_symbol: exp_symbol == string, + scm.map(compose(extract_exp_symbol, func), + group_by_exp_type(exp_type, symbol_table))) + return is_item_position + +def process_item(exp_type, prefix, rest, exp_symbol, symbol_table): + """process item based on the position of exp_symbol in the symbol_table""" + is_first_item = make_item_position_decider(scm.first) + is_last_item = make_item_position_decider(scm.last) + if is_first_item(exp_symbol, exp_type, symbol_table): + return "=over 7\n\n=item Z<>" + prefix + "\n\n" + rest + "\n" + elif is_last_item(exp_symbol, exp_type, symbol_table): + return "=item Z<>" + prefix + "\n\n" + rest + "\n\n=back" + else: + return "=item Z<>" + prefix + "\n\n" + rest + "\n" + +def make_item_processor(symbol_table): + """ + Return a procedure which does a case dispatch on exp_type of expression + and pass the extracted parts of expression to process_item + + """ 
+    def make_number_item_lst(exp_symbol):
+        split_pattern = r"^(\d+\.[ \t]+)"
+        sub_pattern = r"^(\d+\.)[ \t]+"
+        return scm.filter(is_not_empty_string,
+                          make_lexer(split_pattern,
+                                     sub_pattern,
+                                     scm.string_to_symbol("NUMBER"),
+                                     re.MULTILINE) \
+                          (exp_symbol))
+    def is_exp_type(exp_type, exp_symbol): return exp_type is exp_symbol
+    def extract_prefix(number_item_lst): return scm.cadar(number_item_lst)
+    def extract_rest(number_item_lst): return scm.cadr(number_item_lst)
+    def process_different_items(exp_type, exp_symbol, rest):
+        if is_exp_type(exp_type, scm.string_to_symbol("HYPHEN")):
+            return process_item(exp_type,
+                                "-",
+                                exp_symbol,
+                                exp_symbol,
+                                symbol_table) + \
+                   rest
+        elif is_exp_type(exp_type, scm.string_to_symbol("ASTERISK")):
+            return process_item(exp_type,
+                                "*",
+                                exp_symbol,
+                                exp_symbol,
+                                symbol_table) + \
+                   rest
+        elif is_exp_type(exp_type, scm.string_to_symbol("NUMBER")):
+            return process_item(exp_type,
+                                extract_prefix(make_number_item_lst(exp_symbol)),
+                                extract_rest(make_number_item_lst(exp_symbol)),
+                                exp_symbol,
+                                symbol_table) + \
+                   rest
+        else:
+            raise TypeError("unknown exp_type of expression")
+    return process_different_items
+
+def installation_section_processor(exp_type, exp_symbol, rest):
+    return ""
+
+def append_title(string):
+    return "=head1 " + string + "\n\n"
+
+def subtitle_processor(exp_type, exp_symbol, rest):
+    return "=head2 " + exp_symbol + "\n\n" + rest
+
+def name_section_processor(exp_type, exp_symbol, rest):
+    """add a proper SYNOPSIS section after the NAME section"""
+    synopsis = "\n\n=head1 SYNOPSIS\n\nB<<< youtube-dl >>> [I<<< OPTIONS >>>] I<<< URL >>> [I<<< URL >>>...]\n\n"
+    return append_title(exp_symbol) + rest + synopsis
+
+def description_section_processor(exp_type, exp_symbol, rest):
+    """remove the improper synopsis in the DESCRIPTION section"""
+    def remove_synopsis_in_description(string):
+        pattern = r"^ +.+$"
+        return re.sub(pattern, r"", string, 0, re.MULTILINE)
+    return append_title(exp_symbol) + remove_synopsis_in_description(rest)
+
+def sentence_per_line_to_word_per_line(string):
+    def spaces_to_newline(string):
+        pattern = r" +"
+        return re.sub(pattern, r"\n", string, 0, re.MULTILINE)
+    def remove_leading_newlines(string):
+        pattern = r"^\n+"
+        return re.sub(pattern, r"", string, 0, re.MULTILINE)
+    def multiple_newlines_to_single_newline(string):
+        pattern = r"\n+"
+        return re.sub(pattern, r"\n", string, 0, re.MULTILINE)
+    return compose(multiple_newlines_to_single_newline,
+                   remove_leading_newlines,
+                   spaces_to_newline) \
+           (string)
+
+def process_options(string):
+    """process options in the OPTIONS section"""
+    def short_long_opt_with_arg_processor(string):
+        pattern = r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$"
+        return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\nI<<< \3 >>>\n\n\4", string, 0, re.MULTILINE)
+    def short_long_opt_without_arg_processor(string):
+        pattern = r"^(-[^\s]+)[\s]*,[\s]*(--[^\s]+)[\s]+([A-Z].+)$"
+        return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n,\nB<<< \2 >>>\n\n\3", string, 0, re.MULTILINE)
+    def long_opt_with_arg_processor(string):
+        pattern = r"^(--[^\s]+)[\s]+([^a-z\s]+)[\s]+([A-Z].+)$"
+        return re.sub(pattern, r"\n=item\nB<<< \1 >>>\nI<<< \2 >>>\n\n\3", string, 0, re.MULTILINE)
+    def long_opt_without_arg_processor(string):
+        pattern = r"^(--[^\s]+)[\s]+([A-Z].+)$"
+        return re.sub(pattern, r"\n=item\nB<<< \1 >>>\n\n\2", string, 0, re.MULTILINE)
+    return compose(long_opt_without_arg_processor,
+                   long_opt_with_arg_processor,
+                   short_long_opt_without_arg_processor,
+                   short_long_opt_with_arg_processor,
+                   sentence_per_line_to_word_per_line) \
+           (string)
+
+def options_section_processor(exp_type, exp_symbol, rest):
+    """
+    Process the OPTIONS section by creating a sub_tree using front_end and
+    use process_options to process scm.cdr(sub_tree)
+
+    Finally, convert the sub_tree back into string using back_end
+
+    """
+    def options_subsections_processor(exp_type, exp_symbol, rest):
+        return "\n=back\n\n=head2 " + \
+               exp_symbol + \
+               "\n\n=over 7\n\n" + \
+               process_options(rest)
+    subtitle_split_pattern = r"^(=head2 .+)$"
+    subtitle_sub_pattern = r"^=head2 (.+)$"
+    sub_tree = connect_front_end(scm.list(make_front_end,
+                                          subtitle_split_pattern,
+                                          subtitle_sub_pattern,
+                                          scm.string_to_symbol("SUBTITLE"),
+                                          re.MULTILINE)) \
+               (rest)
+    return append_title(exp_symbol) + \
+           "=over 7\n\n" + \
+           make_back_end(options_subsections_processor,
+                         scm.string_to_symbol("SUBTITLE")) \
+           (scm.cons(process_options(scm.car(sub_tree)),
+                     scm.cdr(sub_tree))) + \
+           "\n=back\n\n"
+
+def title_processor(exp_type, exp_symbol, rest):
+    """do a case dispatch on exp_symbol and invoke the appropriate processor"""
+    if exp_symbol == "INSTALLATION":
+        return installation_section_processor(exp_type, exp_symbol, rest)
+    elif exp_symbol == "NAME":
+        return name_section_processor(exp_type, exp_symbol, rest)
+    elif exp_symbol == "DESCRIPTION":
+        return description_section_processor(exp_type, exp_symbol, rest)
+    elif exp_symbol == "OPTIONS":
+        return options_section_processor(exp_type, exp_symbol, rest)
+    else:
+        return append_title(exp_symbol) + rest
+
+def bold(string):
+    """enclose string marked as bold by B<<< >>>"""
+    pattern = r"\*\*([^\*\n]+)\*\*"
+    return re.sub(pattern, r"B<<< \1 >>>", string, 0, re.MULTILINE)
+
+def italic(string):
+    """enclose string marked as italic by I<<< >>>"""
+    def asterisk_to_italic(string):
+        pattern = r"\*([^\*\n]+)\*"
+        return re.sub(pattern, r"I<<< \1 >>>", string, 0, re.MULTILINE)
+    def back_quote_to_italic(string):
+        pattern = r"`{1,2}([^`\n]+)`{1,2}"
+        return re.sub(pattern, r"I<<< \1 >>>", string, 0, re.MULTILINE)
+    return compose(back_quote_to_italic, asterisk_to_italic) \
+           (string)
+
+def remove_internal_links(string):
+    pattern = r"\[([^]|\n]+)\]\(#[^\)|\n]+\)"
+    return re.sub(pattern, r"\1", string, 0, re.MULTILINE)
+
+def external_links(string):
+    """convert external links of the form [foo](bar) into L<<< foo|bar >>>"""
+    pattern = r"\[([^]|\n]+)\]\(([^\)|\n]+)\)"
+    return re.sub(pattern, r"L<<< \1|\2 >>>", string, 0, re.MULTILINE)
+
+# First, convert AST back to string using various back_ends
+# Finally, postprocess the string and display it
+scm.display(compose(external_links,
+                    remove_internal_links,
+                    italic,
+                    bold,
+                    make_back_end(title_processor,
+                                  scm.string_to_symbol("TITLE")),
+                    make_back_end(subtitle_processor,
+                                  scm.string_to_symbol("SUBTITLE")),
+                    make_back_end(make_item_processor(SYMBOL_TABLE),
+                                  scm.string_to_symbol("HYPHEN")),
+                    make_back_end(make_item_processor(SYMBOL_TABLE),
+                                  scm.string_to_symbol("ASTERISK")),
+                    make_back_end(make_item_processor(SYMBOL_TABLE),
+                                  scm.string_to_symbol("NUMBER")),
+                    make_back_end(verbatim_processor,
+                                  scm.string_to_symbol("VERBATIM"))) \
+            (AST))

From ec39092443ca009efa5291ec858c0898874c9546 Mon Sep 17 00:00:00 2001
From: Alex Vong
Date: Sun, 13 Sep 2015 19:15:42 +0800
Subject: [PATCH 4/4] Add pod2man as build dependency in `DEVELOPER INSTRUCTIONS` section.

* README.md: Add pod2man as build dependency.
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 2bef26d57..dc69a77c8 100644
--- a/README.md
+++ b/README.md
@@ -501,6 +501,7 @@ If you want to create a build of youtube-dl yourself, you'll need
 * python
 * make
 * pandoc
+* pod2man
 * zip
 * nosetests
 