diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index c0cf8bd99..f2453775b 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -10,7 +10,7 @@ from .utils import (
 
 __DECIMAL_RE = r'(?:[1-9][0-9]*)|0'
 __OCTAL_RE = r'0+[0-7]+'
-__HEXADECIMAL_RE = r'(0[xX])[0-9a-fA-F]+'
+__HEXADECIMAL_RE = r'0[xX][0-9a-fA-F]+'
 
 _OPERATORS = [
     ('|', operator.or_),
@@ -27,6 +27,7 @@ _OPERATORS = [
 _ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
 _ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
 
+# TODO flow control and others probably
 _RESERVED_RE = r'(?:function|var|(?Preturn))\s'
 
 _OPERATORS_RE = r'|'.join(re.escape(op) for op, opfunc in _OPERATORS)
@@ -43,11 +44,10 @@ _FLOAT_RE = r'(%(dec)s)?\.%(dec)s' % {'dec': __DECIMAL_RE}
 
 _BOOL_RE = r'true|false'
 # TODO check if they can be multiline
-# XXX: it seams group cannot be refed this way
-# r'/(?=[^*])[^/\n]*/(?![gimy]*(?P<flag>[gimy])[gimy]*\g<flag>)[gimy]{0,4}'
-_REGEX_RE = r'''/(?=[^*])
-    ((\\([tnvfr0.\\+*?^$\[\]{}()|/]|[0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|c[A-Z]|))|
-    [^/\n])*/[gimy]{0,4}'''
+# r'''/(?=[^*])
+# ((\\([tnvfr0.\\+*?^$\[\]{}()|/]|[0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|c[A-Z]|))|[^/\n])*
+# /(?:(?![gimy]*(?P<flag>[gimy])[gimy]*(?P=flag))[gimy]{0,4}\b|\s|\n|$)'''
+_REGEX_RE = r'\/(?!\*)([^/\n]|\/)*\/(?:(?![gimy]*(?P<flag>[gimy])[gimy]*(?P=flag))[gimy]{0,4}\b|\s|\n|$)'
 
 _LITERAL_RE = r'((?P<int>%(int)s)|(?P<float>%(float)s)|(?P<str>%(str)s)|(?P<bool>%(bool)s)|(?P<regex>%(regex)s))' % {
     'int': _INTEGER_RE,
@@ -56,10 +56,9 @@ _LITERAL_RE = r'((?P%(int)s)|(?P%(float)s)|(?P%(str)s)|(?P%(comment)s)|(?P%(rsv)s)|