[jsinterp] Actual parsing
This commit is contained in:
parent
2574721a81
commit
d328b8c6c2
@ -8,6 +8,10 @@ from .utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
__DECIMAL_RE = r'([1-9][0-9]*)|0'
|
||||||
|
__OCTAL_RE = r'0+[0-7]+'
|
||||||
|
__HEXADECIMAL_RE = r'(0[xX])[0-9a-fA-F]+'
|
||||||
|
|
||||||
_OPERATORS = [
|
_OPERATORS = [
|
||||||
('|', operator.or_),
|
('|', operator.or_),
|
||||||
('^', operator.xor),
|
('^', operator.xor),
|
||||||
@ -18,13 +22,59 @@ _OPERATORS = [
|
|||||||
('+', operator.add),
|
('+', operator.add),
|
||||||
('%', operator.mod),
|
('%', operator.mod),
|
||||||
('/', operator.truediv),
|
('/', operator.truediv),
|
||||||
('*', operator.mul),
|
('*', operator.mul)
|
||||||
]
|
]
|
||||||
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
|
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
|
||||||
_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
|
_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
|
||||||
|
|
||||||
|
_RESERVED_RE = r'(function|var|return)\s'
|
||||||
|
|
||||||
|
_OPERATORS_RE = r'|'.join(re.escape(op) for op, opfunc in _OPERATORS)
|
||||||
|
_ASSIGN_OPERATORS_RE = r'|'.join(re.escape(op) for op, opfunc in _ASSIGN_OPERATORS)
|
||||||
|
|
||||||
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
|
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
|
||||||
|
|
||||||
|
# can't use raw string, starts with " and end with '
|
||||||
|
_STRING_RE = '''"(?:[^"\\\\]*(?:\\\\\\\\|\\\\[\'"nurtbfx/\\n]))*[^"\\\\]*"|
|
||||||
|
\'(?:[^\'\\\\]*(?:\\\\\\\\|\\\\[\'"nurtbfx/\\n]))*[^\'\\\\]*\''''
|
||||||
|
|
||||||
|
_INTEGER_RE = r'%(hex)s|%(dec)s|%(oct)s' % {'hex': __HEXADECIMAL_RE, 'dec': __DECIMAL_RE, 'oct': __OCTAL_RE}
|
||||||
|
_FLOAT_RE = r'%(dec)s\.%(dec)s' % {'dec': __DECIMAL_RE}
|
||||||
|
|
||||||
|
_BOOL_RE = r'true|false'
|
||||||
|
_REGEX_RE = r'/[^/]*/' # TODO make validation work
|
||||||
|
|
||||||
|
_LITERAL_RE = r'(%(int)s|%(float)s|%(str)s|%(bool)s|%(regex)s)' % {
|
||||||
|
'int': _INTEGER_RE,
|
||||||
|
'float': _FLOAT_RE,
|
||||||
|
'str': _STRING_RE,
|
||||||
|
'bool': _BOOL_RE,
|
||||||
|
'regex': _REGEX_RE
|
||||||
|
}
|
||||||
|
_ARRAY_RE = r'\[(%(literal)s\s*,\s*)*(%(literal)s\s*)?\]' % {'literal': _LITERAL_RE} # TODO nested array
|
||||||
|
|
||||||
|
_VALUE_RE = r'(%(literal)s)|(%(array)s)' % {'literal': _LITERAL_RE, 'array': _ARRAY_RE}
|
||||||
|
_CALL_RE = r'%(name)s\s*\((%(val)s\s*,\s*)*(%(val)s\s*)?\)' % {'name': _NAME_RE, 'val': _VALUE_RE}
|
||||||
|
_PARENTHESES_RE = r'(?P<popen>\()|(?P<pclose>\))|(?P<iopen>\[)|(?P<iclose>\])'
|
||||||
|
_EXP_RE = r'''(?P<id>%(name)s)|(?P<val>%(val)s)|(?P<op>%(op)s)|%(par)s''' % {
|
||||||
|
'name': _NAME_RE,
|
||||||
|
'val': _VALUE_RE,
|
||||||
|
'op': _OPERATORS_RE,
|
||||||
|
'par': _PARENTHESES_RE
|
||||||
|
} # TODO validate expression (it's probably recursive!)
|
||||||
|
_ARRAY_ELEMENT_RE = r'%(name)s\s*\[\s*(%(index)s)\s*\]' % {'name': _NAME_RE, 'index': _EXP_RE}
|
||||||
|
|
||||||
|
token = re.compile(r'''(?x)\s*
|
||||||
|
((?P<rsv>%(rsv)s)|(?P<call>%(call)s)|(?P<field>\.%(name)s)|
|
||||||
|
(?P<assign>%(aop)s)|(%(exp)s)|
|
||||||
|
(?P<end>;))\s*''' % {
|
||||||
|
'rsv': _RESERVED_RE,
|
||||||
|
'call': _CALL_RE,
|
||||||
|
'name': _NAME_RE,
|
||||||
|
'aop': _ASSIGN_OPERATORS_RE,
|
||||||
|
'exp': _EXP_RE
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
class JSInterpreter(object):
|
class JSInterpreter(object):
|
||||||
def __init__(self, code, objects=None):
|
def __init__(self, code, objects=None):
|
||||||
@ -34,6 +84,41 @@ class JSInterpreter(object):
|
|||||||
self._functions = {}
|
self._functions = {}
|
||||||
self._objects = objects
|
self._objects = objects
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _next_statement(code, pos=0):
|
||||||
|
stmt = ''
|
||||||
|
while pos < len(code):
|
||||||
|
feed_m = token.match(code[pos:])
|
||||||
|
if feed_m:
|
||||||
|
for token_id, token_value in feed_m.groupdict().items():
|
||||||
|
if token_value is not None:
|
||||||
|
pos += feed_m.end()
|
||||||
|
if token_id == 'end':
|
||||||
|
yield stmt
|
||||||
|
stmt = ''
|
||||||
|
else:
|
||||||
|
if token_id == 'rsv':
|
||||||
|
pass
|
||||||
|
if token_id == 'call':
|
||||||
|
pass
|
||||||
|
if token_id == 'field':
|
||||||
|
pass
|
||||||
|
if token_id == 'id':
|
||||||
|
pass
|
||||||
|
if token_id == 'val':
|
||||||
|
pass
|
||||||
|
if token_id == 'popen':
|
||||||
|
pass
|
||||||
|
if token_id == 'pclose':
|
||||||
|
pass
|
||||||
|
if token_id == 'op':
|
||||||
|
pass
|
||||||
|
if token_id == 'assign':
|
||||||
|
pass
|
||||||
|
stmt += token_value
|
||||||
|
else:
|
||||||
|
raise NotImplemented("Possibly I've missed something")
|
||||||
|
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
if allow_recursion < 0:
|
if allow_recursion < 0:
|
||||||
raise ExtractorError('Recursion limit reached')
|
raise ExtractorError('Recursion limit reached')
|
||||||
@ -250,7 +335,7 @@ class JSInterpreter(object):
|
|||||||
def build_function(self, argnames, code):
|
def build_function(self, argnames, code):
|
||||||
def resf(args):
|
def resf(args):
|
||||||
local_vars = dict(zip(argnames, args))
|
local_vars = dict(zip(argnames, args))
|
||||||
for stmt in code.split(';'):
|
for stmt in self._next_statement(code):
|
||||||
res, abort = self.interpret_statement(stmt, local_vars)
|
res, abort = self.interpret_statement(stmt, local_vars)
|
||||||
if abort:
|
if abort:
|
||||||
break
|
break
|
||||||
|
Loading…
x
Reference in New Issue
Block a user