2014-03-30 07:02:58 +02:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
2016-12-04 19:15:35 +01:00
|
|
|
from ..utils import ExtractorError
|
|
|
|
from .tstream import TokenStream
|
2016-12-07 07:28:09 +01:00
|
|
|
from .jsgrammar import Token
|
2016-12-03 06:32:11 +01:00
|
|
|
|
2016-12-07 08:25:19 +01:00
|
|
|
# Token kinds that form a complete primary expression on their own:
# identifiers and literals. _primary_expression pops any token whose kind is
# in this set, and the interpreter treats the non-ID kinds as literal values.
_token_keys = set((Token.NULL, Token.BOOL, Token.ID, Token.STR, Token.INT, Token.FLOAT, Token.REGEX))
|
2016-11-30 07:49:47 +01:00
|
|
|
|
2016-12-06 18:42:59 +01:00
|
|
|
|
2014-03-30 07:02:58 +02:00
|
|
|
class JSInterpreter(object):
|
2016-12-06 18:42:59 +01:00
|
|
|
# TODO support json
|
2016-12-03 06:32:11 +01:00
|
|
|
undefined = object()
|
|
|
|
|
2015-02-01 22:38:26 +01:00
|
|
|
def __init__(self, code, objects=None):
|
|
|
|
if objects is None:
|
|
|
|
objects = {}
|
2015-02-18 10:47:40 +01:00
|
|
|
self.code = code
|
2014-03-30 07:02:58 +02:00
|
|
|
self._functions = {}
|
2015-02-01 22:38:26 +01:00
|
|
|
self._objects = objects
|
|
|
|
|
2016-12-03 06:32:11 +01:00
|
|
|
def _next_statement(self, token_stream, stack_top):
    """Parse a single statement from `token_stream` and return its AST node.

    Returns None for an empty statement (the stream is positioned on a
    statement terminator or a closing brace, which is NOT consumed), or one
    of the tuples
        (Token.BLOCK, [statement, ...])
        (Token.VAR, [(name, initializer), ...])
        (Token.RETURN, expression)
        (Token.EXPR, [assign_expression, ...])
    For var/return/expression statements the terminating Token.END is left
    in the stream for the caller to pop.

    Raises ExtractorError when `stack_top` drops below zero, on syntax that
    is not supported yet and on unexpected tokens.
    """
    if stack_top < 0:
        raise ExtractorError('Recursion limit reached')

    token_id, token_value, token_pos = token_stream.peek()
    if token_id in (Token.CCLOSE, Token.END):
        # empty statement goes straight here
        return None

    # Keywords that are recognised but not implemented yet.
    # TODO parse ifstatement, iterstatement, continue, break, withstatement,
    #      switchstatement, throwstatement, trystatement, debuggerstatement
    unsupported = {
        'if': 'Conditional statement is not yet supported at %d',
        'for': 'Loops is not yet supported at %d',
        'do': 'Loops is not yet supported at %d',
        'while': 'Loops is not yet supported at %d',
        'break': 'Flow control is not yet supported at %d',
        'continue': 'Flow control is not yet supported at %d',
        'with': 'With statement is not yet supported at %d',
        'switch': 'Switch statement is not yet supported at %d',
        'throw': 'Throw statement is not yet supported at %d',
        'try': 'Try statement is not yet supported at %d',
        'debugger': 'Debugger statement is not yet supported at %d',
    }

    statement = None
    if token_id is Token.ID and token_value == 'function':
        # TODO parse funcdecl
        raise ExtractorError('Function declaration is not yet supported at %d' % token_pos)

    elif token_id is Token.COPEN:
        # block
        # The nested statements are parsed from the SAME stream.  (Going
        # through self.statements() here would hand the stream over as
        # source text and the recursion budget as the start position.)
        open_pos = token_pos
        token_stream.pop()
        statement_list = []
        while True:
            token_id, token_value, token_pos = token_stream.peek()
            if token_id is Token.CCLOSE:
                token_stream.pop()
                break
            # NOTE(review): assumes an exhausted stream keeps reporting
            # Token.END with `ended` set - confirm against TokenStream
            if token_id is Token.END and token_stream.ended:
                raise ExtractorError('Unterminated block starting at %d' % open_pos)
            statement_list.append(self._next_statement(token_stream, stack_top - 1))
            # statements leave their terminator in the stream; drop it here
            token_id, token_value, token_pos = token_stream.peek()
            if token_id is Token.END:
                token_stream.pop()
        statement = (Token.BLOCK, statement_list)

    elif token_id is Token.ID:
        # TODO parse label
        if token_value == 'var':
            token_stream.pop()
            variables = []
            init = []
            while True:
                token_id, token_value, token_pos = token_stream.pop()
                if token_id is not Token.ID:
                    raise ExtractorError('Missing variable name at %d' % token_pos)
                token_stream.chk_id(last=True)
                variables.append(token_value)

                peek_id, peek_value, peek_pos = token_stream.peek()
                if peek_id is Token.AOP:
                    token_stream.pop()
                    init.append(self._assign_expression(token_stream, stack_top - 1))
                    peek_id, peek_value, peek_pos = token_stream.peek()
                else:
                    init.append(JSInterpreter.undefined)

                if peek_id is Token.END:
                    break
                if peek_id is not Token.COMMA:
                    # FIXME automatic end insertion
                    # - token_id is Token.CCLOSE
                    # - check line terminator
                    # - restricted token
                    raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
                # consume the separator so the next round pops a variable name
                token_stream.pop()
            # a list (not a lazy zip object) so the node can be walked repeatedly
            statement = (Token.VAR, list(zip(variables, init)))

        elif token_value == 'return':
            token_stream.pop()
            statement = (Token.RETURN, self._expression(token_stream, stack_top - 1))
            peek_id, peek_value, peek_pos = token_stream.peek()
            if peek_id is not Token.END:
                # FIXME automatic end insertion
                raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))

        elif token_value in unsupported:
            raise ExtractorError(unsupported[token_value] % token_pos)

    # expr
    if statement is None:
        expr_list = []
        while True:
            expr_list.append(self._assign_expression(token_stream, stack_top - 1))
            peek_id, peek_value, peek_pos = token_stream.peek()
            if peek_id is Token.END:
                break
            if peek_id is not Token.COMMA:
                # FIXME automatic end insertion
                raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
            # consume the separator; leaving it in place would make the loop
            # parse an empty expression at the same comma forever
            token_stream.pop()
        statement = (Token.EXPR, expr_list)

    return statement
|
2016-12-03 06:32:11 +01:00
|
|
|
|
|
|
|
def statements(self, code=None, pos=0, stack_size=100):
    """Generate the AST node of every statement found in `code`.

    code       -- JavaScript source; defaults to ``self.code`` when None
    pos        -- start position handed to TokenStream
    stack_size -- recursion budget handed to _next_statement

    After each yielded statement one token is popped from the stream (the
    terminator the statement parser left behind).
    """
    if code is None:
        code = self.code
    ts = TokenStream(code, pos)

    while not ts.ended:
        yield self._next_statement(ts, stack_size)
        ts.pop()
    # The generator simply ends here.  An explicit `raise StopIteration`
    # is converted into RuntimeError on Python 3.7+ (PEP 479).
|
|
|
|
|
2016-12-04 12:49:30 +01:00
|
|
|
def _expression(self, token_stream, stack_top):
    """Parse a comma separated list of assignment expressions.

    Returns (Token.EXPR, [assign_expression, ...]).  Raises ExtractorError
    when a `yield` identifier follows an expression (not supported yet).
    """
    parsed = []
    while True:
        parsed.append(self._assign_expression(token_stream, stack_top - 1))
        peek_id, peek_value, peek_pos = token_stream.peek()
        if peek_id is Token.COMMA:
            # another expression follows the separator
            token_stream.pop()
            continue
        if peek_id is Token.ID and peek_value == 'yield':
            # TODO parse yield
            raise ExtractorError('Yield statement is not yet supported at %d' % peek_pos)
        break
    return (Token.EXPR, parsed)
|
2016-12-03 06:32:11 +01:00
|
|
|
|
2016-12-04 12:49:30 +01:00
|
|
|
def _assign_expression(self, token_stream, stack_top):
    """Parse `conditional [assignment-operator assign_expression]`.

    Returns (Token.ASSIGN, op, left, right); `op` and `right` are None when
    no assignment operator follows the left-hand side.  Raises
    ExtractorError when `stack_top` drops below zero.
    """
    if stack_top < 0:
        raise ExtractorError('Recursion limit reached')

    lhs = self._conditional_expression(token_stream, stack_top - 1)
    peek_id, peek_value, peek_pos = token_stream.peek()
    if peek_id is not Token.AOP:
        return (Token.ASSIGN, None, lhs, None)

    token_stream.pop()
    # the token value is a pair; only its second item (the operator) is kept
    _, operator = peek_value
    rhs = self._assign_expression(token_stream, stack_top - 1)
    return (Token.ASSIGN, operator, lhs, rhs)
|
2016-11-30 07:49:47 +01:00
|
|
|
|
2016-12-04 12:49:30 +01:00
|
|
|
def _member_expression(self, token_stream, stack_top):
    """Parse `[new] primary` followed by any field/element/call tail.

    Returns (Token.MEMBER, target, args, tail) where `args` is the argument
    list of a `new` expression (None without `new`) and `tail` is whatever
    _member_tail returns.
    """
    peek_id, peek_value, peek_pos = token_stream.peek()
    is_new = peek_id is Token.ID and peek_value == 'new'
    if is_new:
        token_stream.pop()
        target = self._member_expression(token_stream, stack_top - 1)
        # Rhino has check for args length
        # Rhino has experimental syntax allowing an object literal to follow a new expression
        args = self._arguments(token_stream, stack_top - 1)
    else:
        args = None
        target = self._primary_expression(token_stream, stack_top)

    tail = self._member_tail(token_stream, stack_top - 1)
    return (Token.MEMBER, target, args, tail)
|
2016-12-03 13:21:03 +01:00
|
|
|
|
2016-12-04 12:49:30 +01:00
|
|
|
def _member_tail(self, token_stream, stack_top):
    """Parse the accessors that follow a member expression.

    Returns a linked chain of
        (Token.FIELD, name, tail)   for  .name
        (Token.ELEM, index, tail)   for  [index]
        (Token.CALL, args, tail)    for  (args)
    or None when the next token starts none of them.  Raises ExtractorError
    on malformed accessors and when `stack_top` drops below zero.
    """
    if stack_top < 0:
        raise ExtractorError('Recursion limit reached')

    peek_id, peek_value, peek_pos = token_stream.peek()

    if peek_id is Token.DOT:
        token_stream.pop()
        peek_id, peek_value, peek_pos = token_stream.peek()
        if peek_id is Token.DOT:
            # a second dot is swallowed before the identifier is read
            token_stream.pop()
            peek_id, peek_value, peek_pos = token_stream.peek()
        elif peek_id is Token.POPEN:
            # TODO handle field query
            raise ExtractorError('Field querry is not yet supported at %d' % peek_pos)

        if peek_id is not Token.ID:
            raise ExtractorError('Identifier name expected at %d' % peek_pos)
        token_stream.pop()
        rest = self._member_tail(token_stream, stack_top - 1)
        return (Token.FIELD, peek_value, rest)

    if peek_id is Token.SOPEN:
        token_stream.pop()
        index = self._expression(token_stream, stack_top - 1)
        token_id, token_value, token_pos = token_stream.pop()
        if token_id is not Token.SCLOSE:
            raise ExtractorError('Unexpected sequence at %d' % token_pos)
        rest = self._member_tail(token_stream, stack_top - 1)
        return (Token.ELEM, index, rest)

    if peek_id is Token.POPEN:
        args = self._arguments(token_stream, stack_top - 1)
        rest = self._member_tail(token_stream, stack_top - 1)
        return (Token.CALL, args, rest)

    return None
|
|
|
|
|
|
|
|
def _primary_expression(self, token_stream, stack_top):
    """Parse an identifier, literal, array literal or parenthesised expression.

    Returns
        (Token.RSV, 'this')          for `this`
        (Token.ID, name)             for any other identifier
        (token_id, token_value)      for literals
        the _array_literal() node    when the next token is Token.SOPEN
        (Token.EXPR, expression)     for a parenthesised expression
        None                         when no primary expression starts here
    Raises ExtractorError for function expressions and object literals (not
    supported yet), for an unclosed parenthesis and when `stack_top` drops
    below zero.
    """
    if stack_top < 0:
        raise ExtractorError('Recursion limit reached')

    # TODO support let
    peek_id, peek_value, peek_pos = token_stream.peek()

    if peek_id in _token_keys:
        token_stream.pop()
        if peek_id is not Token.ID:
            # literals
            return (peek_id, peek_value)
        # this
        if peek_value == 'this':
            return (Token.RSV, 'this')
        # function expr
        if peek_value == 'function':
            # TODO parse function expression
            raise ExtractorError('Function expression is not yet supported at %d' % peek_pos)
        # id
        token_stream.chk_id(last=True)
        return (Token.ID, peek_value)

    # array
    if peek_id is Token.SOPEN:
        return self._array_literal(token_stream, stack_top - 1)

    # object
    # An object literal starts with an opening brace (Token.COPEN, the same
    # token that opens a block), not with a closing bracket.
    if peek_id is Token.COPEN:
        # TODO parse object
        raise ExtractorError('Object literals is not yet supported at %d' % peek_pos)

    # expr
    if peek_id is Token.POPEN:
        token_stream.pop()
        open_pos = peek_pos
        expr = self._expression(token_stream, stack_top - 1)
        peek_id, peek_value, peek_pos = token_stream.peek()
        if peek_id is not Token.PCLOSE:
            raise ExtractorError('Unbalanced parentheses at %d' % open_pos)
        token_stream.pop()
        return (Token.EXPR, expr)

    # empty (probably)
    return None
|
|
|
|
|
|
|
|
def _arguments(self, token_stream, stack_top):
    """Parse a parenthesised, comma separated argument list.

    Returns None when the next token is not an opening parenthesis (nothing
    is consumed), otherwise the list of parsed assignment expressions; both
    parentheses are consumed.  Raises ExtractorError when an argument is
    followed by anything but `,` or `)` and when `stack_top` drops below
    zero.
    """
    if stack_top < 0:
        raise ExtractorError('Recursion limit reached')

    peek_id, peek_value, peek_pos = token_stream.peek()
    if peek_id is not Token.POPEN:
        return None
    token_stream.pop()
    open_pos = peek_pos

    args = []
    while True:
        peek_id, peek_value, peek_pos = token_stream.peek()
        if peek_id is Token.PCLOSE:
            token_stream.pop()
            return args
        # FIXME handle infor
        args.append(self._assign_expression(token_stream, stack_top - 1))
        # TODO parse generator expression
        peek_id, peek_value, peek_pos = token_stream.peek()

        if peek_id is Token.COMMA:
            # consume the separator; if it stayed in the stream the next
            # round would parse an empty argument at the same comma forever
            token_stream.pop()
        elif peek_id is not Token.PCLOSE:
            raise ExtractorError('Unbalanced parentheses at %d' % open_pos)
|
|
|
|
|
|
|
|
def _array_literal(self, token_stream, stack_top):
    """Parse `[elem, elem, ...]` and return (Token.ARRAY, elements).

    A comma found where an element is expected appends None (an elided
    element).  Raises ExtractorError when the stream is not positioned on
    an opening bracket, for array comprehensions (not supported yet), when
    an element is followed by anything but `,` or `]` and when `stack_top`
    drops below zero.
    """
    if stack_top < 0:
        raise ExtractorError('Recursion limit reached')

    # TODO check no linebreak
    peek_id, peek_value, peek_pos = token_stream.peek()
    if peek_id is not Token.SOPEN:
        raise ExtractorError('Array expected at %d' % peek_pos)
    token_stream.pop()

    items = []
    while True:
        peek_id, peek_value, peek_pos = token_stream.peek()

        if peek_id is Token.COMMA:
            token_stream.pop()
            items.append(None)
            continue

        if peek_id is Token.SCLOSE:
            token_stream.pop()
            break

        if peek_id is Token.ID and peek_value == 'for':
            # TODO parse array comprehension
            raise ExtractorError('Array comprehension is not yet supported at %d' % peek_pos)

        items.append(self._assign_expression(token_stream, stack_top - 1))
        # the token after an element is consumed right away
        peek_id, peek_value, peek_pos = token_stream.pop()
        if peek_id is Token.SCLOSE:
            break
        if peek_id is not Token.COMMA:
            raise ExtractorError('Expected , after element at %d' % peek_pos)

    return (Token.ARRAY, items)
|
2016-12-04 12:49:30 +01:00
|
|
|
|
|
|
|
def _conditional_expression(self, token_stream, stack_top):
    """Parse `operator_expression [? assign_expression : assign_expression]`.

    Returns the operator expression unchanged when no `?` follows it,
    otherwise (Token.COND, condition, true_expr, false_expr).  Raises
    ExtractorError when the `:` is missing and when `stack_top` drops below
    zero.
    """
    if stack_top < 0:
        raise ExtractorError('Recursion limit reached')

    expr = self._operator_expression(token_stream, stack_top - 1)
    peek_id, peek_value, peek_pos = token_stream.peek()
    if peek_id is not Token.HOOK:
        return expr

    hook_pos = peek_pos
    # Consume the `?`.  peek() does not advance the stream, so without this
    # the branch parser would see the same `?` again and recurse until the
    # recursion limit is hit.
    token_stream.pop()
    true_expr = self._assign_expression(token_stream, stack_top - 1)

    peek_id, peek_value, peek_pos = token_stream.peek()
    if peek_id is not Token.COLON:
        raise ExtractorError('Missing : in conditional expression at %d' % hook_pos)
    # consume the `:` for the same reason
    token_stream.pop()
    false_expr = self._assign_expression(token_stream, stack_top - 1)

    return (Token.COND, expr, true_expr, false_expr)
|
2016-12-03 13:21:03 +01:00
|
|
|
|
2016-12-04 12:49:30 +01:00
|
|
|
def _operator_expression(self, token_stream, stack_top):
    """Parse a run of operands and operators into reverse Polish notation.

    Operands are parsed with _member_expression; operators are reordered by
    precedence with an operator stack.  Returns (Token.OPEXPR, out) where
    `out` mixes operand nodes with (token_id, op) operator entries in
    postfix order.  Raises ExtractorError on misplaced or unknown operators
    and when `stack_top` drops below zero.
    """
    if stack_top < 0:
        raise ExtractorError('Recursion limit reached')

    # --<---------------------------------<-- op --<--------------------------<----
    # |                                                                            |
    # |  --<-- prefix --<--                               -->-- postfix -->--      |
    # |  |                ^                               ^                 |      ^
    # v  v                |                               |                 v      |
    # ->------------>----------->-- lefthand-side expression -->----------->------->---|
    #
    # 20 grouping
    # ... # handled by lefthandside_expression
    # 17 postfix
    # 16 unary
    # 15 exponentiation  # not yet found in grammar
    # 14 mul
    # 13 add
    # 12 shift
    # 11 rel
    # 10 eq
    # 9 band
    # 8 bxor
    # 7 bor
    # 6 land
    # 5 lor
    # 4 cond  # handled by conditional_expression

    out = []
    stack = []

    def flush(min_prec):
        # move every stacked operator of at least `min_prec` to the output
        while stack and stack[-1][0] >= min_prec:
            _, stack_id, stack_op = stack.pop()
            out.append((stack_id, stack_op))

    has_another = True
    while has_another:
        # prefix
        had_inc = False
        has_prefix = True
        while has_prefix:
            peek_id, peek_value, peek_pos = token_stream.peek()
            if peek_id is not Token.UOP:
                has_prefix = False
                continue
            name, op = peek_value
            had_inc = name in (Token.INC, Token.DEC)
            # only operators binding tighter than unary (16) are flushed
            flush(17)
            stack.append((16, peek_id, op))
            token_stream.pop()
            peek_id, peek_value, peek_pos = token_stream.peek()
            if had_inc and peek_id is not Token.ID:
                raise ExtractorError('Prefix operator has to be followed by an identifier at %d' % peek_pos)
            has_prefix = peek_id is Token.UOP

        left = self._member_expression(token_stream, stack_top - 1)
        out.append(left)

        peek_id, peek_value, peek_pos = token_stream.peek()
        # postfix
        if peek_id is Token.UOP:
            if had_inc:
                raise ExtractorError('''Can't have prefix and postfix operator at the same time at %d''' % peek_pos)
            name, op = peek_value
            if name not in (Token.INC, Token.DEC):
                raise ExtractorError('Unexpected operator at %d' % peek_pos)
            prec = 17
            flush(prec)
            stack.append((prec, peek_id, op))
            token_stream.pop()
            peek_id, peek_value, peek_pos = token_stream.peek()

        if peek_id is Token.REL:
            name, op = peek_value
            # Relational operators need their own precedence; leaving `prec`
            # unset here reused the previous operator's value or failed with
            # an unbound local.
            # NOTE(review): if equality operators are tokenized as REL too
            # they get 11 instead of 10 - confirm against the tokenizer
            prec = 11
        elif peek_id is Token.OP:
            name, op = peek_value
            if name in (Token.MUL, Token.DIV, Token.MOD):
                prec = 14
            elif name in (Token.ADD, Token.SUB):
                prec = 13
            elif name in (Token.RSHIFT, Token.LSHIFT, Token.URSHIFT):
                prec = 12
            elif name is Token.BAND:
                prec = 9
            elif name is Token.BXOR:
                prec = 8
            elif name is Token.BOR:
                prec = 7
            else:
                raise ExtractorError('Unexpected operator at %d' % peek_pos)
        elif peek_id is Token.LOP:
            name, op = peek_value
            prec = {Token.OR: 5, Token.AND: 6}[name]
        else:
            has_another = False
            prec = 4  # empties stack

        flush(prec)
        if has_another:
            stack.append((prec, peek_id, op))
            token_stream.pop()

    return (Token.OPEXPR, out)
|
2016-12-03 13:21:03 +01:00
|
|
|
|
2016-12-06 18:42:59 +01:00
|
|
|
# TODO use context instead local_vars in argument
|
2016-12-07 08:25:19 +01:00
|
|
|
|
2016-12-06 18:42:59 +01:00
|
|
|
def getvalue(self, ref, local_vars):
    """Resolve a reference node to the value it stands for.

    None, the `undefined` sentinel, numbers, strings and already evaluated
    lists are returned unchanged.  Otherwise `ref` must be a
    (token_id, value) pair:
        Token.ID     -> looked up in `local_vars` (KeyError when unknown)
        literal kind -> the literal value
        Token.EXPR   -> the value of the interpreted expression
        Token.ARRAY  -> list of interpreted elements; an elided element
                        becomes `undefined`
    Raises ExtractorError for any other reference type.
    """
    # type('') is the text type in effect for this module's literals, so
    # unicode strings are treated as scalars under unicode_literals as well
    scalar_types = (int, float, str, type(''))
    if ref is None or ref is self.undefined or isinstance(ref, scalar_types):
        return ref
    if isinstance(ref, list):
        # AST nodes are tuples; a list is an array that has already been
        # evaluated and must not be unpacked as a (token_id, value) pair
        return ref

    ref_id, ref_value = ref
    if ref_id is Token.ID:
        return local_vars[ref_value]
    elif ref_id in _token_keys:
        return ref_value
    elif ref_id is Token.EXPR:
        ref, _ = self.interpret_statement(ref_value, local_vars)
        return self.getvalue(ref, local_vars)
    elif ref_id is Token.ARRAY:
        return [
            self.undefined if expr is None
            else self.interpret_expression(expr, local_vars)
            for expr in ref_value]
    else:
        raise ExtractorError('Unable to get value of reference type %s' % ref_id)
|
|
|
|
|
|
|
|
def putvalue(self, ref, value, local_vars):
    """Store `value` under the name held by an identifier reference.

    `ref` is unpacked as a (token_id, value) pair; references of any kind
    other than Token.ID are ignored.
    """
    kind, target = ref
    if kind is not Token.ID:
        return
    local_vars[target] = value
|
2016-12-06 18:42:59 +01:00
|
|
|
|
|
|
|
def interpret_statement(self, stmt, local_vars):
    """Execute one statement node against `local_vars`.

    Returns (ref, abort): `ref` is the statement's result (None when there
    is none) and `abort` is True once a return statement has run, telling
    the caller to stop executing further statements.  A None statement
    (empty statement) yields (None, False).  Raises ExtractorError for
    statement kinds that cannot be interpreted.
    """
    if stmt is None:
        return None, False

    name = stmt[0]
    ref = None
    abort = False
    if name == 'funcdecl':
        # TODO interpret funcdecl
        raise ExtractorError('''Can't interpret statement called %s''' % name)

    elif name is Token.BLOCK:
        for inner in stmt[1]:
            s, abort = self.interpret_statement(inner, local_vars)
            if s is not None:
                ref = self.getvalue(s, local_vars)
            if abort:
                # a return inside the block ends the block; running on would
                # execute later statements and lose the abort flag
                break

    elif name is Token.VAR:
        for var_name, initializer in stmt[1]:
            if initializer is self.undefined:
                # declaration without initializer: the sentinel is not an
                # expression node and cannot be interpreted
                local_vars[var_name] = initializer
            else:
                local_vars[var_name] = self.getvalue(
                    self.interpret_expression(initializer, local_vars), local_vars)

    elif name is Token.EXPR:
        for expr in stmt[1]:
            ref = self.interpret_expression(expr, local_vars)

    # if
    # continue, break
    elif name is Token.RETURN:
        # TODO use context instead returning abort
        ref, abort = self.interpret_statement(stmt[1], local_vars)
        ref = self.getvalue(ref, local_vars)
        if isinstance(ref, list):
            # TODO deal with nested arrays
            ref = [self.getvalue(elem, local_vars) for elem in ref]
        abort = True

    # with
    # label
    # switch
    # throw
    # try
    # debugger
    else:
        raise ExtractorError('''Can't interpret statement called %s''' % name)

    return ref, abort
|
|
|
|
|
|
|
|
def interpret_expression(self, expr, local_vars):
    """Evaluate one expression node and return the resulting reference/value.

    Handled node kinds (first tuple item):
        Token.ASSIGN  -- without operator the left side is evaluated;
                         with an operator, op(left value, right value)
        Token.EXPR    -- delegated to interpret_statement
        Token.OPEXPR  -- reverse Polish operand/operator list
        Token.MEMBER  -- target followed by element accessors
        Token.ID, Token.ARRAY -- resolved through getvalue
        literal kinds -- returned as they are
    Raises ExtractorError for field access, calls, unknown node kinds and
    for an operator expression that leaves extra values behind.
    """
    name = expr[0]

    if name is Token.ASSIGN:
        op, left, right = expr[1:]
        if op is None:
            return self.interpret_expression(left, local_vars)
        # TODO handle undeclared variables (create propery)
        leftref = self.interpret_expression(left, local_vars)
        leftvalue = self.getvalue(leftref, local_vars)
        rightvalue = self.getvalue(self.interpret_expression(right, local_vars), local_vars)
        # TODO set array element
        return op(leftvalue, rightvalue)

    elif name is Token.EXPR:
        ref, _ = self.interpret_statement(expr, local_vars)
        return ref

    elif name is Token.OPEXPR:
        stack = []
        # the list is only read, so it is walked in place instead of being
        # copied and popped from the front
        for token in expr[1]:
            if token[0] is Token.UOP:
                # unary operators take a single operand; they must be tested
                # before (and kept out of) the binary operator kinds
                right = stack.pop()
                stack.append(token[1](self.getvalue(right, local_vars)))
            elif token[0] in (Token.OP, Token.AOP, Token.LOP, Token.REL):
                right = stack.pop()
                left = stack.pop()
                stack.append(token[1](self.getvalue(left, local_vars), self.getvalue(right, local_vars)))
            else:
                stack.append(self.interpret_expression(token, local_vars))
        result = stack.pop()
        if stack:
            raise ExtractorError('Expression has too many values')
        return result

    elif name is Token.MEMBER:
        # TODO interpret member
        target, args, tail = expr[1:]
        while tail is not None:
            tail_name, tail_value, tail = tail
            if tail_name is Token.FIELD:
                # TODO interpret field
                raise ExtractorError('''Can't interpret expression called %s''' % tail_name)
            elif tail_name is Token.ELEM:
                # TODO interpret element
                ret, _ = self.interpret_statement(tail_value, local_vars)
                index = self.getvalue(ret, local_vars)
                # Index in place and let this loop walk the remaining tail.
                # Recursing with the remaining tail applied it once in the
                # recursion and once more when the loop continued.
                target = self.getvalue(target, local_vars)[index]
            elif tail_name is Token.CALL:
                # TODO interpret call
                raise ExtractorError('''Can't interpret expression called %s''' % tail_name)
        return target

    elif name in (Token.ID, Token.ARRAY):
        return self.getvalue(expr, local_vars)

    # literal
    elif name in _token_keys:
        return expr

    else:
        raise ExtractorError('''Can't interpret expression called %s''' % name)
|
2014-03-30 07:02:58 +02:00
|
|
|
|
2014-07-15 22:46:39 +02:00
|
|
|
def extract_object(self, objname):
    """Build Python callables for the function fields of a JS object literal.

    Searches ``self.code`` for `objname = { key: function(args){code}, ... };`
    (optionally preceded by `var`) and returns a dict mapping every key to
    the result of build_function(argnames, code).

    Raises ExtractorError when no such object definition is found.
    """
    obj_m = re.search(
        (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
        r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
        r'\}\s*;',
        self.code)
    if obj_m is None:
        # fail with a clear message, like extract_function does, instead of
        # an AttributeError on None
        raise ExtractorError('Could not find JS object %r' % objname)
    fields = obj_m.group('fields')

    # Currently, it only supports function definitions
    fields_m = re.finditer(
        r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
        r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
        fields)

    obj = {}
    for f in fields_m:
        argnames = f.group('args').split(',')
        obj[f.group('key')] = self.build_function(argnames, f.group('code'))
    return obj
|
|
|
|
|
2014-03-30 07:02:58 +02:00
|
|
|
def extract_function(self, funcname):
    """Locate the JS function `funcname` in ``self.code`` and make it callable.

    Recognises `function NAME(...)`, `NAME = function(...)` after one of
    `{ ; ,` and `var NAME = function(...)`.  The comma separated argument
    names and the body are handed to build_function, whose result is
    returned.  Raises ExtractorError when the function is not found.
    """
    escaped = re.escape(funcname)
    pattern = r'''(?x)
        (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
        \((?P<args>[^)]*)\)\s*
        \{(?P<code>[^}]+)\}''' % (escaped, escaped, escaped)

    func_m = re.search(pattern, self.code)
    if func_m is None:
        raise ExtractorError('Could not find JS function %r' % funcname)

    body = func_m.group('code')
    argnames = func_m.group('args').split(',')
    return self.build_function(argnames, body)
|
|
|
|
|
2015-02-01 22:38:26 +01:00
|
|
|
def call_function(self, funcname, *args):
    """Extract the JS function `funcname` and call it with `args`.

    The positional arguments are passed on as one tuple, which is what the
    callables produced by build_function expect.
    """
    return self.extract_function(funcname)(args)
|
|
|
|
|
2014-07-15 22:46:39 +02:00
|
|
|
def build_function(self, argnames, code):
    """Return a Python callable that interprets the JS function body `code`.

    The callable takes ONE sequence of argument values, binds them to
    `argnames` as local variables and interprets the statements of `code`
    in order until one of them signals abort (a return statement).  It
    returns the result of the last interpreted statement, or None when the
    body contains no statements.
    """
    def resf(args):
        local_vars = dict(zip(argnames, args))
        # defined up front so a body without statements returns None
        # instead of failing on an unbound local
        res = None
        for stmt in self.statements(code):
            res, abort = self.interpret_statement(stmt, local_vars)
            if abort:
                break
        return res

    return resf
|