[jsinterp] Token class for tokens

This commit is contained in:
sulyi 2016-12-07 07:28:09 +01:00
parent d422aefc03
commit ce4a616c4a
3 changed files with 157 additions and 148 deletions

View File

@ -1,6 +1,20 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
from enum import Enum
class Token(Enum):
    """Token kinds emitted by the JavaScript tokenizer.

    Members carry explicit, consecutive integer values.  ``reflag`` and
    ``rebody`` are deliberately lower-case: their ``name`` must match the
    named regex groups used when scanning regular-expression literals.
    """

    # Punctuation, operators, operator categories and literals (0..53).
    (COPEN, CCLOSE, POPEN, PCLOSE, SOPEN, SCLOSE,
     DOT, END, COMMA, HOOK, COLON,
     AND, OR, INC, DEC, NOT, BNOT, DEL, VOID, TYPE,
     LT, GT, LE, GE, EQ, NE, SEQ, SNE,
     BOR, BXOR, BAND, RSHIFT, LSHIFT, URSHIFT, SUB, ADD, MOD, DIV, MUL,
     OP, AOP, UOP, LOP, REL,
     COMMENT, TOKEN, PUNCT,
     NULL, BOOL, ID, STR, INT, FLOAT, REGEX) = range(54)

    # Regex-literal helper groups (names mirror the regex group names).
    reflag = 54
    rebody = 55
__DECIMAL_RE = r'(?:[1-9][0-9]*)|0' __DECIMAL_RE = r'(?:[1-9][0-9]*)|0'
__OCTAL_RE = r'0[0-7]+' __OCTAL_RE = r'0[0-7]+'
@ -41,23 +55,24 @@ _REGEX_FLAGS_RE = r'(?![gimy]*(?P<reflag>[gimy])[gimy]*(?P=reflag))(?P<reflags>[
_REGEX_RE = r'/(?!\*)(?P<rebody>(?:[^/\n]|(?:\\/))*)/(?:(?:%s)|(?:\s|$))' % _REGEX_FLAGS_RE _REGEX_RE = r'/(?!\*)(?P<rebody>(?:[^/\n]|(?:\\/))*)/(?:(?:%s)|(?:\s|$))' % _REGEX_FLAGS_RE
_TOKENS = [ _TOKENS = [
('null', _NULL_RE), (Token.NULL, _NULL_RE),
('bool', _BOOL_RE), (Token.BOOL, _BOOL_RE),
('id', _NAME_RE), (Token.ID, _NAME_RE),
('str', _STRING_RE), (Token.STR, _STRING_RE),
('int', _INTEGER_RE), (Token.INT, _INTEGER_RE),
('float', _FLOAT_RE), (Token.FLOAT, _FLOAT_RE),
('regex', _REGEX_RE) (Token.REGEX, _REGEX_RE)
] ]
COMMENT_RE = r'(?P<comment>/\*(?:(?!\*/)(?:\n|.))*\*/)' COMMENT_RE = r'(?P<%s>/\*(?:(?!\*/)(?:\n|.))*\*/)' % Token.COMMENT.name
TOKENS_RE = r'|'.join('(?P<%(id)s>%(value)s)' % {'id': name, 'value': value} TOKENS_RE = r'|'.join('(?P<%(id)s>%(value)s)' % {'id': name.name, 'value': value}
for name, value in _TOKENS) for name, value in _TOKENS)
LOGICAL_OPERATORS_RE = r'(?P<lop>%s)' % r'|'.join(re.escape(value) for value in _logical_operator) LOGICAL_OPERATORS_RE = r'(?P<%s>%s)' % (Token.LOP.name, r'|'.join(re.escape(value) for value in _logical_operator))
UNARY_OPERATORS_RE = r'(?P<uop>%s)' % r'|'.join(re.escape(value) for value in _unary_operator) UNARY_OPERATORS_RE = r'(?P<%s>%s)' % (Token.UOP.name, r'|'.join(re.escape(value) for value in _unary_operator))
ASSIGN_OPERATORS_RE = r'(?P<aop>%s)' % r'|'.join(re.escape(value) if value != '=' else re.escape(value) + r'(?!\=)' ASSIGN_OPERATORS_RE = r'(?P<%s>%s)' % (Token.AOP.name,
for value in _assign_operator) r'|'.join(re.escape(value) if value != '=' else re.escape(value) + r'(?!\=)'
OPERATORS_RE = r'(?P<op>%s)' % r'|'.join(re.escape(value) for value in _operator) for value in _assign_operator))
RELATIONS_RE = r'(?P<rel>{0:s})'.format(r'|'.join(re.escape(value) for value in _relation)) OPERATORS_RE = r'(?P<%s>%s)' % (Token.OP.name, r'|'.join(re.escape(value) for value in _operator))
PUNCTUATIONS_RE = r'(?P<punc>%s)' % r'|'.join(re.escape(value) for value in _punctuations) RELATIONS_RE = r'(?P<%s>%s)' % (Token.REL.name, r'|'.join(re.escape(value) for value in _relation))
PUNCTUATIONS_RE = r'(?P<%s>%s)' % (Token.PUNCT.name, r'|'.join(re.escape(value) for value in _punctuations))

View File

@ -4,8 +4,9 @@ import re
from ..utils import ExtractorError from ..utils import ExtractorError
from .tstream import TokenStream from .tstream import TokenStream
from .jsgrammar import Token
_token_keys = 'null', 'bool', 'id', 'str', 'int', 'float', 'regex' _token_keys = Token.NULL, Token.BOOL, Token.ID, Token.STR, Token.INT, Token.FLOAT, Token.REGEX
class JSInterpreter(object): class JSInterpreter(object):
@ -26,24 +27,24 @@ class JSInterpreter(object):
statement = None statement = None
token_id, token_value, token_pos = token_stream.peek() token_id, token_value, token_pos = token_stream.peek()
if token_id in ('cclose', 'end'): if token_id in (Token.CCLOSE, Token.END):
# empty statement goes straight here # empty statement goes straight here
return statement return statement
if token_id == 'id' and token_value == 'function': if token_id is Token.ID and token_value == 'function':
# TODO parse funcdecl # TODO parse funcdecl
raise ExtractorError('Function declaration is not yet supported at %d' % token_pos) raise ExtractorError('Function declaration is not yet supported at %d' % token_pos)
elif token_id == 'copen': elif token_id is Token.COPEN:
# block # block
token_stream.pop() token_stream.pop()
statement_list = [] statement_list = []
for s in self.statements(token_stream, stack_top - 1): for s in self.statements(token_stream, stack_top - 1):
statement_list.append(s) statement_list.append(s)
token_id, token_value, token_pos = token_stream.peek() token_id, token_value, token_pos = token_stream.peek()
if token_id == 'cclose': if token_id is Token.CCLOSE:
token_stream.pop() token_stream.pop()
break break
statement = ('block', statement_list) statement = ('block', statement_list)
elif token_id == 'id': elif token_id is Token.ID:
# TODO parse label # TODO parse label
if token_value == 'var': if token_value == 'var':
token_stream.pop() token_stream.pop()
@ -52,26 +53,26 @@ class JSInterpreter(object):
has_another = True has_another = True
while has_another: while has_another:
token_id, token_value, token_pos = token_stream.pop() token_id, token_value, token_pos = token_stream.pop()
if token_id != 'id': if token_id is not Token.ID:
raise ExtractorError('Missing variable name at %d' % token_pos) raise ExtractorError('Missing variable name at %d' % token_pos)
token_stream.chk_id(last=True) token_stream.chk_id(last=True)
variables.append(token_value) variables.append(token_value)
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'aop': if peek_id is Token.AOP:
token_stream.pop() token_stream.pop()
init.append(self._assign_expression(token_stream, stack_top - 1)) init.append(self._assign_expression(token_stream, stack_top - 1))
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
else: else:
init.append(JSInterpreter.undefined) init.append(JSInterpreter.undefined)
if peek_id == 'end': if peek_id is Token.END:
has_another = False has_another = False
elif peek_id == 'comma': elif peek_id is Token.COMMA:
pass pass
else: else:
# FIXME automatic end insertion # FIXME automatic end insertion
# - token_id == cclose # - token_id is Token.CCLOSE
# - check line terminator # - check line terminator
# - restricted token # - restricted token
raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos)) raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
@ -89,7 +90,7 @@ class JSInterpreter(object):
token_stream.pop() token_stream.pop()
statement = ('return', self._expression(token_stream, stack_top - 1)) statement = ('return', self._expression(token_stream, stack_top - 1))
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id != 'end': if peek_id is not Token.END:
# FIXME automatic end insertion # FIXME automatic end insertion
raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos)) raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
elif token_value == 'with': elif token_value == 'with':
@ -113,13 +114,13 @@ class JSInterpreter(object):
has_another = True has_another = True
while has_another: while has_another:
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if not (peek_id == 'copen' and peek_id == 'id' and peek_value == 'function'): if not (peek_id is Token.COPEN and peek_id is Token.ID and peek_value == 'function'):
expr_list.append(self._assign_expression(token_stream, stack_top - 1)) expr_list.append(self._assign_expression(token_stream, stack_top - 1))
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'end': if peek_id is Token.END:
has_another = False has_another = False
elif peek_id == 'comma': elif peek_id is Token.COMMA:
pass pass
else: else:
# FIXME automatic end insertion # FIXME automatic end insertion
@ -144,9 +145,9 @@ class JSInterpreter(object):
while has_another: while has_another:
exprs.append(self._assign_expression(token_stream, stack_top - 1)) exprs.append(self._assign_expression(token_stream, stack_top - 1))
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'comma': if peek_id is Token.COMMA:
token_stream.pop() token_stream.pop()
elif peek_id == 'id' and peek_value == 'yield': elif peek_id is Token.ID and peek_value == 'yield':
# TODO parse yield # TODO parse yield
raise ExtractorError('Yield statement is not yet supported at %d' % peek_pos) raise ExtractorError('Yield statement is not yet supported at %d' % peek_pos)
else: else:
@ -159,7 +160,7 @@ class JSInterpreter(object):
left = self._conditional_expression(token_stream, stack_top - 1) left = self._conditional_expression(token_stream, stack_top - 1)
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'aop': if peek_id is Token.AOP:
token_stream.pop() token_stream.pop()
_, op = peek_value _, op = peek_value
right = self._assign_expression(token_stream, stack_top - 1) right = self._assign_expression(token_stream, stack_top - 1)
@ -170,7 +171,7 @@ class JSInterpreter(object):
def _member_expression(self, token_stream, stack_top): def _member_expression(self, token_stream, stack_top):
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'id' and peek_value == 'new': if peek_id is Token.ID and peek_value == 'new':
token_stream.pop() token_stream.pop()
target = self._member_expression(token_stream, stack_top - 1) target = self._member_expression(token_stream, stack_top - 1)
args = self._arguments(token_stream, stack_top - 1) args = self._arguments(token_stream, stack_top - 1)
@ -187,30 +188,30 @@ class JSInterpreter(object):
raise ExtractorError('Recursion limit reached') raise ExtractorError('Recursion limit reached')
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'dot': if peek_id is Token.DOT:
token_stream.pop() token_stream.pop()
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'dot': if peek_id is Token.DOT:
token_stream.pop() token_stream.pop()
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
elif peek_id == 'popen': elif peek_id is Token.POPEN:
# TODO handle field query # TODO handle field query
raise ExtractorError('Field querry is not yet supported at %d' % peek_pos) raise ExtractorError('Field querry is not yet supported at %d' % peek_pos)
if peek_id == 'id': if peek_id is Token.ID:
token_stream.pop() token_stream.pop()
return ('field', peek_value, self._member_tail(token_stream, stack_top - 1)) return ('field', peek_value, self._member_tail(token_stream, stack_top - 1))
else: else:
raise ExtractorError('Identifier name expected at %d' % peek_pos) raise ExtractorError('Identifier name expected at %d' % peek_pos)
elif peek_id == 'sopen': elif peek_id is Token.SOPEN:
token_stream.pop() token_stream.pop()
index = self._expression(token_stream, stack_top - 1) index = self._expression(token_stream, stack_top - 1)
token_id, token_value, token_pos = token_stream.pop() token_id, token_value, token_pos = token_stream.pop()
if token_id == 'sclose': if token_id is Token.SCLOSE:
return ('element', index, self._member_tail(token_stream, stack_top - 1)) return ('element', index, self._member_tail(token_stream, stack_top - 1))
else: else:
raise ExtractorError('Unexpected sequence at %d' % token_pos) raise ExtractorError('Unexpected sequence at %d' % token_pos)
elif peek_id == 'popen': elif peek_id is Token.POPEN:
args = self._arguments(token_stream, stack_top - 1) args = self._arguments(token_stream, stack_top - 1)
return ('call', args, self._member_tail(token_stream, stack_top - 1)) return ('call', args, self._member_tail(token_stream, stack_top - 1))
else: else:
@ -224,7 +225,7 @@ class JSInterpreter(object):
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id in _token_keys: if peek_id in _token_keys:
token_stream.pop() token_stream.pop()
if peek_id == 'id': if peek_id is Token.ID:
# this # this
if peek_value == 'this': if peek_value == 'this':
return ('rsv', 'this') return ('rsv', 'this')
@ -235,24 +236,24 @@ class JSInterpreter(object):
# id # id
else: else:
token_stream.chk_id(last=True) token_stream.chk_id(last=True)
return ('id', peek_value) return (Token.ID, peek_value)
# literals # literals
else: else:
return (peek_id, peek_value) return (peek_id, peek_value)
# array # array
elif peek_id == 'sopen': elif peek_id is Token.SOPEN:
return self._array_literal(token_stream, stack_top - 1) return self._array_literal(token_stream, stack_top - 1)
# object # object
elif peek_id == 'copen': elif peek_id is Token.COPEN:
# TODO parse object # TODO parse object
raise ExtractorError('Object literals is not yet supported at %d' % peek_pos) raise ExtractorError('Object literals is not yet supported at %d' % peek_pos)
# expr # expr
elif peek_id == 'popen': elif peek_id is Token.POPEN:
token_stream.pop() token_stream.pop()
open_pos = peek_pos open_pos = peek_pos
expr = self._expression(token_stream, stack_top - 1) expr = self._expression(token_stream, stack_top - 1)
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id != 'pclose': if peek_id is not Token.PCLOSE:
raise ExtractorError('Unbalanced parentheses at %d' % open_pos) raise ExtractorError('Unbalanced parentheses at %d' % open_pos)
token_stream.pop() token_stream.pop()
return ('expr', expr) return ('expr', expr)
@ -265,7 +266,7 @@ class JSInterpreter(object):
raise ExtractorError('Recursion limit reached') raise ExtractorError('Recursion limit reached')
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'popen': if peek_id is Token.POPEN:
token_stream.pop() token_stream.pop()
open_pos = peek_pos open_pos = peek_pos
else: else:
@ -273,7 +274,7 @@ class JSInterpreter(object):
args = [] args = []
while True: while True:
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'pclose': if peek_id is Token.PCLOSE:
token_stream.pop() token_stream.pop()
return args return args
# FIXME handle infor # FIXME handle infor
@ -281,7 +282,7 @@ class JSInterpreter(object):
# TODO parse generator expression # TODO parse generator expression
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id not in ('comma', 'pclose'): if peek_id not in (Token.COMMA, Token.PCLOSE):
raise ExtractorError('Unbalanced parentheses at %d' % open_pos) raise ExtractorError('Unbalanced parentheses at %d' % open_pos)
def _array_literal(self, token_stream, stack_top): def _array_literal(self, token_stream, stack_top):
@ -290,7 +291,7 @@ class JSInterpreter(object):
# TODO check no linebreak # TODO check no linebreak
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id != 'sopen': if peek_id is not Token.SOPEN:
raise ExtractorError('Array expected at %d' % peek_pos) raise ExtractorError('Array expected at %d' % peek_pos)
token_stream.pop() token_stream.pop()
elements = [] elements = []
@ -298,21 +299,21 @@ class JSInterpreter(object):
has_another = True has_another = True
while has_another: while has_another:
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'comma': if peek_id is Token.COMMA:
token_stream.pop() token_stream.pop()
elements.append(None) elements.append(None)
elif peek_id == 'sclose': elif peek_id is Token.SCLOSE:
token_stream.pop() token_stream.pop()
has_another = False has_another = False
elif peek_id == 'id' and peek_value == 'for': elif peek_id is Token.ID and peek_value == 'for':
# TODO parse array comprehension # TODO parse array comprehension
raise ExtractorError('Array comprehension is not yet supported at %d' % peek_pos) raise ExtractorError('Array comprehension is not yet supported at %d' % peek_pos)
else: else:
elements.append(self._assign_expression(token_stream, stack_top - 1)) elements.append(self._assign_expression(token_stream, stack_top - 1))
peek_id, peek_value, peek_pos = token_stream.pop() peek_id, peek_value, peek_pos = token_stream.pop()
if peek_id == 'sclose': if peek_id is Token.SCLOSE:
has_another = False has_another = False
elif peek_id != 'comma': elif peek_id is not Token.COMMA:
raise ExtractorError('Expected , after element at %d' % peek_pos) raise ExtractorError('Expected , after element at %d' % peek_pos)
return ('array', elements) return ('array', elements)
@ -323,11 +324,11 @@ class JSInterpreter(object):
expr = self._operator_expression(token_stream, stack_top - 1) expr = self._operator_expression(token_stream, stack_top - 1)
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'hook': if peek_id is Token.HOOK:
hook_pos = peek_pos hook_pos = peek_pos
true_expr = self._assign_expression(token_stream, stack_top - 1) true_expr = self._assign_expression(token_stream, stack_top - 1)
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'colon': if peek_id is Token.COLON:
false_expr = self._assign_expression(token_stream, stack_top - 1) false_expr = self._assign_expression(token_stream, stack_top - 1)
else: else:
raise ExtractorError('Missing : in conditional expression at %d' % hook_pos) raise ExtractorError('Missing : in conditional expression at %d' % hook_pos)
@ -371,18 +372,18 @@ class JSInterpreter(object):
has_prefix = True has_prefix = True
while has_prefix: while has_prefix:
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'uop': if peek_id is Token.UOP:
name, op = peek_value name, op = peek_value
had_inc = name in ('inc', 'dec') had_inc = name in (Token.INC, Token.DEC)
while stack and stack[-1][0] > 16: while stack and stack[-1][0] > 16:
_, stack_id, stack_op = stack.pop() _, stack_id, stack_op = stack.pop()
out.append((stack_id, stack_op)) out.append((stack_id, stack_op))
stack.append((16, peek_id, op)) stack.append((16, peek_id, op))
token_stream.pop() token_stream.pop()
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if had_inc and peek_id != 'id': if had_inc and peek_id is not Token.ID:
raise ExtractorError('Prefix operator has to be followed by an identifier at %d' % peek_pos) raise ExtractorError('Prefix operator has to be followed by an identifier at %d' % peek_pos)
has_prefix = peek_id == 'uop' has_prefix = peek_id is Token.UOP
else: else:
has_prefix = False has_prefix = False
@ -391,11 +392,11 @@ class JSInterpreter(object):
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
# postfix # postfix
if peek_id == 'uop': if peek_id is Token.UOP:
if had_inc: if had_inc:
raise ExtractorError('''Can't have prefix and postfix operator at the same time at %d''' % peek_pos) raise ExtractorError('''Can't have prefix and postfix operator at the same time at %d''' % peek_pos)
name, op = peek_value name, op = peek_value
if name in ('inc', 'dec'): if name in (Token.INC, Token.DEC):
prec = 17 prec = 17
else: else:
raise ExtractorError('Unexpected operator at %d' % peek_pos) raise ExtractorError('Unexpected operator at %d' % peek_pos)
@ -406,27 +407,27 @@ class JSInterpreter(object):
token_stream.pop() token_stream.pop()
peek_id, peek_value, peek_pos = token_stream.peek() peek_id, peek_value, peek_pos = token_stream.peek()
if peek_id == 'rel': if peek_id is Token.REL:
name, op = peek_value name, op = peek_value
elif peek_id == 'op': elif peek_id is Token.OP:
name, op = peek_value name, op = peek_value
if name in ('mul', 'div', 'mod'): if name in (Token.MUL, Token.DIV, Token.MOD):
prec = 14 prec = 14
elif name in ('add', 'sub'): elif name in (Token.ADD, Token.SUB):
prec = 13 prec = 13
elif name.endswith('shift'): elif name in (Token.RSHIFT, Token.LSHIFT, Token.URSHIFT):
prec = 12 prec = 12
elif name == 'band': elif name is Token.BAND:
prec = 9 prec = 9
elif name == 'bxor': elif name is Token.BXOR:
prec = 8 prec = 8
elif name == 'bor': elif name is Token.BOR:
prec = 7 prec = 7
else: else:
raise ExtractorError('Unexpected operator at %d' % peek_pos) raise ExtractorError('Unexpected operator at %d' % peek_pos)
elif peek_id == 'lop': elif peek_id is Token.LOP:
name, op = peek_value name, op = peek_value
prec = {'or': 5, 'and': 6}[name] prec = {Token.OR: 5, Token.AND: 6}[name]
else: else:
has_another = False has_another = False
prec = 4 # empties stack prec = 4 # empties stack
@ -441,12 +442,12 @@ class JSInterpreter(object):
return ('rpn', out) return ('rpn', out)
# TODO use context instead local_vars in argument # TODO use context instead local_vars in argument
def getvalue(self, ref, local_vars): def getvalue(self, ref, local_vars):
if ref is None: if ref is None or ref is self.undefined or isinstance(ref, (int, float, str)): # not Token
return None return ref
ref_id, ref_value = ref ref_id, ref_value = ref
if ref_id == 'id': if ref_id is Token.ID:
return local_vars[ref_value] return local_vars[ref_value]
elif ref_id in _token_keys: elif ref_id in _token_keys:
return ref_value return ref_value
@ -512,20 +513,11 @@ class JSInterpreter(object):
rpn = expr[1] rpn = expr[1]
while rpn: while rpn:
token = rpn.pop(0) token = rpn.pop(0)
if token[0] in ('op', 'aop', 'lop', 'rel'): if token[0] in (Token.OP, Token.AOP, Token.LOP, Token.REL):
right = stack.pop() right = stack.pop()
left = stack.pop() left = stack.pop()
result = token[1](self.getvalue(left, local_vars), self.getvalue(right, local_vars)) stack.append(token[1](self.getvalue(left, local_vars), self.getvalue(right, local_vars)))
if type(result) == int: elif token[0] is Token.UOP:
type_id = 'int'
elif type(result) == float:
type_id = 'float'
elif type(result) == str:
type_id = 'str'
else:
type_id = str(type(result))
stack.append((type_id, result))
elif token[0] == 'uop':
right = stack.pop() right = stack.pop()
stack.append(token[1](self.getvalue(right, local_vars))) stack.append(token[1](self.getvalue(right, local_vars)))
else: else:
@ -551,7 +543,7 @@ class JSInterpreter(object):
# TODO interpret call # TODO interpret call
raise ExtractorError('''Can't interpret expression called %s''' % tail_name) raise ExtractorError('''Can't interpret expression called %s''' % tail_name)
return target return target
elif name == 'id': elif name is Token.ID:
return local_vars[expr[1]] return local_vars[expr[1]]
# literal # literal

View File

@ -12,71 +12,72 @@ from .jsgrammar import (
UNARY_OPERATORS_RE, UNARY_OPERATORS_RE,
RELATIONS_RE, RELATIONS_RE,
ASSIGN_OPERATORS_RE, ASSIGN_OPERATORS_RE,
OPERATORS_RE OPERATORS_RE,
Token
) )
_PUNCTUATIONS = { _PUNCTUATIONS = {
'{': 'copen', '{': Token.COPEN,
'}': 'cclose', '}': Token.CCLOSE,
'(': 'popen', '(': Token.POPEN,
')': 'pclose', ')': Token.PCLOSE,
'[': 'sopen', '[': Token.SOPEN,
']': 'sclose', ']': Token.SCLOSE,
'.': 'dot', '.': Token.DOT,
';': 'end', ';': Token.END,
',': 'comma', ',': Token.COMMA,
'?': 'hook', '?': Token.HOOK,
':': 'colon' ':': Token.COLON
} }
_LOGICAL_OPERATORS = { _LOGICAL_OPERATORS = {
'&&': ('and', lambda cur, right: cur and right), '&&': (Token.AND, lambda cur, right: cur and right),
'||': ('or', lambda cur, right: cur or right) '||': (Token.OR, lambda cur, right: cur or right)
} }
_UNARY_OPERATORS = { _UNARY_OPERATORS = {
'++': ('inc', lambda cur: cur + 1), '++': (Token.INC, lambda cur: cur + 1),
'--': ('dec', lambda cur: cur - 1), '--': (Token.DEC, lambda cur: cur - 1),
'!': ('not', operator.not_), '!': (Token.NOT, operator.not_),
'~': ('bnot', lambda cur: cur ^ -1), '~': (Token.BNOT, lambda cur: cur ^ -1),
# XXX define these operators # XXX define these operators
'delete': ('del', None), 'delete': (Token.DEL, None),
'void': ('void', None), 'void': (Token.VOID, None),
'typeof': ('type', lambda cur: type(cur)) 'typeof': (Token.TYPE, lambda cur: type(cur))
} }
_RELATIONS = { _RELATIONS = {
'<': ('lt', operator.lt), '<': (Token.LT, operator.lt),
'>': ('gt', operator.gt), '>': (Token.GT, operator.gt),
'<=': ('le', operator.le), '<=': (Token.LE, operator.le),
'>=': ('ge', operator.ge), '>=': (Token.GE, operator.ge),
# XXX check python and JavaScript equality difference # XXX check python and JavaScript equality difference
'==': ('eq', operator.eq), '==': (Token.EQ, operator.eq),
'!=': ('ne', operator.ne), '!=': (Token.NE, operator.ne),
'===': ('seq', lambda cur, right: cur == right and type(cur) == type(right)), '===': (Token.SEQ, lambda cur, right: cur == right and type(cur) == type(right)),
'!==': ('sne', lambda cur, right: not cur == right or not type(cur) == type(right)) '!==': (Token.SNE, lambda cur, right: not cur == right or not type(cur) == type(right))
} }
_OPERATORS = { _OPERATORS = {
'|': ('bor', operator.or_), '|': (Token.BOR, operator.or_),
'^': ('bxor', operator.xor), '^': (Token.BXOR, operator.xor),
'&': ('band', operator.and_), '&': (Token.BAND, operator.and_),
# NOTE convert to int before shift float # NOTE convert to int before shift float
'>>': ('rshift', operator.rshift), '>>': (Token.RSHIFT, operator.rshift),
'<<': ('lshift', operator.lshift), '<<': (Token.LSHIFT, operator.lshift),
'>>>': ('urshift', lambda cur, right: cur >> right if cur >= 0 else (cur + 0x100000000) >> right), '>>>': (Token.URSHIFT, lambda cur, right: cur >> right if cur >= 0 else (cur + 0x100000000) >> right),
'-': ('sub', operator.sub), '-': (Token.SUB, operator.sub),
'+': ('add', operator.add), '+': (Token.ADD, operator.add),
'%': ('mod', operator.mod), '%': (Token.MOD, operator.mod),
'/': ('div', operator.truediv), '/': (Token.DIV, operator.truediv),
'*': ('mul', operator.mul) '*': (Token.MUL, operator.mul)
} }
_ASSIGN_OPERATORS = dict((op + '=', ('set_%s' % token[0], token[1])) for op, token in _OPERATORS.items()) _ASSIGN_OPERATORS = dict((op + '=', ('set_%s' % token[0], token[1])) for op, token in _OPERATORS.items())
_ASSIGN_OPERATORS['='] = ('set', lambda cur, right: right) _ASSIGN_OPERATORS['='] = ('set', lambda cur, right: right)
_operator_lookup = { _operator_lookup = {
'op': _OPERATORS, Token.OP: _OPERATORS,
'aop': _ASSIGN_OPERATORS, Token.AOP: _ASSIGN_OPERATORS,
'uop': _UNARY_OPERATORS, Token.UOP: _UNARY_OPERATORS,
'lop': _LOGICAL_OPERATORS, Token.LOP: _LOGICAL_OPERATORS,
'rel': _RELATIONS Token.REL: _RELATIONS
} }
# only to check ids # only to check ids
_reserved_words = ('break', 'case', 'catch', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'finally', _reserved_words = ('break', 'case', 'catch', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'finally',
@ -109,29 +110,30 @@ class TokenStream(object):
token_id = feed_m.lastgroup token_id = feed_m.lastgroup
token_value = feed_m.group(token_id) token_value = feed_m.group(token_id)
pos = feed_m.start(token_id) pos = feed_m.start(token_id)
token_id = Token[token_id]
self.ended = feed_m.end() >= len(self.code) # because how yield works self.ended = feed_m.end() >= len(self.code) # because how yield works
if token_id == 'comment': if token_id is Token.COMMENT:
pass pass
# TODO date # TODO date
elif token_id == 'null': elif token_id is Token.NULL:
yield (token_id, None, pos) yield (token_id, None, pos)
elif token_id == 'bool': elif token_id is Token.BOOL:
yield (token_id, {'true': True, 'false': False}[token_value], pos) yield (token_id, {'true': True, 'false': False}[token_value], pos)
elif token_id == 'str': elif token_id is Token.STR:
yield (token_id, token_value, pos) yield (token_id, token_value, pos)
elif token_id == 'int': elif token_id is Token.INT:
yield (token_id, int(token_value), pos) yield (token_id, int(token_value), pos)
elif token_id == 'float': elif token_id is Token.FLOAT:
yield (token_id, float(token_value), pos) yield (token_id, float(token_value), pos)
elif token_id == 'regex': elif token_id is Token.REGEX:
# TODO error handling # TODO error handling
regex = re.compile(feed_m.group('rebody')) regex = re.compile(feed_m.group('rebody'))
yield (token_id, (regex, feed_m.group('reflags')), pos) yield (token_id, (regex, feed_m.group('reflags')), pos)
elif token_id == 'id': elif token_id is Token.ID:
yield (token_id, token_value, pos) yield (token_id, token_value, pos)
elif token_id in _operator_lookup: elif token_id in _operator_lookup:
yield (token_id, _operator_lookup[token_id][token_value], pos) yield (token_id, _operator_lookup[token_id][token_value], pos)
elif token_id == 'punc': elif token_id is Token.PUNCT:
yield (_PUNCTUATIONS[token_value], token_value, pos) yield (_PUNCTUATIONS[token_value], token_value, pos)
else: else:
raise ExtractorError('Unexpected token at %d' % pos) raise ExtractorError('Unexpected token at %d' % pos)
@ -145,14 +147,14 @@ class TokenStream(object):
name, value, pos = self._last name, value, pos = self._last
else: else:
name, value, pos = self.peek() name, value, pos = self.peek()
if name != 'id' or value in _reserved_words: if name is not Token.ID or value in _reserved_words:
raise ExtractorError('Invalid identifier at %d' % pos) raise ExtractorError('Invalid identifier at %d' % pos)
def peek(self, count=1): def peek(self, count=1):
for _ in range(count - len(self.peeked)): for _ in range(count - len(self.peeked)):
token = next(self._ts, None) token = next(self._ts, None)
if token is None: if token is None:
self.peeked.append(('end', ';', len(self.code))) self.peeked.append((Token.END, ';', len(self.code)))
else: else:
self.peeked.append(token) self.peeked.append(token)
return self.peeked[count - 1] return self.peeked[count - 1]