[jsinterp] Token class for tokens
This commit is contained in:
parent
d422aefc03
commit
ce4a616c4a
@ -1,6 +1,20 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class Token(Enum):
|
||||||
|
COPEN, CCLOSE, POPEN, PCLOSE, SOPEN, SCLOSE = range(0,6)
|
||||||
|
DOT, END, COMMA, HOOK, COLON = range(6, 11)
|
||||||
|
AND, OR, INC, DEC, NOT, BNOT, DEL, VOID, TYPE = range(11, 20)
|
||||||
|
LT, GT, LE, GE, EQ, NE, SEQ, SNE = range(20, 28)
|
||||||
|
BOR, BXOR, BAND, RSHIFT, LSHIFT, URSHIFT, SUB, ADD, MOD, DIV, MUL = range(28, 39)
|
||||||
|
OP, AOP, UOP, LOP, REL = range(39, 44)
|
||||||
|
COMMENT, TOKEN, PUNCT = range(44, 47)
|
||||||
|
NULL, BOOL, ID, STR, INT, FLOAT, REGEX = range(47, 54)
|
||||||
|
reflag, rebody = 54, 55
|
||||||
|
|
||||||
|
|
||||||
__DECIMAL_RE = r'(?:[1-9][0-9]*)|0'
|
__DECIMAL_RE = r'(?:[1-9][0-9]*)|0'
|
||||||
__OCTAL_RE = r'0[0-7]+'
|
__OCTAL_RE = r'0[0-7]+'
|
||||||
@ -41,23 +55,24 @@ _REGEX_FLAGS_RE = r'(?![gimy]*(?P<reflag>[gimy])[gimy]*(?P=reflag))(?P<reflags>[
|
|||||||
_REGEX_RE = r'/(?!\*)(?P<rebody>(?:[^/\n]|(?:\\/))*)/(?:(?:%s)|(?:\s|$))' % _REGEX_FLAGS_RE
|
_REGEX_RE = r'/(?!\*)(?P<rebody>(?:[^/\n]|(?:\\/))*)/(?:(?:%s)|(?:\s|$))' % _REGEX_FLAGS_RE
|
||||||
|
|
||||||
_TOKENS = [
|
_TOKENS = [
|
||||||
('null', _NULL_RE),
|
(Token.NULL, _NULL_RE),
|
||||||
('bool', _BOOL_RE),
|
(Token.BOOL, _BOOL_RE),
|
||||||
('id', _NAME_RE),
|
(Token.ID, _NAME_RE),
|
||||||
('str', _STRING_RE),
|
(Token.STR, _STRING_RE),
|
||||||
('int', _INTEGER_RE),
|
(Token.INT, _INTEGER_RE),
|
||||||
('float', _FLOAT_RE),
|
(Token.FLOAT, _FLOAT_RE),
|
||||||
('regex', _REGEX_RE)
|
(Token.REGEX, _REGEX_RE)
|
||||||
]
|
]
|
||||||
|
|
||||||
COMMENT_RE = r'(?P<comment>/\*(?:(?!\*/)(?:\n|.))*\*/)'
|
COMMENT_RE = r'(?P<%s>/\*(?:(?!\*/)(?:\n|.))*\*/)' % Token.COMMENT.name
|
||||||
TOKENS_RE = r'|'.join('(?P<%(id)s>%(value)s)' % {'id': name, 'value': value}
|
TOKENS_RE = r'|'.join('(?P<%(id)s>%(value)s)' % {'id': name.name, 'value': value}
|
||||||
for name, value in _TOKENS)
|
for name, value in _TOKENS)
|
||||||
|
|
||||||
LOGICAL_OPERATORS_RE = r'(?P<lop>%s)' % r'|'.join(re.escape(value) for value in _logical_operator)
|
LOGICAL_OPERATORS_RE = r'(?P<%s>%s)' % (Token.LOP.name, r'|'.join(re.escape(value) for value in _logical_operator))
|
||||||
UNARY_OPERATORS_RE = r'(?P<uop>%s)' % r'|'.join(re.escape(value) for value in _unary_operator)
|
UNARY_OPERATORS_RE = r'(?P<%s>%s)' % (Token.UOP.name, r'|'.join(re.escape(value) for value in _unary_operator))
|
||||||
ASSIGN_OPERATORS_RE = r'(?P<aop>%s)' % r'|'.join(re.escape(value) if value != '=' else re.escape(value) + r'(?!\=)'
|
ASSIGN_OPERATORS_RE = r'(?P<%s>%s)' % (Token.AOP.name,
|
||||||
for value in _assign_operator)
|
r'|'.join(re.escape(value) if value != '=' else re.escape(value) + r'(?!\=)'
|
||||||
OPERATORS_RE = r'(?P<op>%s)' % r'|'.join(re.escape(value) for value in _operator)
|
for value in _assign_operator))
|
||||||
RELATIONS_RE = r'(?P<rel>{0:s})'.format(r'|'.join(re.escape(value) for value in _relation))
|
OPERATORS_RE = r'(?P<%s>%s)' % (Token.OP.name, r'|'.join(re.escape(value) for value in _operator))
|
||||||
PUNCTUATIONS_RE = r'(?P<punc>%s)' % r'|'.join(re.escape(value) for value in _punctuations)
|
RELATIONS_RE = r'(?P<%s>%s)' % (Token.REL.name, r'|'.join(re.escape(value) for value in _relation))
|
||||||
|
PUNCTUATIONS_RE = r'(?P<%s>%s)' % (Token.PUNCT.name, r'|'.join(re.escape(value) for value in _punctuations))
|
||||||
|
@ -4,8 +4,9 @@ import re
|
|||||||
|
|
||||||
from ..utils import ExtractorError
|
from ..utils import ExtractorError
|
||||||
from .tstream import TokenStream
|
from .tstream import TokenStream
|
||||||
|
from .jsgrammar import Token
|
||||||
|
|
||||||
_token_keys = 'null', 'bool', 'id', 'str', 'int', 'float', 'regex'
|
_token_keys = Token.NULL, Token.BOOL, Token.ID, Token.STR, Token.INT, Token.FLOAT, Token.REGEX
|
||||||
|
|
||||||
|
|
||||||
class JSInterpreter(object):
|
class JSInterpreter(object):
|
||||||
@ -26,24 +27,24 @@ class JSInterpreter(object):
|
|||||||
statement = None
|
statement = None
|
||||||
|
|
||||||
token_id, token_value, token_pos = token_stream.peek()
|
token_id, token_value, token_pos = token_stream.peek()
|
||||||
if token_id in ('cclose', 'end'):
|
if token_id in (Token.CCLOSE, Token.END):
|
||||||
# empty statement goes straight here
|
# empty statement goes straight here
|
||||||
return statement
|
return statement
|
||||||
if token_id == 'id' and token_value == 'function':
|
if token_id is Token.ID and token_value == 'function':
|
||||||
# TODO parse funcdecl
|
# TODO parse funcdecl
|
||||||
raise ExtractorError('Function declaration is not yet supported at %d' % token_pos)
|
raise ExtractorError('Function declaration is not yet supported at %d' % token_pos)
|
||||||
elif token_id == 'copen':
|
elif token_id is Token.COPEN:
|
||||||
# block
|
# block
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
statement_list = []
|
statement_list = []
|
||||||
for s in self.statements(token_stream, stack_top - 1):
|
for s in self.statements(token_stream, stack_top - 1):
|
||||||
statement_list.append(s)
|
statement_list.append(s)
|
||||||
token_id, token_value, token_pos = token_stream.peek()
|
token_id, token_value, token_pos = token_stream.peek()
|
||||||
if token_id == 'cclose':
|
if token_id is Token.CCLOSE:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
break
|
break
|
||||||
statement = ('block', statement_list)
|
statement = ('block', statement_list)
|
||||||
elif token_id == 'id':
|
elif token_id is Token.ID:
|
||||||
# TODO parse label
|
# TODO parse label
|
||||||
if token_value == 'var':
|
if token_value == 'var':
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
@ -52,26 +53,26 @@ class JSInterpreter(object):
|
|||||||
has_another = True
|
has_another = True
|
||||||
while has_another:
|
while has_another:
|
||||||
token_id, token_value, token_pos = token_stream.pop()
|
token_id, token_value, token_pos = token_stream.pop()
|
||||||
if token_id != 'id':
|
if token_id is not Token.ID:
|
||||||
raise ExtractorError('Missing variable name at %d' % token_pos)
|
raise ExtractorError('Missing variable name at %d' % token_pos)
|
||||||
token_stream.chk_id(last=True)
|
token_stream.chk_id(last=True)
|
||||||
variables.append(token_value)
|
variables.append(token_value)
|
||||||
|
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'aop':
|
if peek_id is Token.AOP:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
init.append(self._assign_expression(token_stream, stack_top - 1))
|
init.append(self._assign_expression(token_stream, stack_top - 1))
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
else:
|
else:
|
||||||
init.append(JSInterpreter.undefined)
|
init.append(JSInterpreter.undefined)
|
||||||
|
|
||||||
if peek_id == 'end':
|
if peek_id is Token.END:
|
||||||
has_another = False
|
has_another = False
|
||||||
elif peek_id == 'comma':
|
elif peek_id is Token.COMMA:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
# FIXME automatic end insertion
|
# FIXME automatic end insertion
|
||||||
# - token_id == cclose
|
# - token_id is Token.CCLOSE
|
||||||
# - check line terminator
|
# - check line terminator
|
||||||
# - restricted token
|
# - restricted token
|
||||||
raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
|
raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
|
||||||
@ -89,7 +90,7 @@ class JSInterpreter(object):
|
|||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
statement = ('return', self._expression(token_stream, stack_top - 1))
|
statement = ('return', self._expression(token_stream, stack_top - 1))
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id != 'end':
|
if peek_id is not Token.END:
|
||||||
# FIXME automatic end insertion
|
# FIXME automatic end insertion
|
||||||
raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
|
raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
|
||||||
elif token_value == 'with':
|
elif token_value == 'with':
|
||||||
@ -113,13 +114,13 @@ class JSInterpreter(object):
|
|||||||
has_another = True
|
has_another = True
|
||||||
while has_another:
|
while has_another:
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if not (peek_id == 'copen' and peek_id == 'id' and peek_value == 'function'):
|
if not (peek_id is Token.COPEN and peek_id is Token.ID and peek_value == 'function'):
|
||||||
expr_list.append(self._assign_expression(token_stream, stack_top - 1))
|
expr_list.append(self._assign_expression(token_stream, stack_top - 1))
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
|
|
||||||
if peek_id == 'end':
|
if peek_id is Token.END:
|
||||||
has_another = False
|
has_another = False
|
||||||
elif peek_id == 'comma':
|
elif peek_id is Token.COMMA:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
# FIXME automatic end insertion
|
# FIXME automatic end insertion
|
||||||
@ -144,9 +145,9 @@ class JSInterpreter(object):
|
|||||||
while has_another:
|
while has_another:
|
||||||
exprs.append(self._assign_expression(token_stream, stack_top - 1))
|
exprs.append(self._assign_expression(token_stream, stack_top - 1))
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'comma':
|
if peek_id is Token.COMMA:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
elif peek_id == 'id' and peek_value == 'yield':
|
elif peek_id is Token.ID and peek_value == 'yield':
|
||||||
# TODO parse yield
|
# TODO parse yield
|
||||||
raise ExtractorError('Yield statement is not yet supported at %d' % peek_pos)
|
raise ExtractorError('Yield statement is not yet supported at %d' % peek_pos)
|
||||||
else:
|
else:
|
||||||
@ -159,7 +160,7 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
left = self._conditional_expression(token_stream, stack_top - 1)
|
left = self._conditional_expression(token_stream, stack_top - 1)
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'aop':
|
if peek_id is Token.AOP:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
_, op = peek_value
|
_, op = peek_value
|
||||||
right = self._assign_expression(token_stream, stack_top - 1)
|
right = self._assign_expression(token_stream, stack_top - 1)
|
||||||
@ -170,7 +171,7 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
def _member_expression(self, token_stream, stack_top):
|
def _member_expression(self, token_stream, stack_top):
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'id' and peek_value == 'new':
|
if peek_id is Token.ID and peek_value == 'new':
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
target = self._member_expression(token_stream, stack_top - 1)
|
target = self._member_expression(token_stream, stack_top - 1)
|
||||||
args = self._arguments(token_stream, stack_top - 1)
|
args = self._arguments(token_stream, stack_top - 1)
|
||||||
@ -187,30 +188,30 @@ class JSInterpreter(object):
|
|||||||
raise ExtractorError('Recursion limit reached')
|
raise ExtractorError('Recursion limit reached')
|
||||||
|
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'dot':
|
if peek_id is Token.DOT:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'dot':
|
if peek_id is Token.DOT:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
elif peek_id == 'popen':
|
elif peek_id is Token.POPEN:
|
||||||
# TODO handle field query
|
# TODO handle field query
|
||||||
raise ExtractorError('Field querry is not yet supported at %d' % peek_pos)
|
raise ExtractorError('Field querry is not yet supported at %d' % peek_pos)
|
||||||
|
|
||||||
if peek_id == 'id':
|
if peek_id is Token.ID:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
return ('field', peek_value, self._member_tail(token_stream, stack_top - 1))
|
return ('field', peek_value, self._member_tail(token_stream, stack_top - 1))
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Identifier name expected at %d' % peek_pos)
|
raise ExtractorError('Identifier name expected at %d' % peek_pos)
|
||||||
elif peek_id == 'sopen':
|
elif peek_id is Token.POPEN:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
index = self._expression(token_stream, stack_top - 1)
|
index = self._expression(token_stream, stack_top - 1)
|
||||||
token_id, token_value, token_pos = token_stream.pop()
|
token_id, token_value, token_pos = token_stream.pop()
|
||||||
if token_id == 'sclose':
|
if token_id is Token.SCLOSE:
|
||||||
return ('element', index, self._member_tail(token_stream, stack_top - 1))
|
return ('element', index, self._member_tail(token_stream, stack_top - 1))
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unexpected sequence at %d' % token_pos)
|
raise ExtractorError('Unexpected sequence at %d' % token_pos)
|
||||||
elif peek_id == 'popen':
|
elif peek_id is Token.POPEN:
|
||||||
args = self._arguments(token_stream, stack_top - 1)
|
args = self._arguments(token_stream, stack_top - 1)
|
||||||
return ('call', args, self._member_tail(token_stream, stack_top - 1))
|
return ('call', args, self._member_tail(token_stream, stack_top - 1))
|
||||||
else:
|
else:
|
||||||
@ -224,7 +225,7 @@ class JSInterpreter(object):
|
|||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id in _token_keys:
|
if peek_id in _token_keys:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
if peek_id == 'id':
|
if peek_id is Token.ID:
|
||||||
# this
|
# this
|
||||||
if peek_value == 'this':
|
if peek_value == 'this':
|
||||||
return ('rsv', 'this')
|
return ('rsv', 'this')
|
||||||
@ -235,24 +236,24 @@ class JSInterpreter(object):
|
|||||||
# id
|
# id
|
||||||
else:
|
else:
|
||||||
token_stream.chk_id(last=True)
|
token_stream.chk_id(last=True)
|
||||||
return ('id', peek_value)
|
return (Token.ID, peek_value)
|
||||||
# literals
|
# literals
|
||||||
else:
|
else:
|
||||||
return (peek_id, peek_value)
|
return (peek_id, peek_value)
|
||||||
# array
|
# array
|
||||||
elif peek_id == 'sopen':
|
elif peek_id is Token.SOPEN:
|
||||||
return self._array_literal(token_stream, stack_top - 1)
|
return self._array_literal(token_stream, stack_top - 1)
|
||||||
# object
|
# object
|
||||||
elif peek_id == 'copen':
|
elif peek_id is Token.SCLOSE:
|
||||||
# TODO parse object
|
# TODO parse object
|
||||||
raise ExtractorError('Object literals is not yet supported at %d' % peek_pos)
|
raise ExtractorError('Object literals is not yet supported at %d' % peek_pos)
|
||||||
# expr
|
# expr
|
||||||
elif peek_id == 'popen':
|
elif peek_id is Token.POPEN:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
open_pos = peek_pos
|
open_pos = peek_pos
|
||||||
expr = self._expression(token_stream, stack_top - 1)
|
expr = self._expression(token_stream, stack_top - 1)
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id != 'pclose':
|
if peek_id is not Token.PCLOSE:
|
||||||
raise ExtractorError('Unbalanced parentheses at %d' % open_pos)
|
raise ExtractorError('Unbalanced parentheses at %d' % open_pos)
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
return ('expr', expr)
|
return ('expr', expr)
|
||||||
@ -265,7 +266,7 @@ class JSInterpreter(object):
|
|||||||
raise ExtractorError('Recursion limit reached')
|
raise ExtractorError('Recursion limit reached')
|
||||||
|
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'popen':
|
if peek_id is Token.POPEN:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
open_pos = peek_pos
|
open_pos = peek_pos
|
||||||
else:
|
else:
|
||||||
@ -273,7 +274,7 @@ class JSInterpreter(object):
|
|||||||
args = []
|
args = []
|
||||||
while True:
|
while True:
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'pclose':
|
if peek_id is Token.PCLOSE:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
return args
|
return args
|
||||||
# FIXME handle infor
|
# FIXME handle infor
|
||||||
@ -281,7 +282,7 @@ class JSInterpreter(object):
|
|||||||
# TODO parse generator expression
|
# TODO parse generator expression
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
|
|
||||||
if peek_id not in ('comma', 'pclose'):
|
if peek_id not in (Token.COMMA, Token.PCLOSE):
|
||||||
raise ExtractorError('Unbalanced parentheses at %d' % open_pos)
|
raise ExtractorError('Unbalanced parentheses at %d' % open_pos)
|
||||||
|
|
||||||
def _array_literal(self, token_stream, stack_top):
|
def _array_literal(self, token_stream, stack_top):
|
||||||
@ -290,7 +291,7 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
# TODO check no linebreak
|
# TODO check no linebreak
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id != 'sopen':
|
if peek_id is not Token.SOPEN:
|
||||||
raise ExtractorError('Array expected at %d' % peek_pos)
|
raise ExtractorError('Array expected at %d' % peek_pos)
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
elements = []
|
elements = []
|
||||||
@ -298,21 +299,21 @@ class JSInterpreter(object):
|
|||||||
has_another = True
|
has_another = True
|
||||||
while has_another:
|
while has_another:
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'comma':
|
if peek_id is Token.COMMA:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
elements.append(None)
|
elements.append(None)
|
||||||
elif peek_id == 'sclose':
|
elif peek_id is Token.SCLOSE:
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
has_another = False
|
has_another = False
|
||||||
elif peek_id == 'id' and peek_value == 'for':
|
elif peek_id is Token.ID and peek_value == 'for':
|
||||||
# TODO parse array comprehension
|
# TODO parse array comprehension
|
||||||
raise ExtractorError('Array comprehension is not yet supported at %d' % peek_pos)
|
raise ExtractorError('Array comprehension is not yet supported at %d' % peek_pos)
|
||||||
else:
|
else:
|
||||||
elements.append(self._assign_expression(token_stream, stack_top - 1))
|
elements.append(self._assign_expression(token_stream, stack_top - 1))
|
||||||
peek_id, peek_value, peek_pos = token_stream.pop()
|
peek_id, peek_value, peek_pos = token_stream.pop()
|
||||||
if peek_id == 'sclose':
|
if peek_id is Token.SCLOSE:
|
||||||
has_another = False
|
has_another = False
|
||||||
elif peek_id != 'comma':
|
elif peek_id is not Token.COMMA:
|
||||||
raise ExtractorError('Expected , after element at %d' % peek_pos)
|
raise ExtractorError('Expected , after element at %d' % peek_pos)
|
||||||
|
|
||||||
return ('array', elements)
|
return ('array', elements)
|
||||||
@ -323,11 +324,11 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
expr = self._operator_expression(token_stream, stack_top - 1)
|
expr = self._operator_expression(token_stream, stack_top - 1)
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'hook':
|
if peek_id is Token.HOOK:
|
||||||
hook_pos = peek_pos
|
hook_pos = peek_pos
|
||||||
true_expr = self._assign_expression(token_stream, stack_top - 1)
|
true_expr = self._assign_expression(token_stream, stack_top - 1)
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'colon':
|
if peek_id is Token.COLON:
|
||||||
false_expr = self._assign_expression(token_stream, stack_top - 1)
|
false_expr = self._assign_expression(token_stream, stack_top - 1)
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Missing : in conditional expression at %d' % hook_pos)
|
raise ExtractorError('Missing : in conditional expression at %d' % hook_pos)
|
||||||
@ -371,18 +372,18 @@ class JSInterpreter(object):
|
|||||||
has_prefix = True
|
has_prefix = True
|
||||||
while has_prefix:
|
while has_prefix:
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if peek_id == 'uop':
|
if peek_id is Token.UOP:
|
||||||
name, op = peek_value
|
name, op = peek_value
|
||||||
had_inc = name in ('inc', 'dec')
|
had_inc = name in (Token.INC, Token.DEC)
|
||||||
while stack and stack[-1][0] > 16:
|
while stack and stack[-1][0] > 16:
|
||||||
_, stack_id, stack_op = stack.pop()
|
_, stack_id, stack_op = stack.pop()
|
||||||
out.append((stack_id, stack_op))
|
out.append((stack_id, stack_op))
|
||||||
stack.append((16, peek_id, op))
|
stack.append((16, peek_id, op))
|
||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
if had_inc and peek_id != 'id':
|
if had_inc and peek_id is not Token.ID:
|
||||||
raise ExtractorError('Prefix operator has to be followed by an identifier at %d' % peek_pos)
|
raise ExtractorError('Prefix operator has to be followed by an identifier at %d' % peek_pos)
|
||||||
has_prefix = peek_id == 'uop'
|
has_prefix = peek_id is Token.UOP
|
||||||
else:
|
else:
|
||||||
has_prefix = False
|
has_prefix = False
|
||||||
|
|
||||||
@ -391,11 +392,11 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
# postfix
|
# postfix
|
||||||
if peek_id == 'uop':
|
if peek_id is Token.UOP:
|
||||||
if had_inc:
|
if had_inc:
|
||||||
raise ExtractorError('''Can't have prefix and postfix operator at the same time at %d''' % peek_pos)
|
raise ExtractorError('''Can't have prefix and postfix operator at the same time at %d''' % peek_pos)
|
||||||
name, op = peek_value
|
name, op = peek_value
|
||||||
if name in ('inc', 'dec'):
|
if name in (Token.INC, Token.DEC):
|
||||||
prec = 17
|
prec = 17
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unexpected operator at %d' % peek_pos)
|
raise ExtractorError('Unexpected operator at %d' % peek_pos)
|
||||||
@ -406,27 +407,27 @@ class JSInterpreter(object):
|
|||||||
token_stream.pop()
|
token_stream.pop()
|
||||||
peek_id, peek_value, peek_pos = token_stream.peek()
|
peek_id, peek_value, peek_pos = token_stream.peek()
|
||||||
|
|
||||||
if peek_id == 'rel':
|
if peek_id is Token.REL:
|
||||||
name, op = peek_value
|
name, op = peek_value
|
||||||
elif peek_id == 'op':
|
elif peek_id is Token.OP:
|
||||||
name, op = peek_value
|
name, op = peek_value
|
||||||
if name in ('mul', 'div', 'mod'):
|
if name in (Token.MUL, Token.DIV, Token.MOD):
|
||||||
prec = 14
|
prec = 14
|
||||||
elif name in ('add', 'sub'):
|
elif name in (Token.ADD, Token.SUB):
|
||||||
prec = 13
|
prec = 13
|
||||||
elif name.endswith('shift'):
|
elif name in (Token.RSHIFT, Token.LSHIFT, Token.URSHIFT):
|
||||||
prec = 12
|
prec = 12
|
||||||
elif name == 'band':
|
elif name is Token.BAND:
|
||||||
prec = 9
|
prec = 9
|
||||||
elif name == 'bxor':
|
elif name is Token.BXOR:
|
||||||
prec = 8
|
prec = 8
|
||||||
elif name == 'bor':
|
elif name is Token.BOR:
|
||||||
prec = 7
|
prec = 7
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unexpected operator at %d' % peek_pos)
|
raise ExtractorError('Unexpected operator at %d' % peek_pos)
|
||||||
elif peek_id == 'lop':
|
elif peek_id is Token.LOP:
|
||||||
name, op = peek_value
|
name, op = peek_value
|
||||||
prec = {'or': 5, 'and': 6}[name]
|
prec = {Token.OR: 5, Token.AND: 6}[name]
|
||||||
else:
|
else:
|
||||||
has_another = False
|
has_another = False
|
||||||
prec = 4 # empties stack
|
prec = 4 # empties stack
|
||||||
@ -441,12 +442,12 @@ class JSInterpreter(object):
|
|||||||
return ('rpn', out)
|
return ('rpn', out)
|
||||||
|
|
||||||
# TODO use context instead local_vars in argument
|
# TODO use context instead local_vars in argument
|
||||||
|
|
||||||
def getvalue(self, ref, local_vars):
|
def getvalue(self, ref, local_vars):
|
||||||
if ref is None:
|
if ref is None or ref is self.undefined or isinstance(ref, (int, float, str)): # not Token
|
||||||
return None
|
return ref
|
||||||
ref_id, ref_value = ref
|
ref_id, ref_value = ref
|
||||||
if ref_id == 'id':
|
if ref_id is Token.ID:
|
||||||
return local_vars[ref_value]
|
return local_vars[ref_value]
|
||||||
elif ref_id in _token_keys:
|
elif ref_id in _token_keys:
|
||||||
return ref_value
|
return ref_value
|
||||||
@ -512,20 +513,11 @@ class JSInterpreter(object):
|
|||||||
rpn = expr[1]
|
rpn = expr[1]
|
||||||
while rpn:
|
while rpn:
|
||||||
token = rpn.pop(0)
|
token = rpn.pop(0)
|
||||||
if token[0] in ('op', 'aop', 'lop', 'rel'):
|
if token[0] in (Token.OP, Token.AOP, Token.UOP, Token.LOP, Token.REL):
|
||||||
right = stack.pop()
|
right = stack.pop()
|
||||||
left = stack.pop()
|
left = stack.pop()
|
||||||
result = token[1](self.getvalue(left, local_vars), self.getvalue(right, local_vars))
|
stack.append(token[1](self.getvalue(left, local_vars), self.getvalue(right, local_vars)))
|
||||||
if type(result) == int:
|
elif token[0] is Token.UOP:
|
||||||
type_id = 'int'
|
|
||||||
elif type(result) == float:
|
|
||||||
type_id = 'float'
|
|
||||||
elif type(result) == str:
|
|
||||||
type_id = 'str'
|
|
||||||
else:
|
|
||||||
type_id = str(type(result))
|
|
||||||
stack.append((type_id, result))
|
|
||||||
elif token[0] == 'uop':
|
|
||||||
right = stack.pop()
|
right = stack.pop()
|
||||||
stack.append(token[1](self.getvalue(right, local_vars)))
|
stack.append(token[1](self.getvalue(right, local_vars)))
|
||||||
else:
|
else:
|
||||||
@ -551,7 +543,7 @@ class JSInterpreter(object):
|
|||||||
# TODO interpret call
|
# TODO interpret call
|
||||||
raise ExtractorError('''Can't interpret expression called %s''' % tail_name)
|
raise ExtractorError('''Can't interpret expression called %s''' % tail_name)
|
||||||
return target
|
return target
|
||||||
elif name == 'id':
|
elif name is Token.ID:
|
||||||
return local_vars[expr[1]]
|
return local_vars[expr[1]]
|
||||||
|
|
||||||
# literal
|
# literal
|
||||||
|
@ -12,71 +12,72 @@ from .jsgrammar import (
|
|||||||
UNARY_OPERATORS_RE,
|
UNARY_OPERATORS_RE,
|
||||||
RELATIONS_RE,
|
RELATIONS_RE,
|
||||||
ASSIGN_OPERATORS_RE,
|
ASSIGN_OPERATORS_RE,
|
||||||
OPERATORS_RE
|
OPERATORS_RE,
|
||||||
|
Token
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
_PUNCTUATIONS = {
|
_PUNCTUATIONS = {
|
||||||
'{': 'copen',
|
'{': Token.COPEN,
|
||||||
'}': 'cclose',
|
'}': Token.CCLOSE,
|
||||||
'(': 'popen',
|
'(': Token.POPEN,
|
||||||
')': 'pclose',
|
')': Token.PCLOSE,
|
||||||
'[': 'sopen',
|
'[': Token.SOPEN,
|
||||||
']': 'sclose',
|
']': Token.SCLOSE,
|
||||||
'.': 'dot',
|
'.': Token.DOT,
|
||||||
';': 'end',
|
';': Token.END,
|
||||||
',': 'comma',
|
',': Token.COMMA,
|
||||||
'?': 'hook',
|
'?': Token.HOOK,
|
||||||
':': 'colon'
|
':': Token.COLON
|
||||||
}
|
}
|
||||||
_LOGICAL_OPERATORS = {
|
_LOGICAL_OPERATORS = {
|
||||||
'&&': ('and', lambda cur, right: cur and right),
|
'&&': (Token.AND, lambda cur, right: cur and right),
|
||||||
'||': ('or', lambda cur, right: cur or right)
|
'||': (Token.OR, lambda cur, right: cur or right)
|
||||||
}
|
}
|
||||||
_UNARY_OPERATORS = {
|
_UNARY_OPERATORS = {
|
||||||
'++': ('inc', lambda cur: cur + 1),
|
'++': (Token.INC, lambda cur: cur + 1),
|
||||||
'--': ('dec', lambda cur: cur - 1),
|
'--': (Token.DEC, lambda cur: cur - 1),
|
||||||
'!': ('not', operator.not_),
|
'!': (Token.NOT, operator.not_),
|
||||||
'~': ('bnot', lambda cur: cur ^ -1),
|
'~': (Token.BNOT, lambda cur: cur ^ -1),
|
||||||
# XXX define these operators
|
# XXX define these operators
|
||||||
'delete': ('del', None),
|
'delete': (Token.DEL, None),
|
||||||
'void': ('void', None),
|
'void': (Token.VOID, None),
|
||||||
'typeof': ('type', lambda cur: type(cur))
|
'typeof': (Token.TYPE, lambda cur: type(cur))
|
||||||
}
|
}
|
||||||
_RELATIONS = {
|
_RELATIONS = {
|
||||||
'<': ('lt', operator.lt),
|
'<': (Token.LT, operator.lt),
|
||||||
'>': ('gt', operator.gt),
|
'>': (Token.GT, operator.gt),
|
||||||
'<=': ('le', operator.le),
|
'<=': (Token.LE, operator.le),
|
||||||
'>=': ('ge', operator.ge),
|
'>=': (Token.GE, operator.ge),
|
||||||
# XXX check python and JavaScript equality difference
|
# XXX check python and JavaScript equality difference
|
||||||
'==': ('eq', operator.eq),
|
'==': (Token.EQ, operator.eq),
|
||||||
'!=': ('ne', operator.ne),
|
'!=': (Token.NE, operator.ne),
|
||||||
'===': ('seq', lambda cur, right: cur == right and type(cur) == type(right)),
|
'===': (Token.SEQ, lambda cur, right: cur == right and type(cur) == type(right)),
|
||||||
'!==': ('sne', lambda cur, right: not cur == right or not type(cur) == type(right))
|
'!==': (Token.SNE, lambda cur, right: not cur == right or not type(cur) == type(right))
|
||||||
}
|
}
|
||||||
_OPERATORS = {
|
_OPERATORS = {
|
||||||
'|': ('bor', operator.or_),
|
'|': (Token.BOR, operator.or_),
|
||||||
'^': ('bxor', operator.xor),
|
'^': (Token.BXOR, operator.xor),
|
||||||
'&': ('band', operator.and_),
|
'&': (Token.BAND, operator.and_),
|
||||||
# NOTE convert to int before shift float
|
# NOTE convert to int before shift float
|
||||||
'>>': ('rshift', operator.rshift),
|
'>>': (Token.RSHIFT, operator.rshift),
|
||||||
'<<': ('lshift', operator.lshift),
|
'<<': (Token.LSHIFT, operator.lshift),
|
||||||
'>>>': ('urshift', lambda cur, right: cur >> right if cur >= 0 else (cur + 0x100000000) >> right),
|
'>>>': (Token.URSHIFT, lambda cur, right: cur >> right if cur >= 0 else (cur + 0x100000000) >> right),
|
||||||
'-': ('sub', operator.sub),
|
'-': (Token.SUB, operator.sub),
|
||||||
'+': ('add', operator.add),
|
'+': (Token.ADD, operator.add),
|
||||||
'%': ('mod', operator.mod),
|
'%': (Token.MOD, operator.mod),
|
||||||
'/': ('div', operator.truediv),
|
'/': (Token.DIV, operator.truediv),
|
||||||
'*': ('mul', operator.mul)
|
'*': (Token.MUL, operator.mul)
|
||||||
}
|
}
|
||||||
_ASSIGN_OPERATORS = dict((op + '=', ('set_%s' % token[0], token[1])) for op, token in _OPERATORS.items())
|
_ASSIGN_OPERATORS = dict((op + '=', ('set_%s' % token[0], token[1])) for op, token in _OPERATORS.items())
|
||||||
_ASSIGN_OPERATORS['='] = ('set', lambda cur, right: right)
|
_ASSIGN_OPERATORS['='] = ('set', lambda cur, right: right)
|
||||||
|
|
||||||
_operator_lookup = {
|
_operator_lookup = {
|
||||||
'op': _OPERATORS,
|
Token.OP: _OPERATORS,
|
||||||
'aop': _ASSIGN_OPERATORS,
|
Token.AOP: _ASSIGN_OPERATORS,
|
||||||
'uop': _UNARY_OPERATORS,
|
Token.UOP: _UNARY_OPERATORS,
|
||||||
'lop': _LOGICAL_OPERATORS,
|
Token.LOP: _LOGICAL_OPERATORS,
|
||||||
'rel': _RELATIONS
|
Token.REL: _RELATIONS
|
||||||
}
|
}
|
||||||
# only to check ids
|
# only to check ids
|
||||||
_reserved_words = ('break', 'case', 'catch', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'finally',
|
_reserved_words = ('break', 'case', 'catch', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'finally',
|
||||||
@ -109,29 +110,30 @@ class TokenStream(object):
|
|||||||
token_id = feed_m.lastgroup
|
token_id = feed_m.lastgroup
|
||||||
token_value = feed_m.group(token_id)
|
token_value = feed_m.group(token_id)
|
||||||
pos = feed_m.start(token_id)
|
pos = feed_m.start(token_id)
|
||||||
|
token_id = Token[token_id]
|
||||||
self.ended = feed_m.end() >= len(self.code) # because how yield works
|
self.ended = feed_m.end() >= len(self.code) # because how yield works
|
||||||
if token_id == 'comment':
|
if token_id is Token.COMMENT:
|
||||||
pass
|
pass
|
||||||
# TODO date
|
# TODO date
|
||||||
elif token_id == 'null':
|
elif token_id is Token.NULL:
|
||||||
yield (token_id, None, pos)
|
yield (token_id, None, pos)
|
||||||
elif token_id == 'bool':
|
elif token_id is Token.BOOL:
|
||||||
yield (token_id, {'true': True, 'false': False}[token_value], pos)
|
yield (token_id, {'true': True, 'false': False}[token_value], pos)
|
||||||
elif token_id == 'str':
|
elif token_id is Token.STR:
|
||||||
yield (token_id, token_value, pos)
|
yield (token_id, token_value, pos)
|
||||||
elif token_id == 'int':
|
elif token_id is Token.INT:
|
||||||
yield (token_id, int(token_value), pos)
|
yield (token_id, int(token_value), pos)
|
||||||
elif token_id == 'float':
|
elif token_id is Token.FLOAT:
|
||||||
yield (token_id, float(token_value), pos)
|
yield (token_id, float(token_value), pos)
|
||||||
elif token_id == 'regex':
|
elif token_id is Token.REGEX:
|
||||||
# TODO error handling
|
# TODO error handling
|
||||||
regex = re.compile(feed_m.group('rebody'))
|
regex = re.compile(feed_m.group('rebody'))
|
||||||
yield (token_id, (regex, feed_m.group('reflags')), pos)
|
yield (token_id, (regex, feed_m.group('reflags')), pos)
|
||||||
elif token_id == 'id':
|
elif token_id is Token.ID:
|
||||||
yield (token_id, token_value, pos)
|
yield (token_id, token_value, pos)
|
||||||
elif token_id in _operator_lookup:
|
elif token_id in _operator_lookup:
|
||||||
yield (token_id, _operator_lookup[token_id][token_value], pos)
|
yield (token_id, _operator_lookup[token_id][token_value], pos)
|
||||||
elif token_id == 'punc':
|
elif token_id is Token.PUNCT:
|
||||||
yield (_PUNCTUATIONS[token_value], token_value, pos)
|
yield (_PUNCTUATIONS[token_value], token_value, pos)
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unexpected token at %d' % pos)
|
raise ExtractorError('Unexpected token at %d' % pos)
|
||||||
@ -145,14 +147,14 @@ class TokenStream(object):
|
|||||||
name, value, pos = self._last
|
name, value, pos = self._last
|
||||||
else:
|
else:
|
||||||
name, value, pos = self.peek()
|
name, value, pos = self.peek()
|
||||||
if name != 'id' or value in _reserved_words:
|
if name is not Token.ID or value in _reserved_words:
|
||||||
raise ExtractorError('Invalid identifier at %d' % pos)
|
raise ExtractorError('Invalid identifier at %d' % pos)
|
||||||
|
|
||||||
def peek(self, count=1):
|
def peek(self, count=1):
|
||||||
for _ in range(count - len(self.peeked)):
|
for _ in range(count - len(self.peeked)):
|
||||||
token = next(self._ts, None)
|
token = next(self._ts, None)
|
||||||
if token is None:
|
if token is None:
|
||||||
self.peeked.append(('end', ';', len(self.code)))
|
self.peeked.append((Token.END, ';', len(self.code)))
|
||||||
else:
|
else:
|
||||||
self.peeked.append(token)
|
self.peeked.append(token)
|
||||||
return self.peeked[count - 1]
|
return self.peeked[count - 1]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user