2014-03-30 07:02:58 +02:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
2016-12-16 01:21:31 +01:00
|
|
|
from ..compat import compat_str
|
2016-12-04 19:15:35 +01:00
|
|
|
from ..utils import ExtractorError
|
2016-12-29 00:36:24 +01:00
|
|
|
from .jsparser import Parser
|
2016-12-15 10:56:45 +01:00
|
|
|
from .jsgrammar import Token, token_keys
|
2016-11-30 07:49:47 +01:00
|
|
|
|
2016-12-06 18:42:59 +01:00
|
|
|
|
2016-12-09 23:38:48 +01:00
|
|
|
class Context(object):
    """One execution scope of the interpreter.

    Attributes:
        ended: flag supplied by the caller (defaults to False); the
            interpreter sets it once a ``return`` statement has run.
        no_in: always initialised to True here.
            NOTE(review): presumably a parser/"in"-operator flag; it is not
            read anywhere in this file -- confirm against the parser.
        local_vars: dict mapping a variable name to the Reference that
            holds its value.
    """

    def __init__(self, variables=None, ended=False):
        """Create a scope, optionally pre-populated from ``variables``.

        ``variables`` may be anything ``dict()`` accepts (a mapping or an
        iterable of pairs); each value is wrapped in a Reference whose
        parent slot is this scope's ``local_vars`` dict.
        """
        super(Context, self).__init__()
        self.ended = ended
        self.no_in = True
        scope = self.local_vars = {}
        if variables is None:
            return
        # XXX validate identifiers
        scope.update(
            (name, Reference(initial, (scope, name)))
            for name, initial in dict(variables).items())
|
2016-12-09 23:38:48 +01:00
|
|
|
|
|
|
|
|
|
|
|
class Reference(object):
    """A value slot used by the interpreter.

    A reference stores a value and, optionally, where that value lives:
    ``parent`` is a ``(container, key)`` pair such that ``container[key]``
    is the slot this reference stands for.  A reference without a parent is
    read-only.
    """

    def __init__(self, value, parent=None):
        """Store ``value`` and the optional ``(container, key)`` parent."""
        super(Reference, self).__init__()
        self._value = value
        self._parent = parent

    @staticmethod
    def _unwrap(item):
        """Return the plain value behind ``item``.

        Nested references are resolved recursively; anything that is not a
        Reference (e.g. a raw value inside a list passed in from Python) is
        returned unchanged instead of failing on a missing ``getvalue``.
        """
        if isinstance(item, Reference):
            return item.getvalue(deep=True)
        return item

    def getvalue(self, deep=False):
        """Return the stored value.

        With ``deep=False`` the stored object is returned as is, so a
        container still holds Reference objects.  With ``deep=True`` lists
        and tuples are returned as a new list and dicts as a new dict, with
        every contained reference resolved at every nesting level.
        """
        value = self._value
        if not deep:
            return value
        if isinstance(value, (list, tuple)):
            return [self._unwrap(elem) for elem in value]
        if isinstance(value, dict):
            return dict((key, self._unwrap(prop)) for key, prop in value.items())
        return value

    def putvalue(self, value):
        """Assign ``value`` to the slot this reference points at.

        A fresh Reference is stored in the parent container under the same
        key and this reference's own value is updated as well.

        Returns the assigned value.
        Raises ExtractorError if the reference has no parent or the parent
        does not support item assignment.
        """
        if self._parent is None:
            raise ExtractorError('Trying to set a read-only reference')
        parent, key = self._parent
        if not hasattr(parent, '__setitem__'):
            raise ExtractorError('Unknown reference')
        parent.__setitem__(key, Reference(value, (parent, key)))
        # Keep this (possibly still referenced) object in sync with the slot.
        self._value = value
        return value

    def __repr__(self):
        if self._parent is not None:
            parent, key = self._parent
            return '<Reference value: %s, parent: %s@(0x%x), key: %s>' % (
                str(self._value), parent.__class__.__name__, id(parent), key)
        return '<Reference value: %s, parent: %s>' % (self._value, None)
|
2016-12-10 02:01:19 +01:00
|
|
|
|
2016-12-09 23:38:48 +01:00
|
|
|
|
2014-03-30 07:02:58 +02:00
|
|
|
class JSInterpreter(object):
    """Interpreter for the statement/expression tuples produced by Parser.

    Values are passed around wrapped in Reference objects.  Global variables
    live in ``global_vars``; local variables live in the ``local_vars`` dict
    of the current Context, exposed as ``this``.
    """
    # TODO support json

    # Sentinel standing for the JS ``undefined`` value.
    undefined = object()

    # Type of compiled regular expressions; ``re._pattern_type`` no longer
    # exists on Python 3.7+, so derive it from an actual compiled pattern.
    _REGEX_TYPE = type(re.compile(''))

    def __init__(self, code, variables=None):
        """Store ``code`` and wrap the optional initial global ``variables``.

        ``variables`` may be anything ``dict()`` accepts; each value is
        converted with create_reference().
        """
        super(JSInterpreter, self).__init__()
        self.code = code
        self.global_vars = {}
        if variables is not None:
            for k, v in dict(variables).items():
                # XXX validate identifiers
                self.global_vars[k] = self.create_reference(v, (self.global_vars, k))
        self._context = Context()
        self._context_stack = []

    @property
    def this(self):
        """The ``local_vars`` dict of the current context."""
        return self._context.local_vars

    def create_reference(self, value, parent_key):
        """Recursively wrap a Python value in Reference objects.

        dicts become dicts of references, lists/tuples/sets become lists of
        references, and scalars (numbers, strings, booleans, compiled
        regexes, None) are wrapped directly.  ``parent_key`` is the
        ``(container, key)`` pair of the outermost reference.

        Raises ExtractorError for any other type.
        """
        if isinstance(value, dict):
            o = {}
            for k, v in value.items():
                o[k] = self.create_reference(v, (o, k))
        elif isinstance(value, (list, tuple, set)):
            o = []
            for k, v in enumerate(value):
                # Append: index assignment on the still-empty list would
                # raise IndexError for the very first element.
                o.append(self.create_reference(v, (o, k)))
        elif isinstance(value, (int, float, compat_str, bool, self._REGEX_TYPE)) or value is None:
            o = value
        else:
            raise ExtractorError('Unsupported type, %s in variables' % type(value))

        return Reference(o, parent_key)

    def interpret_statement(self, stmt):
        """Execute one parsed statement.

        Returns the Reference produced by the statement, or None when the
        statement yields nothing (``stmt`` is None, declarations, empty
        blocks).  A ``return`` statement additionally sets ``ended`` on the
        current context.

        Raises ExtractorError for statement kinds that are not handled.
        """
        if stmt is None:
            return None

        name = stmt[0]
        ref = None
        if name == Token.FUNC:
            name, args, body = stmt[1:]
            if name is not None:
                if self._context_stack:
                    # Inside a pushed context: declare in the local scope.
                    self.this[name] = Reference(self.build_function(args, body), (self.this, name))
                else:
                    # Top level: the slot lives in global_vars, so the
                    # reference's parent must be global_vars as well.
                    self.global_vars[name] = Reference(
                        self.build_function(args, body), (self.global_vars, name))
            else:
                raise ExtractorError('Function expression is not yet implemented')
        elif name is Token.BLOCK:
            block = stmt[1]
            for stmt in block:
                s = self.interpret_statement(stmt)
                if s is not None:
                    # Keep the Reference: callers call getvalue() themselves.
                    ref = s
                if self._context.ended:
                    # A return inside the block ends the block as well.
                    break
        elif name is Token.VAR:
            for name, value in stmt[1]:
                value = self.interpret_expression(value).getvalue() if value is not None else self.undefined
                self.this[name] = Reference(value, (self.this, name))
        elif name is Token.EXPR:
            for expr in stmt[1]:
                ref = self.interpret_expression(expr)
        # if
        # continue, break
        elif name is Token.RETURN:
            ref = self.interpret_statement(stmt[1])
            self._context.ended = True
        # with
        # label
        # switch
        # throw
        # try
        # debugger
        else:
            raise ExtractorError('''Can't interpret statement called %s''' % name)
        return ref

    def interpret_expression(self, expr):
        """Evaluate one parsed expression and return its Reference.

        Returns None when ``expr`` is None.  Looking up an unknown
        identifier raises KeyError (the assignment branch relies on this to
        create new variables).

        Raises ExtractorError for expression kinds that are not handled,
        for invalid assignment targets and for operator expressions that
        leave more than one value on the stack.
        """
        if expr is None:
            return
        name = expr[0]

        if name is Token.ASSIGN:
            op, left, right = expr[1:]
            if op is None:
                ref = self.interpret_expression(left)
            else:
                try:
                    leftref = self.interpret_expression(left)
                except KeyError:
                    # Unknown identifier: if the target is a bare name,
                    # create it in the current scope as undefined.
                    lname = left[0]
                    key = None
                    if lname is Token.OPEXPR and len(left[1]) == 1:
                        lname = left[1][0][0]
                        if lname is Token.MEMBER:
                            lid, args, tail = left[1][0][1:]
                            if lid[0] is Token.ID and args is None and tail is None:
                                key = lid[1]
                    if key is not None:
                        u = Reference(self.undefined, (self.this, key))
                        leftref = self.this[key] = u
                    else:
                        raise ExtractorError('Invalid left-hand side in assignment')
                leftvalue = leftref.getvalue()
                rightvalue = self.interpret_expression(right).getvalue()
                leftref.putvalue(op(leftvalue, rightvalue))
                # XXX check specs what to return
                ref = leftref

        elif name is Token.EXPR:
            ref = self.interpret_statement(expr)

        elif name is Token.OPEXPR:
            stack = []
            postfix = []
            # FIXME support pre- and postfix operators
            # expr[1] is walked in order as a postfix (RPN) token stream.
            for token in expr[1]:
                # XXX relation 'in' 'instanceof'
                if token[0] in (Token.OP, Token.AOP, Token.LOP, Token.REL):
                    right = stack.pop()
                    left = stack.pop()
                    stack.append(Reference(token[1](left.getvalue(), right.getvalue())))
                # XXX add unary operator 'delete', 'void', 'instanceof'
                elif token[0] is Token.UOP:
                    right = stack.pop()
                    stack.append(Reference(token[1](right.getvalue())))
                elif token[0] is Token.PREFIX:
                    right = stack.pop()
                    stack.append(Reference(right.putvalue(token[1](right.getvalue()))))
                elif token[0] is Token.POSTFIX:
                    # Applied after the whole expression has been evaluated.
                    postfix.append((stack[-1], token[1]))
                else:
                    stack.append(self.interpret_expression(token))
            result = stack.pop()
            if not stack:
                # NOTE(review): when ``result`` is the postfix operand
                # itself it reflects the updated value -- confirm intent.
                for operand, op in postfix:
                    operand.putvalue(op(operand.getvalue()))
                ref = result
            else:
                raise ExtractorError('Expression has too many values')

        elif name is Token.MEMBER:
            # TODO interpret member
            target, args, tail = expr[1:]
            target = self.interpret_expression(target)
            if args is not None:
                # TODO interpret NewExpression
                pass
            # ``tail`` is a linked chain of (kind, value, next) accessors.
            while tail is not None:
                tail_name, tail_value, tail = tail
                if tail_name is Token.FIELD:
                    target = target.getvalue()[tail_value]
                elif tail_name is Token.ELEM:
                    index = self.interpret_expression(tail_value).getvalue()
                    target = target.getvalue()[index]
                elif tail_name is Token.CALL:
                    call_args = [self.interpret_expression(arg).getvalue() for arg in tail_value]
                    target = Reference(target.getvalue()(*call_args))
            ref = target

        elif name is Token.ID:
            # XXX error handling (unknown id)
            ref = (self.this[expr[1]] if expr[1] in self.this else
                   self.global_vars[expr[1]])

        # literal
        elif name in token_keys:
            ref = Reference(expr[1])

        elif name is Token.ARRAY:
            array = []
            for key, elem in enumerate(expr[1]):
                value = self.interpret_expression(elem).getvalue()
                array.append(Reference(value, (array, key)))
            ref = Reference(array)

        else:
            raise ExtractorError('''Can't interpret expression called %s''' % name)

        return ref

    @staticmethod
    def _split_argnames(args):
        """Split a raw ``a, b`` parameter list into clean names.

        Surrounding whitespace is removed and empty entries (e.g. from an
        empty parameter list) are dropped.
        """
        return [arg.strip() for arg in args.split(',') if arg.strip()]

    def extract_object(self, objname):
        """Find ``objname = {...};`` in the code and build its methods.

        Only ``key: function(args){code}`` members are supported.  Returns a
        dict mapping each key to a callable built by build_function().

        Raises ExtractorError if the object literal cannot be found.
        """
        obj = {}
        obj_m = re.search(
            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
            r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
            r'\}\s*;',
            self.code)
        if obj_m is None:
            raise ExtractorError('Could not find JS object %r' % objname)
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions
        fields_m = re.finditer(
            r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            fields)
        for f in fields_m:
            argnames = self._split_argnames(f.group('args'))
            obj[f.group('key')] = self.build_function(argnames, Parser(f.group('code')).parse())

        return obj

    def extract_function(self, funcname):
        """Find the definition of ``funcname`` in the code and build it.

        Recognises ``function name(...)``, ``name = function(...)`` after
        ``{``, ``;`` or ``,`` and ``var name = function(...)``.  The body is
        matched up to the first ``}``.

        Returns a Python callable; raises ExtractorError if no definition
        is found.
        """
        func_m = re.search(
            r'''(?x)
                (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
                \((?P<args>[^)]*)\)\s*
                \{(?P<code>[^}]+)\}''' % (
                re.escape(funcname), re.escape(funcname), re.escape(funcname)),
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        # Strip names so that ``function f(a, b)`` binds ``b``, not `` b``.
        argnames = self._split_argnames(func_m.group('args'))

        return self.build_function(argnames, Parser(func_m.group('code')).parse())

    def push_context(self, cx):
        """Make ``cx`` the current context, saving the previous one."""
        self._context_stack.append(self._context)
        self._context = cx

    def pop_context(self):
        """Restore the most recently saved context."""
        # XXX check underflow
        self._context = self._context_stack.pop()

    def call_function(self, funcname, *args):
        """Call the function named ``funcname`` with ``args``.

        The name is looked up in the current scope, then in the globals,
        and finally extracted from the source code.
        """
        if funcname in self.this:
            f = self.this[funcname]
        elif funcname in self.global_vars:
            f = self.global_vars[funcname]
        else:
            f = self.extract_function(funcname)
        if isinstance(f, Reference):
            # Scopes store functions wrapped in a Reference, which is not
            # callable itself.
            f = f.getvalue()
        return f(*args)

    def build_function(self, argnames, ast):
        """Return a Python callable that runs the statements in ``ast``.

        Each call runs in a fresh Context in which ``argnames`` are bound
        to the positional arguments.  Execution stops after a ``return``;
        the result is the deep value of the last executed statement (None
        if that statement produced nothing).
        """
        def resf(*args):
            self.push_context(Context(dict(zip(argnames, args))))
            try:
                res = None
                for stmt in ast:
                    res = self.interpret_statement(stmt)
                    res = None if res is None else res.getvalue(deep=True)
                    if self._context.ended:
                        break
                return res
            finally:
                # Always restore the caller's context, including when the
                # body falls off the end without ``return`` or raises.
                self.pop_context()
        return resf

    def run(self, cx=None):
        """Parse ``self.code`` and execute it statement by statement.

        If ``cx`` is given it is pushed as the current context first; it is
        popped again only when a ``return`` statement ended the run.
        Returns the deep value of the last executed statement (None if that
        statement produced nothing).
        """
        if cx is not None:
            self.push_context(cx)
        res = None
        for stmt in Parser(self.code).parse():
            res = self.interpret_statement(stmt)
            res = None if res is None else res.getvalue(deep=True)
            if self._context.ended:
                if cx is not None:
                    self.pop_context()
                break
        return res
|