[jsinterp] First parser tests

This commit is contained in:
sulyi 2016-12-08 03:55:23 +01:00
parent 8ff8a706ed
commit 599b9db878
2 changed files with 338 additions and 18 deletions

View File

@ -0,0 +1,309 @@
#!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.jsinterp import JSInterpreter
from youtube_dl.jsinterp.jsgrammar import Token
from youtube_dl.jsinterp.tstream import (
_OPERATORS,
_ASSIGN_OPERATORS,
_LOGICAL_OPERATORS,
_UNARY_OPERATORS,
_RELATIONS
)
class TestJSInterpreterParser(unittest.TestCase):
    """Compare the output of ``JSInterpreter.statements()`` with hand-written ASTs.

    Every test feeds a small JavaScript snippet to ``JSInterpreter`` and checks
    the list of statements it yields against a literal tree built from
    ``Token`` members and operator table entries.  Tests decorated with
    ``unittest.skip`` are placeholders whose expectations are not written yet.
    """

    @classmethod
    def _resolve_iterators(cls, node):
        """Return *node* with every lazy iterator replaced by a list.

        On Python 3 ``zip`` returns an iterator, and iterators compare by
        identity, so a tree containing one can never be equal to another tree.
        Tuples and lists keep their type; everything else is returned as is.
        """
        if isinstance(node, tuple):
            return tuple(cls._resolve_iterators(child) for child in node)
        if isinstance(node, list):
            return [cls._resolve_iterators(child) for child in node]
        # '__next__' marks a Python 3 iterator, 'next' a Python 2 one
        if hasattr(node, '__next__') or hasattr(node, 'next'):
            return [cls._resolve_iterators(child) for child in node]
        return node

    def _assert_ast(self, jsi, expected):
        """Assert that ``jsi.statements()`` yields exactly the *expected* list."""
        self.assertEqual(
            self._resolve_iterators(list(jsi.statements())),
            self._resolve_iterators(expected))

    def test_basic(self):
        """An empty statement and a bare ``return`` of an integer literal."""
        jsi = JSInterpreter(';')
        ast = [None]
        self._assert_ast(jsi, ast)

        jsi = JSInterpreter('return 42;')
        ast = [(Token.RETURN,
                (Token.EXPR, [
                    (Token.ASSIGN,
                     None,
                     (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 42), None, None)]),
                     None)
                ])
                )]
        self._assert_ast(jsi, ast)

    def test_calc(self):
        """Binary operators are expected in postfix order inside OPEXPR."""
        jsi = JSInterpreter('return 2*a+1;')
        ast = [(Token.RETURN,
                (Token.EXPR, [
                    (Token.ASSIGN,
                     None,
                     (Token.OPEXPR, [
                         # Reverse Polish Notation!
                         (Token.MEMBER, (Token.INT, 2), None, None),
                         (Token.MEMBER, (Token.ID, 'a'), None, None),
                         (Token.OP, _OPERATORS['*'][1]),
                         (Token.MEMBER, (Token.INT, 1), None, None),
                         (Token.OP, _OPERATORS['+'][1]),
                     ]),
                     None)
                ])
                )]
        self._assert_ast(jsi, ast)

    def test_empty_return(self):
        """``return;`` followed by a call statement without a trailing semicolon."""
        jsi = JSInterpreter('return; y()')
        ast = [(Token.RETURN,
                (Token.EXPR, [
                    (Token.ASSIGN,
                     None,
                     (Token.OPEXPR, [(Token.MEMBER, None, None, None)]),
                     None)
                ])),
               (Token.EXPR, [
                   (Token.ASSIGN,
                    None,
                    (Token.OPEXPR, [
                        (Token.MEMBER,
                         (Token.ID, 'y'),
                         None,
                         (Token.CALL, [], None)
                         )
                    ]),
                    None)
               ])]
        self._assert_ast(jsi, ast)

    def test_morespace(self):
        """Extra whitespace around tokens must not change the resulting tree."""
        jsi = JSInterpreter('x = 2 ; return x;')
        ast = [(Token.EXPR,
                [(Token.ASSIGN,
                  _ASSIGN_OPERATORS['='][1],
                  (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), None, None)]),
                  (Token.ASSIGN,
                   None,
                   (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 2), None, None)]),
                   None)
                  )]
                ),
               (Token.RETURN,
                (Token.EXPR, [
                    (Token.ASSIGN,
                     None,
                     (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), None, None)]),
                     None)
                ])
                )]
        self._assert_ast(jsi, ast)

    def test_strange_chars(self):
        """Identifiers may contain ``$``, ``_`` and digits."""
        jsi = JSInterpreter('var $_axY2 = $_axY1 + 1; return $_axY2;')
        ast = [(Token.VAR,
                # (name, initializer) pairs, spelled out eagerly instead of
                # through zip() so the expectation itself holds no iterator
                [('$_axY2',
                  (Token.ASSIGN,
                   None,
                   (Token.OPEXPR, [
                       (Token.MEMBER, (Token.ID, '$_axY1'), None, None),
                       (Token.MEMBER, (Token.INT, 1), None, None),
                       (Token.OP, _OPERATORS['+'][1])
                   ]),
                   None))
                 ]
                ),
               (Token.RETURN,
                (Token.EXPR, [
                    (Token.ASSIGN,
                     None,
                     (Token.OPEXPR, [(Token.MEMBER, (Token.ID, '$_axY2'), None, None)]),
                     None)]
                 )
                )]
        self._assert_ast(jsi, ast)

    @unittest.skip('Already have a bunch of these')
    def test_operators(self):
        """Placeholder for shift and bitwise operators (expectation not written)."""
        jsi = JSInterpreter('return 1 << 5;')
        ast = []
        self._assert_ast(jsi, ast)

        jsi = JSInterpreter('return 19 & 21;')
        self._assert_ast(jsi, ast)

        jsi = JSInterpreter('return 11 >> 2;')
        self._assert_ast(jsi, ast)

    def test_array_access(self):
        """Array literal in a ``var`` statement followed by element assignments."""
        jsi = JSInterpreter('var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;')
        ast = [(Token.VAR,
                # (name, initializer) pairs, spelled out eagerly instead of
                # through zip() so the expectation itself holds no iterator
                [('x',
                  (Token.ASSIGN,
                   None,
                   (Token.OPEXPR, [
                       (Token.MEMBER, (Token.ARRAY, [
                           (Token.ASSIGN, None, (Token.OPEXPR, [
                               (Token.MEMBER, (Token.INT, 1), None, None)]), None),
                           (Token.ASSIGN, None, (Token.OPEXPR, [
                               (Token.MEMBER, (Token.INT, 2), None, None)]), None),
                           (Token.ASSIGN, None, (Token.OPEXPR, [
                               (Token.MEMBER, (Token.INT, 3), None, None)]), None)
                       ]), None, None),
                   ]),
                   None))
                 ]
                ),
               # x[0] = 4
               (Token.EXPR, [
                   (Token.ASSIGN,
                    _ASSIGN_OPERATORS['='][1],
                    (Token.OPEXPR, [
                        (Token.MEMBER, (Token.ID, 'x'),
                         None,
                         (Token.ELEM,
                          (Token.EXPR, [
                              (Token.ASSIGN,
                               None,
                               (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 0), None, None)]),
                               None)
                          ]),
                          None))
                    ]),
                    (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 4), None, None)]), None)
                    )
               ]),
               # x[0] = 5
               (Token.EXPR, [
                   (Token.ASSIGN,
                    _ASSIGN_OPERATORS['='][1],
                    (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'),
                                     None,
                                     (Token.ELEM, (Token.EXPR, [
                                         (Token.ASSIGN,
                                          None,
                                          (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 0), None, None)]),
                                          None)
                                     ]), None))
                                    ]),
                    (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 5), None, None)]), None))
               ]),
               # x[2] = 7
               (Token.EXPR, [
                   (Token.ASSIGN,
                    _ASSIGN_OPERATORS['='][1],
                    (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'),
                                     None,
                                     (Token.ELEM, (Token.EXPR, [
                                         (Token.ASSIGN,
                                          None,
                                          (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 2), None, None)]),
                                          None)
                                     ]), None))
                                    ]),
                    (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 7), None, None)]), None))
               ]),
               (Token.RETURN,
                (Token.EXPR, [
                    (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), None, None)]), None)
                ])
                )
               ]
        self._assert_ast(jsi, ast)

    @unittest.skip('Expression parsed as call argument!')
    def test_parens(self):
        """Nested parentheses; skipped, and the second expectation is a placeholder."""
        jsi = JSInterpreter('return (1) + (2) * ((( (( (((((3)))))) )) ));')
        ast = [(Token.RETURN, (Token.EXPR, [
            (Token.ASSIGN, None,
             (Token.OPEXPR, [
                 (Token.EXPR, [
                     (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 1), None, None)]), None)
                 ]),
                 (Token.EXPR, [
                     (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 2), None, None)]), None)
                 ]),
                 # ten nested EXPR nodes, one per pair of parentheses around 3
                 (Token.EXPR, [(Token.EXPR, [(Token.EXPR, [
                     (Token.EXPR, [(Token.EXPR, [
                         (Token.EXPR, [(Token.EXPR, [(Token.EXPR, [(Token.EXPR, [(Token.EXPR, [
                             (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 3), None, None)]), None)
                         ])])])])])
                     ])])
                 ])])])
             ]), None)
        ]))
        ]
        self._assert_ast(jsi, ast)

        jsi = JSInterpreter('return (1 + 2) * 3;')
        ast = []
        self._assert_ast(jsi, ast)

    @unittest.skip('Incomplete test case')
    def test_assignments(self):
        """Placeholder for plain and compound assignment (expectations not written)."""
        jsi = JSInterpreter('var x = 20; x = 30 + 1; return x;')
        ast = []
        self._assert_ast(jsi, ast)

        jsi = JSInterpreter('var x = 20; x += 30 + 1; return x;')
        ast = []
        self._assert_ast(jsi, ast)

        jsi = JSInterpreter('var x = 20; x -= 30 + 1; return x;')
        ast = []
        self._assert_ast(jsi, ast)

    @unittest.skip('Incomplete test case')
    def test_comments(self):
        """Placeholder for block comments (expectations not written)."""
        # var x = 2; var y = 50; return x + y;
        jsi = JSInterpreter('var x = /* 1 + */ 2; var y = /* 30 * 40 */ 50; return x + y;')
        ast = []
        self._assert_ast(jsi, ast)

        # var x = "/*"; var y = 1 + 2; return y;
        jsi = JSInterpreter('var x = "/*"; var y = 1 /* comment */ + 2; return y;')
        ast = []
        self._assert_ast(jsi, ast)

    @unittest.skip('Incomplete test case')
    def test_precedence(self):
        """Placeholder for operator precedence inside an index expression."""
        jsi = JSInterpreter(' var a = [10, 20, 30, 40, 50]; var b = 6; a[0]=a[b%a.length]; return a;')
        ast = []
        self._assert_ast(jsi, ast)

    @unittest.skip('Parsing function declaration not yet implemented')
    def test_call(self):
        """Function declarations and calls, checked through ``call_function``."""
        jsi = JSInterpreter('''
        function x() { return 2; }
        function y(a) { return x() + a; }
        function z() { return y(3); }
        ''')
        self.assertEqual(jsi.call_function('z'), 5)

        jsi = JSInterpreter('function x(a) { return a.split(""); }', objects={'a': 'abc'})
        self.assertEqual(jsi.call_function('x'), ["a", "b", "c"])
        # NOTE(review): this early return makes the case below unreachable;
        # it looks deliberately disabled -- confirm before removing either.
        return
        jsi = JSInterpreter('''
        function a(x) { return x; }
        function b(x) { return x; }
        function c() { return [a, b][0](0); }
        ''')
        self.assertEqual(jsi.call_function('c'), 0)

    def test_getfield(self):
        """Field access on an identifier produces a FIELD tail on the MEMBER node."""
        jsi = JSInterpreter('return a.var;', objects={'a': {'var': 3}})
        ast = [(Token.RETURN,
                (Token.EXPR, [
                    (Token.ASSIGN,
                     None,
                     (Token.OPEXPR, [
                         (Token.MEMBER,
                          (Token.ID, 'a'),
                          None,
                          (Token.FIELD, 'var', None)),
                     ]),
                     None)
                ]))
               ]
        self._assert_ast(jsi, ast)
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()

View File

@ -240,7 +240,7 @@ class JSInterpreter(object):
# literals
else:
# TODO use tuple if CONST
return [peek_id, peek_value]
return (peek_id, peek_value)
# array
elif peek_id is Token.SOPEN:
return self._array_literal(token_stream, stack_top - 1)
@ -445,6 +445,7 @@ class JSInterpreter(object):
# TODO use context instead local_vars in argument
def getvalue(self, ref, local_vars):
ref = ref['get']
if ref is None or ref is self.undefined or isinstance(ref, (int, float, str)):
return ref
ref_id, ref_value = ref
@ -454,11 +455,11 @@ class JSInterpreter(object):
return ref_value
elif ref_id is Token.EXPR:
ref, _ = self.interpret_statement(ref_value, local_vars)
return self.getvalue(ref, local_vars)
return self.getvalue(ref['get'], local_vars)
elif ref_id is Token.ARRAY:
array = []
for expr in ref_value:
array.append(self.interpret_expression(expr, local_vars))
array.append(self.interpret_expression(expr, local_vars)['get'])
return array
else:
raise ExtractorError('Unable to get value of reference type %s' % ref_id)
@ -486,13 +487,13 @@ class JSInterpreter(object):
for stmt in block:
s, abort = self.interpret_statement(stmt, local_vars)
if s is not None:
ref = self.getvalue(s, local_vars)
ref = self.getvalue(s['get'], local_vars)
elif name is Token.VAR:
for name, value in stmt[1]:
local_vars[name] = self.getvalue(self.interpret_expression(value, local_vars), local_vars)
elif name is Token.EXPR:
for expr in stmt[1]:
ref = self.interpret_expression(expr, local_vars)
ref = self.interpret_expression(expr, local_vars)['get']
# if
# continue, break
elif name is Token.RETURN:
@ -501,7 +502,7 @@ class JSInterpreter(object):
ref = self.getvalue(ref, local_vars)
if isinstance(ref, list):
# TODO deal with nested arrays
ref = [self.getvalue(elem, local_vars) for elem in ref]
ref = [self.getvalue(elem if hasattr(elem, 'get') else {'get': elem}, local_vars) for elem in ref]
abort = True
# with
@ -512,24 +513,25 @@ class JSInterpreter(object):
# debugger
else:
raise ExtractorError('''Can't interpret statement called %s''' % name)
return ref, abort
return {'get': ref}, abort
def interpret_expression(self, expr, local_vars):
name = expr[0]
if name is Token.ASSIGN:
op, left, right = expr[1:]
if op is None:
ref = self.interpret_expression(left, local_vars)
ref = {'get': self.interpret_expression(left, local_vars)['get']}
else:
# TODO handle undeclared variables (create property)
leftref = self.interpret_expression(left, local_vars)
leftvalue = self.getvalue(leftref, local_vars)
rightvalue = self.getvalue(self.interpret_expression(right, local_vars), local_vars)
# TODO set array element
self.putvalue(leftref, op(leftvalue, rightvalue), local_vars)
ref = leftref
leftref['set'](op(leftvalue, rightvalue))
ref = {'get': left}
elif name is Token.EXPR:
ref, _ = self.interpret_statement(expr, local_vars)
ref = {'get': ref['get']}
elif name is Token.OPEXPR:
stack = []
rpn = expr[1][:]
@ -553,6 +555,7 @@ class JSInterpreter(object):
elif name is Token.MEMBER:
# TODO interpret member
target, args, tail = expr[1:]
ref = {}
while tail is not None:
tail_name, tail_value, tail = tail
if tail_name is Token.FIELD:
@ -561,19 +564,27 @@ class JSInterpreter(object):
elif tail_name is Token.ELEM:
# TODO interpret element
# raise ExtractorError('''Can't interpret expression called %s''' % tail_name)
ret, _ = self.interpret_statement(tail_value, local_vars)
index = self.getvalue(ret, local_vars)
target = self.getvalue(target, local_vars)
target = self.interpret_expression((Token.MEMBER, target[index], args, tail), local_vars)
index, _ = self.interpret_statement(tail_value, local_vars)
index = self.getvalue(index, local_vars)
target = self.getvalue({'get': target}, local_vars)
def make_setter(t):
def setter(v):
t.__setitem__(index, v)
return setter
ref['set'] = make_setter(target)
target = self.interpret_expression((Token.MEMBER, target[index], args, tail), local_vars)['get']
elif tail_name is Token.CALL:
# TODO interpret call
raise ExtractorError('''Can't interpret expression called %s''' % tail_name)
ref = target
ref['get'] = target
elif name in (Token.ID, Token.ARRAY):
ref = self.getvalue(expr, local_vars)
ref = {'get': self.getvalue(expr, local_vars),
'set': lambda v: local_vars.__setitem__(name, v)}
# literal
elif name in _token_keys:
ref = expr
ref = {'get': expr}
else:
raise ExtractorError('''Can't interpret expression called %s''' % name)
@ -624,5 +635,5 @@ class JSInterpreter(object):
res, abort = self.interpret_statement(stmt, local_vars)
if abort:
break
return res
return res['get']
return resf