From 599b9db8784b3d817988b399d197811c8add067a Mon Sep 17 00:00:00 2001 From: sulyi Date: Thu, 8 Dec 2016 03:55:23 +0100 Subject: [PATCH] [jsinterp] First parser tests --- test/test_jsinterp_parser.py | 309 ++++++++++++++++++++++++++++++++ youtube_dl/jsinterp/jsinterp.py | 47 +++-- 2 files changed, 338 insertions(+), 18 deletions(-) create mode 100644 test/test_jsinterp_parser.py diff --git a/test/test_jsinterp_parser.py b/test/test_jsinterp_parser.py new file mode 100644 index 000000000..d4b70126c --- /dev/null +++ b/test/test_jsinterp_parser.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.jsinterp import JSInterpreter +from youtube_dl.jsinterp.jsgrammar import Token +from youtube_dl.jsinterp.tstream import ( + _OPERATORS, + _ASSIGN_OPERATORS, + _LOGICAL_OPERATORS, + _UNARY_OPERATORS, + _RELATIONS +) + + +class TestJSInterpreterParser(unittest.TestCase): + def test_basic(self): + jsi = JSInterpreter(';') + ast = [None] + self.assertEqual(list(jsi.statements()), ast) + + jsi = JSInterpreter('return 42;') + ast = [(Token.RETURN, + (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 42), None, None)]), + None) + ]) + )] + self.assertEqual(list(jsi.statements()), ast) + + def test_calc(self): + jsi = JSInterpreter('return 2*a+1;') + ast = [(Token.RETURN, + (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [ + # Reverse Polish Notation! + (Token.MEMBER, (Token.INT, 2), None, None), + (Token.MEMBER, (Token.ID, 'a'), None, None), + (Token.OP, _OPERATORS['*'][1]), + (Token.MEMBER, (Token.INT, 1), None, None), + (Token.OP, _OPERATORS['+'][1]), + ]), + None) + ]) + )] + self.assertEqual(list(jsi.statements()), ast) + + def test_empty_return(self): + jsi = JSInterpreter('return; y()') + ast = [(Token.RETURN, + (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [(Token.MEMBER, None, None, None)]), + None) + ])), + (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [ + (Token.MEMBER, + (Token.ID, 'y'), + None, + (Token.CALL, [], None) + ) + ]), + None) + ])] + self.assertEqual(list(jsi.statements()), ast) + + def test_morespace(self): + jsi = JSInterpreter('x = 2 ; return x;') + ast = [(Token.EXPR, + [(Token.ASSIGN, + _ASSIGN_OPERATORS['='][1], + (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), None, None)]), + (Token.ASSIGN, + None, + (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 2), None, None)]), + None) + )] + ), + (Token.RETURN, + (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), None, None)]), + None) + ]) + )] + self.assertEqual(list(jsi.statements()), ast) + + def test_strange_chars(self): + jsi = JSInterpreter('var $_axY2 = $_axY1 + 1; return $_axY2;') + ast = [(Token.VAR, + zip(['$_axY2'], + [(Token.ASSIGN, + None, + (Token.OPEXPR, [ + (Token.MEMBER, (Token.ID, '$_axY1'), None, None), + (Token.MEMBER, (Token.INT, 1), None, None), + (Token.OP, _OPERATORS['+'][1]) + ]), + None) + ]) + ), + (Token.RETURN, + (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [(Token.MEMBER, (Token.ID, '$_axY2'), None, None)]), + None)] + ) + )] + self.assertEqual(list(jsi.statements()), ast) + + @unittest.skip('Already have a bunch of these') + def test_operators(self): + jsi = JSInterpreter('return 1 << 5;') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + jsi = JSInterpreter('return 19 & 21;') + self.assertEqual(list(jsi.statements()), ast) + + jsi = JSInterpreter('return 11 >> 2;') + self.assertEqual(list(jsi.statements()), ast) + + def test_array_access(self): + jsi = JSInterpreter('var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;') + ast = [(Token.VAR, + zip(['x'], + [(Token.ASSIGN, + None, + (Token.OPEXPR, [ + (Token.MEMBER, (Token.ARRAY, [ + (Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.INT, 1), None, None)]), None), + (Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.INT, 2), None, None)]), None), + (Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.INT, 3), None, None)]), None) + ]), None, None), + ]), + None) + ]) + ), + (Token.EXPR, [ + (Token.ASSIGN, + _ASSIGN_OPERATORS['='][1], + (Token.OPEXPR, [ + (Token.MEMBER, (Token.ID, 'x'), + None, + (Token.ELEM, + (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 0), None, None)]), + None) + ]), + None)) + ]), + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 4), None, None)]), None) + ) + ]), + (Token.EXPR, [ + (Token.ASSIGN, + _ASSIGN_OPERATORS['='][1], + (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), + None, + (Token.ELEM, (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 0), None, None)]), + None) + ]), None)) + ]), + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 5), None, None)]), None)) + ]), + (Token.EXPR, [ + (Token.ASSIGN, + _ASSIGN_OPERATORS['='][1], + (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), + None, + (Token.ELEM, (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 2), None, None)]), + None) + ]), None)) + ]), + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 7), None, None)]), None)) + ]), + (Token.RETURN, + (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), None, None)]), None) + ]) + ) + ] + self.assertEqual(list(jsi.statements()), ast) + + @unittest.skip('Expression parsed as call argument!') + def test_parens(self): + jsi = JSInterpreter('return (1) + (2) * ((( (( (((((3)))))) )) ));') + ast = [(Token.RETURN, (Token.EXPR, [ + (Token.ASSIGN, None, + (Token.OPEXPR, [ + (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 1), None, None)]), None) + ]), + (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 2), None, None)]), None) + ]), + (Token.EXPR, [(Token.EXPR, [(Token.EXPR, [ + (Token.EXPR, [(Token.EXPR, [ + (Token.EXPR, [(Token.EXPR, [(Token.EXPR, [(Token.EXPR, [(Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 3), None, None)]), None) + ])])])])]) + ])]) + ])])]) + ]), None) + ])) + + ] + self.assertEqual(list(jsi.statements()), ast) + + jsi = JSInterpreter('return (1 + 2) * 3;') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + @unittest.skip('Incomplete test case') + def test_assignments(self): + jsi = JSInterpreter('var x = 20; x = 30 + 1; return x;') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + jsi = JSInterpreter('var x = 20; x += 30 + 1; return x;') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + jsi = JSInterpreter('var x = 20; x -= 30 + 1; return x;') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + @unittest.skip('Incomplete test case') + def test_comments(self): + # var x = 2; var y = 50; return x + y; + jsi = JSInterpreter('var x = /* 1 + */ 2; var y = /* 30 * 40 */ 50; return x + y;') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + # var x = "/*"; var y = 1 + 2; return y; + jsi = JSInterpreter('var x = "/*"; var y = 1 /* comment */ + 2; return y;') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + @unittest.skip('Incomplete test case') + def test_precedence(self): + jsi = JSInterpreter(' var a = [10, 20, 30, 40, 50]; var b = 6; a[0]=a[b%a.length]; return a;') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + @unittest.skip('Parsing function declaration not yet implemented') + def test_call(self): + jsi = JSInterpreter(''' + function x() { return 2; } + function y(a) { return x() + a; } + function z() { return y(3); } + ''') + self.assertEqual(jsi.call_function('z'), 5) + jsi = JSInterpreter('function x(a) { return a.split(""); }', objects={'a': 'abc'}) + self.assertEqual(jsi.call_function('x'), ["a", "b", "c"]) + return + jsi = JSInterpreter(''' + function a(x) { return x; } + function b(x) { return x; } + function c() { return [a, b][0](0); } + ''') + self.assertEqual(jsi.call_function('c'), 0) + + def test_getfield(self): + jsi = JSInterpreter('return a.var;', objects={'a': {'var': 3}}) + ast = [(Token.RETURN, + (Token.EXPR, [ + (Token.ASSIGN, + None, + (Token.OPEXPR, [ + (Token.MEMBER, + (Token.ID, 'a'), + None, + (Token.FIELD, 'var', None)), + ]), + None) + ])) + ] + self.assertEqual(list(jsi.statements()), ast) + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/jsinterp/jsinterp.py b/youtube_dl/jsinterp/jsinterp.py index bb7d5e572..05784d99d 100644 --- a/youtube_dl/jsinterp/jsinterp.py +++ b/youtube_dl/jsinterp/jsinterp.py @@ -240,7 +240,7 @@ class JSInterpreter(object): # literals else: # TODO use tuple if CONST - return [peek_id, peek_value] + return (peek_id, peek_value) # array elif peek_id is Token.SOPEN: return self._array_literal(token_stream, stack_top - 1) @@ -445,6 +445,7 @@ class JSInterpreter(object): # TODO use context instead local_vars in argument def getvalue(self, ref, local_vars): + ref = ref['get'] if ref is None or ref is self.undefined or isinstance(ref, (int, float, str)): return ref ref_id, ref_value = ref @@ -454,11 +455,11 @@ class JSInterpreter(object): return ref_value elif ref_id is Token.EXPR: ref, _ = self.interpret_statement(ref_value, local_vars) - return self.getvalue(ref, local_vars) + return self.getvalue(ref['get'], local_vars) elif ref_id is Token.ARRAY: array = [] for expr in ref_value: - array.append(self.interpret_expression(expr, local_vars)) + array.append(self.interpret_expression(expr, local_vars)['get']) return array else: raise ExtractorError('Unable to get value of reference type %s' % ref_id) @@ -486,13 +487,13 @@ class JSInterpreter(object): for stmt in block: s, abort = self.interpret_statement(stmt, local_vars) if s is not None: - ref = self.getvalue(s, local_vars) + ref = self.getvalue(s['get'], local_vars) elif name is Token.VAR: for name, value in stmt[1]: local_vars[name] = self.getvalue(self.interpret_expression(value, local_vars), local_vars) elif name is Token.EXPR: for expr in stmt[1]: - ref = self.interpret_expression(expr, local_vars) + ref = self.interpret_expression(expr, local_vars)['get'] # if # continue, break elif name is Token.RETURN: @@ -501,7 +502,7 @@ class JSInterpreter(object): ref = self.getvalue(ref, local_vars) if isinstance(ref, list): # TODO deal with nested arrays - ref = [self.getvalue(elem, local_vars) for elem in ref] + ref = [self.getvalue(elem if hasattr(elem, 'get') else {'get': elem}, local_vars) for elem in ref] abort = True # with @@ -512,24 +513,25 @@ class JSInterpreter(object): # debugger else: raise ExtractorError('''Can't interpret statement called %s''' % name) - return ref, abort + return {'get': ref}, abort def interpret_expression(self, expr, local_vars): name = expr[0] if name is Token.ASSIGN: op, left, right = expr[1:] if op is None: - ref = self.interpret_expression(left, local_vars) + ref = {'get': self.interpret_expression(left, local_vars)['get']} else: # TODO handle undeclared variables (create propery) leftref = self.interpret_expression(left, local_vars) leftvalue = self.getvalue(leftref, local_vars) rightvalue = self.getvalue(self.interpret_expression(right, local_vars), local_vars) # TODO set array element - self.putvalue(leftref, op(leftvalue, rightvalue), local_vars) - ref = leftref + leftref['set'](op(leftvalue, rightvalue)) + ref = {'get': left} elif name is Token.EXPR: ref, _ = self.interpret_statement(expr, local_vars) + ref = {'get': ref['get']} elif name is Token.OPEXPR: stack = [] rpn = expr[1][:] @@ -553,6 +555,7 @@ class JSInterpreter(object): elif name is Token.MEMBER: # TODO interpret member target, args, tail = expr[1:] + ref = {} while tail is not None: tail_name, tail_value, tail = tail if tail_name is Token.FIELD: @@ -561,19 +564,27 @@ class JSInterpreter(object): elif tail_name is Token.ELEM: # TODO interpret element # raise ExtractorError('''Can't interpret expression called %s''' % tail_name) - ret, _ = self.interpret_statement(tail_value, local_vars) - index = self.getvalue(ret, local_vars) - target = self.getvalue(target, local_vars) - target = self.interpret_expression((Token.MEMBER, target[index], args, tail), local_vars) + index, _ = self.interpret_statement(tail_value, local_vars) + index = self.getvalue(index, local_vars) + target = self.getvalue({'get': target}, local_vars) + + def make_setter(t): + def setter(v): + t.__setitem__(index, v) + return setter + + ref['set'] = make_setter(target) + target = self.interpret_expression((Token.MEMBER, target[index], args, tail), local_vars)['get'] elif tail_name is Token.CALL: # TODO interpret call raise ExtractorError('''Can't interpret expression called %s''' % tail_name) - ref = target + ref['get'] = target elif name in (Token.ID, Token.ARRAY): - ref = self.getvalue(expr, local_vars) + ref = {'get': self.getvalue(expr, local_vars), + 'set': lambda v: local_vars.__setitem__(name, v)} # literal elif name in _token_keys: - ref = expr + ref = {'get': expr} else: raise ExtractorError('''Can't interpret expression called %s''' % name) @@ -624,5 +635,5 @@ class JSInterpreter(object): res, abort = self.interpret_statement(stmt, local_vars) if abort: break - return res + return res['get'] return resf