From 86de1e89db5710284cb479603f3d64322a989c51 Mon Sep 17 00:00:00 2001 From: sulyi Date: Sat, 10 Dec 2016 22:57:02 +0100 Subject: [PATCH] [jsinterp] Adding function declaration and fixing block statement parser --- test/test_jsinterp_parser.py | 99 +++++++++++++++++++++++++++++--- youtube_dl/jsinterp/jsgrammar.py | 1 + youtube_dl/jsinterp/jsinterp.py | 59 ++++++++++++++++--- 3 files changed, 143 insertions(+), 16 deletions(-) diff --git a/test/test_jsinterp_parser.py b/test/test_jsinterp_parser.py index b62442d8c..7cf32ba3d 100644 --- a/test/test_jsinterp_parser.py +++ b/test/test_jsinterp_parser.py @@ -64,7 +64,7 @@ class TestJSInterpreterParser(unittest.TestCase): (Token.MEMBER, (Token.ID, 'a'), None, None), (Token.OP, _OPERATORS['*'][1]), (Token.MEMBER, (Token.INT, 1), None, None), - (Token.OP, _OPERATORS['+'][1]), + (Token.OP, _OPERATORS['+'][1]) ]), None) ]) @@ -505,7 +505,6 @@ class TestJSInterpreterParser(unittest.TestCase): ] self.assertEqual(list(traverse(list(jsi.statements()))), list(traverse(ast))) - @unittest.skip('Parsing function declaration not yet implemented') def test_call(self): jsi = JSInterpreter(''' function x() { return 2; } @@ -513,21 +512,107 @@ class TestJSInterpreterParser(unittest.TestCase): function z() { return y(3); } ''') - ast = [] + ast = [ + (Token.FUNC, 'x', + [], + (Token.BLOCK, [ + (Token.RETURN, (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 2), None, None)]), None) + ]) + ) + ])), + (Token.FUNC, 'y', + ['a'], + (Token.BLOCK, [ + (Token.RETURN, (Token.EXPR, [ + (Token.ASSIGN, None, + (Token.OPEXPR, [ + # Not sure about this one + (Token.MEMBER, (Token.ID, 'x'), None, (Token.CALL, [], None)), + (Token.MEMBER, (Token.ID, 'a'), None, None), + (Token.OP, _OPERATORS['+'][1]) + ]), None) + ]) + ) + ])), + (Token.FUNC, 'z', + [], + (Token.BLOCK, [ + (Token.RETURN, (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [ + # Not sure about this one + (Token.MEMBER, (Token.ID, 'y'), None, (Token.CALL, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 3), None, None)]), None) + ], None)) + ]), None) + ]) + ) + ])) + ] self.assertEqual(list(jsi.statements()), ast) - jsi = JSInterpreter('function x(a) { return a.split(""); }', variables={'a': 'abc'}) - ast = [] + ast = [ + (Token.FUNC, 'x', + ['a'], + (Token.BLOCK, [ + (Token.RETURN, (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.ID, 'a'), None, + (Token.FIELD, 'split', + (Token.CALL, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.STR, ''), None, None)]), None) + ], None)) + )]), + None) + ]) + ) + ])) + ] self.assertEqual(list(jsi.statements()), ast) - @unittest.skip('Parsing function declaration not yet implemented') def test_complex_call(self): jsi = JSInterpreter(''' function a(x) { return x; } function b(x) { return x; } function c() { return [a, b][0](0); } ''') - ast = [] + ast = [ + (Token.FUNC, 'a', + ['x'], + (Token.BLOCK, [ + (Token.RETURN, (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), None, None)]), None) + ]) + ) + ])), + (Token.FUNC, 'b', + ['x'], + (Token.BLOCK, [ + (Token.RETURN, (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'x'), None, None)]), None) + ]) + ) + ])), + (Token.FUNC, 'c', + [], + (Token.BLOCK, [ + (Token.RETURN, (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.ARRAY, [ + (Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.ID, 'a'), None, None)]), None), + (Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.ID, 'b'), None, None)]), None) + ]), None, (Token.ELEM, (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 0), None, None)]), None) + ]), (Token.CALL, [ + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 0), None, None)]), None) + ], None))) + ]), None) + ]) + ) + ])), + ] self.assertEqual(list(jsi.statements()), ast) def test_getfield(self): diff --git a/youtube_dl/jsinterp/jsgrammar.py b/youtube_dl/jsinterp/jsgrammar.py index d9daf3362..570d4162f 100644 --- a/youtube_dl/jsinterp/jsgrammar.py +++ b/youtube_dl/jsinterp/jsgrammar.py @@ -13,6 +13,7 @@ _token_keys = ('COPEN', 'CCLOSE', 'POPEN', 'PCLOSE', 'SOPEN', 'SCLOSE', 'COMMENT', 'TOKEN', 'PUNCT', 'NULL', 'BOOL', 'ID', 'STR', 'INT', 'FLOAT', 'REGEX', 'REFLAGS', 'REBODY', + 'FUNC', 'BLOCK', 'VAR', 'EXPR', 'IF', 'ITER', 'CONTINUE', 'BREAK', 'RETURN', 'WITH', 'LABEL', 'SWITCH', 'THROW', 'TRY', 'DEBUG', 'ASSIGN', 'MEMBER', 'FIELD', 'ELEM', 'CALL', 'ARRAY', 'COND', 'OPEXPR', diff --git a/youtube_dl/jsinterp/jsinterp.py b/youtube_dl/jsinterp/jsinterp.py index dc468dbe7..77df5a3c7 100644 --- a/youtube_dl/jsinterp/jsinterp.py +++ b/youtube_dl/jsinterp/jsinterp.py @@ -69,18 +69,53 @@ class JSInterpreter(object): # empty statement goes straight here return statement if token_id is Token.ID and token_value == 'function': - # TODO parse funcdecl - raise ExtractorError('Function declaration is not yet supported at %d' % token_pos) - elif token_id is Token.COPEN: - # block token_stream.pop() - statement_list = [] - for s in self.statements(token_stream, stack_top - 1): - statement_list.append(s) + token_stream.chk_id() + token_id, name, token_pos = token_stream.pop() + token_id, token_value, token_pos = token_stream.pop() + if token_id is Token.POPEN: + open_pos = token_pos + else: + raise ExtractorError('Expected argument list at %d' % token_pos) + + args = [] + while True: token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.CCLOSE: + if token_id is Token.PCLOSE: token_stream.pop() break + token_stream.chk_id() + token_stream.pop() + args.append(token_value) + token_id, token_value, token_pos = token_stream.peek() + if token_id is Token.COMMA: + token_stream.pop() + elif token_id is Token.PCLOSE: + pass + elif token_id is Token.END and token_stream.ended: + raise ExtractorError('Unbalanced parentheses at %d' % open_pos) + else: + raise ExtractorError('Expected , separator at %d' % token_pos) + + token_id, token_value, token_pos = token_stream.peek() + if token_id is not Token.COPEN: + raise ExtractorError('Expected function body at %d' % token_pos) + + statement = (Token.FUNC, name, args, self._next_statement(token_stream, stack_top - 1)) + elif token_id is Token.COPEN: + # block + open_pos = token_pos + token_stream.pop() + statement_list = [] + while True: + statement_list.append(self._next_statement(token_stream, stack_top - 1)) + token_stream.pop() + token_id, token_value, token_pos = token_stream.peek() + if token_id is Token.CCLOSE: + # TODO handle unmatched Token.COPEN + break + elif token_id is Token.END and token_stream.ended: + raise ExtractorError('Unbalanced parentheses at %d' % open_pos) statement = (Token.BLOCK, statement_list) elif token_id is Token.ID: # TODO parse label @@ -322,8 +357,14 @@ class JSInterpreter(object): # TODO parse generator expression peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id not in (Token.COMMA, Token.PCLOSE): + if peek_id is Token.COMMA: + token_stream.pop() + elif peek_id is Token.PCLOSE: + pass + elif peek_id is Token.END and token_stream.ended: raise ExtractorError('Unbalanced parentheses at %d' % open_pos) + else: + raise ExtractorError('Expected , separator at %d' % peek_pos) def _array_literal(self, token_stream, stack_top): if stack_top < 0: