diff --git a/test/test_jsinterp_parser.py b/test/test_jsinterp_parser.py index 11534a8b9..feccb2cce 100644 --- a/test/test_jsinterp_parser.py +++ b/test/test_jsinterp_parser.py @@ -842,19 +842,44 @@ class TestJSInterpreterParser(unittest.TestCase): ast = [] self.assertEqual(list(jsi.statements()), ast) - @unittest.skip('Test not yet implemented: missing ast') def test_do(self): - # ASAP do statement test jsi = JSInterpreter(''' function f(x){ - i = 1 + i = 1; do{ i++; - } while (i < x) + } while (i < x); return i; } ''') - ast = [] + ast = [ + (Token.FUNC, 'f', ['x'], + (Token.BLOCK, [ + (Token.EXPR, [ + (Token.ASSIGN, _ASSIGN_OPERATORS['='][1], + (Token.OPEXPR, [(Token.MEMBER, (Token.ID, 'i'), None, None)]), + (Token.ASSIGN, None, (Token.OPEXPR, [(Token.MEMBER, (Token.INT, 1), None, None)]), None)) + ]), + (Token.DO, + (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.ID, 'i'), None, None), + (Token.MEMBER, (Token.ID, 'x'), None, None), + (Token.REL, _RELATIONS['<'][1]) + ]), None) + ]), + (Token.BLOCK, [ + (Token.EXPR, [ + (Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.ID, 'i'), None, None), + (Token.UOP, _UNARY_OPERATORS['++'][1]) + ]), None) + ]) + ])), + (Token.RETURN, (Token.EXPR, [(Token.ASSIGN, None, (Token.OPEXPR, [ + (Token.MEMBER, (Token.ID, 'i'), None, None)]), None)])) + ])) + ] self.assertEqual(list(jsi.statements()), ast) @unittest.skip('Test not yet implemented: missing ast') diff --git a/youtube_dl/jsinterp/jsgrammar.py b/youtube_dl/jsinterp/jsgrammar.py index 262b5ca6b..c38693762 100644 --- a/youtube_dl/jsinterp/jsgrammar.py +++ b/youtube_dl/jsinterp/jsgrammar.py @@ -14,8 +14,8 @@ _token_keys = ('COPEN', 'CCLOSE', 'POPEN', 'PCLOSE', 'SOPEN', 'SCLOSE', 'NULL', 'BOOL', 'ID', 'STR', 'INT', 'FLOAT', 'REGEX', 'OBJECT', 'REFLAGS', 'REBODY', 'FUNC', - 'BLOCK', 'VAR', 'EXPR', 'IF', 'ITER', 'CONTINUE', 'BREAK', 'RETURN', 'WITH', 'LABEL', 'SWITCH', - 'THROW', 'TRY', 'DEBUG', + 'BLOCK', 'VAR', 'EXPR', 'IF', 'FOR', 'DO', 'WHILE', 'CONTINUE', 'BREAK', 'RETURN', + 'WITH', 'LABEL', 'SWITCH', 'THROW', 'TRY', 'DEBUG', 'ASSIGN', 'MEMBER', 'FIELD', 'ELEM', 'CALL', 'ARRAY', 'COND', 'OPEXPR', 'PROPGET', 'PROPSET', 'PROPVALUE', 'RSV') diff --git a/youtube_dl/jsinterp/jsinterp.py b/youtube_dl/jsinterp/jsinterp.py index 0acce6321..43947cc99 100644 --- a/youtube_dl/jsinterp/jsinterp.py +++ b/youtube_dl/jsinterp/jsinterp.py @@ -61,12 +61,11 @@ class JSInterpreter(object): if code is None: code = self.code ts = TokenStream(code, pos) - while not ts.ended: - yield self._next_statement(ts, stack_size) + yield self._statement(ts, stack_size) raise StopIteration - def _next_statement(self, token_stream, stack_top): + def _statement(self, token_stream, stack_top): if stack_top < 0: raise ExtractorError('Recursion limit reached') # ast @@ -85,7 +84,7 @@ class JSInterpreter(object): # block elif token_id is Token.COPEN: - # XXX refactor will deprecate some _next_statement calls + # XXX refactor will deprecate some _statement calls open_pos = token_pos token_stream.pop() block = [] @@ -96,7 +95,7 @@ class JSInterpreter(object): break elif token_id is Token.END and token_stream.ended: raise ExtractorError('Unbalanced parentheses at %d' % open_pos) - block.append(self._next_statement(token_stream, stack_top - 1)) + block.append(self._statement(token_stream, stack_top - 1)) statement = (Token.BLOCK, block) @@ -137,13 +136,13 @@ class JSInterpreter(object): elif token_value == 'if': statement = self._if_statement(token_stream, stack_top - 1) - elif token_value is 'for': + elif token_value == 'for': statement = self._for_loop(token_stream, stack_top - 1) - elif token_value is 'do': + elif token_value == 'do': statement = self._do_loop(token_stream, stack_top - 1) - elif token_value is 'while': + elif token_value == 'while': statement = self._while_loop(token_stream, stack_top - 1) elif token_value in ('break', 'continue'): @@ -209,7 +208,7 @@ class JSInterpreter(object): if token_id is Token.COLON: token_id, label_name, token_pos = token_stream.pop(2) token_stream.chk_id(last=True) - statement = (Token.LABEL, label_name, self._next_statement(token_stream, stack_top - 1)) + statement = (Token.LABEL, label_name, self._statement(token_stream, stack_top - 1)) # expr if statement is None: @@ -230,12 +229,12 @@ class JSInterpreter(object): raise ExtractorError('Missing condition at %d' % token_pos) cond_expr = self._expression(token_stream, stack_top - 1) token_stream.pop() # Token.PCLOSE - true_expr = self._next_statement(token_stream, stack_top - 1) + true_expr = self._statement(token_stream, stack_top - 1) false_expr = None token_id, token_value, token_pos = token_stream.peek() if token_id is Token.ID and token_value == 'else': token_stream.pop() - false_expr = self._next_statement(token_stream, stack_top - 1) + false_expr = self._statement(token_stream, stack_top - 1) return (Token.IF, cond_expr, true_expr, false_expr) def _for_loop(self, token_stream, stack_top): @@ -250,7 +249,7 @@ class JSInterpreter(object): init = None elif token_id.ID and token_value == 'var': # XXX refactor (create dedicated method for handling variable declaration list) - init = self._next_statement(token_stream, stack_top - 1) + init = self._statement(token_stream, stack_top - 1) else: init = self._expression(token_stream, stack_top - 1) token_id, token_value, token_pos = token_stream.pop() @@ -274,12 +273,12 @@ class JSInterpreter(object): token_id, token_value, token_pos = token_stream.pop() if token_id is not Token.PCLOSE: raise ExtractorError('''Expected ')' at %d''' % token_pos) - body = self._next_statement(token_stream, stack_top - 1) + body = self._statement(token_stream, stack_top - 1) return (Token.FOR, init, cond, incr, body) def _do_loop(self, token_stream, stack_top): token_stream.pop() - body = self._next_statement(token_stream, stack_top - 1) + body = self._statement(token_stream, stack_top - 1) token_id, token_value, token_pos = token_stream.pop() if token_id is not Token.ID and token_value != 'while': raise ExtractorError('''Expected 'while' at %d''' % token_pos) @@ -307,7 +306,7 @@ class JSInterpreter(object): token_id, token_value, token_pos = token_stream.pop() if token_id is not Token.PCLOSE: raise ExtractorError('''Expected ')' at %d''' % token_pos) - body = self._next_statement(token_stream, stack_top) + body = self._statement(token_stream, stack_top) return (Token.DO, expr, body) def _return_statement(self, token_stream, stack_top): @@ -324,7 +323,7 @@ class JSInterpreter(object): raise ExtractorError('Missing expression at %d' % token_pos) expr = self._expression(token_stream, stack_top - 1) token_stream.pop() # Token.PCLOSE - return (Token.WITH, expr, self._next_statement(token_stream, stack_top - 1)) + return (Token.WITH, expr, self._statement(token_stream, stack_top - 1)) def _switch_statement(self, token_stream, stack_top): token_stream.pop() @@ -371,7 +370,7 @@ class JSInterpreter(object): break elif token_id is Token.END and token_stream.ended: raise ExtractorError('Unbalanced parentheses at %d' % open_pos) - statement_list.append(self._next_statement(token_stream, stack_top - 1)) + statement_list.append(self._statement(token_stream, stack_top - 1)) block.append((expr, statement_list)) token_stream.pop() @@ -382,7 +381,7 @@ class JSInterpreter(object): token_id, token_value, token_pos = token_stream.peek() if token_id is not Token.COPEN: raise ExtractorError('Block is expected at %d' % token_pos) - try_block = self._next_statement(token_stream, stack_top - 1) + try_block = self._statement(token_stream, stack_top - 1) token_id, token_value, token_pos = token_stream.pop() catch_block = None if token_id is Token.ID and token_value == 'catch': @@ -398,13 +397,13 @@ class JSInterpreter(object): token_id, token_value, token_pos = token_stream.peek() if token_id is not Token.COPEN: raise ExtractorError('Block is expected at %d' % token_pos) - catch_block = (error_name, self._next_statement(token_stream, stack_top - 1)) + catch_block = (error_name, self._statement(token_stream, stack_top - 1)) finally_block = None if token_id is Token.ID and token_value == 'finally': token_id, token_value, token_pos = token_stream.peek() if token_id is not Token.COPEN: raise ExtractorError('Block is expected at %d' % token_pos) - finally_block = self._next_statement(token_stream, stack_top - 1) + finally_block = self._statement(token_stream, stack_top - 1) if catch_block is None and finally_block is None: raise ExtractorError('Try statement is expecting catch or finally at %d' % token_pos) return (Token.TRY, try_block, catch_block, finally_block) @@ -570,7 +569,7 @@ class JSInterpreter(object): if token_id is not Token.COPEN: raise ExtractorError('Expected function body at %d' % token_pos) - return (Token.FUNC, name, args, self._next_statement(token_stream, stack_top - 1)) + return (Token.FUNC, name, args, self._statement(token_stream, stack_top - 1)) def _arguments(self, token_stream, stack_top): if stack_top < 0: @@ -600,7 +599,7 @@ class JSInterpreter(object): elif peek_id is Token.END and token_stream.ended: raise ExtractorError('Unbalanced parentheses at %d' % open_pos) else: - raise ExtractorError('Expected , separator at %d' % peek_pos) + raise ExtractorError('''Expected ',' separator at %d''' % peek_pos) def _array_literal(self, token_stream, stack_top): if stack_top < 0: @@ -631,7 +630,7 @@ class JSInterpreter(object): if peek_id is Token.SCLOSE: has_another = False elif peek_id is not Token.COMMA: - raise ExtractorError('Expected , after element at %d' % peek_pos) + raise ExtractorError('''Expected ',' after element at %d''' % peek_pos) return (Token.ARRAY, elements) @@ -664,9 +663,9 @@ class JSInterpreter(object): raise ExtractorError('''Expected ')' at %d''' % token_pos) if is_set: - desc = (Token.PROPSET, arg, self._next_statement(token_stream, stack_top - 1)) + desc = (Token.PROPSET, arg, self._statement(token_stream, stack_top - 1)) else: - desc = (Token.PROPGET, self._next_statement(token_stream, stack_top - 1)) + desc = (Token.PROPGET, self._statement(token_stream, stack_top - 1)) elif token_id in (Token.ID, Token.STR, Token.INT, Token.FLOAT): property_name = token_value @@ -677,7 +676,7 @@ class JSInterpreter(object): desc = (Token.PROPVALUE, self._assign_expression(token_stream, stack_top - 1)) elif token_stream.ended: - raise ExtractorError('Unmatched parenteses at %d' % open_pos) + raise ExtractorError('Unmatched parentheses at %d' % open_pos) else: raise ExtractorError('Property assignment is expected at %d' % token_pos)