diff --git a/test/test_jsinterp_parser.py b/test/test_jsinterp_parser.py index 3a2aa7874..b1d6d2176 100644 --- a/test/test_jsinterp_parser.py +++ b/test/test_jsinterp_parser.py @@ -731,20 +731,6 @@ class TestJSInterpreterParser(unittest.TestCase): ] self.assertEqual(list(jsi.statements()), ast) - @unittest.skip('Test not yet implemented: missing code and ast') - def test_object(self): - # TODO object literal test - jsi = JSInterpreter('') - ast = [] - self.assertEqual(list(jsi.statements()), ast) - - @unittest.skip('Test not yet implemented: missing code and ast') - def test_with(self): - # TODO with statement test - jsi = JSInterpreter('') - ast = [] - self.assertEqual(list(jsi.statements()), ast) - @unittest.skip('Test not yet implemented: missing ast') def test_switch(self): # TODO switch statement test @@ -769,6 +755,28 @@ class TestJSInterpreterParser(unittest.TestCase): ast = [] self.assertEqual(list(jsi.statements()), ast) + @unittest.skip('Test not yet implemented: missing code and ast') + def test_do(self): + # TODO do statement test + jsi = JSInterpreter('') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + @unittest.skip('Test not yet implemented: missing code and ast') + def test_while(self): + # TODO while statement test + jsi = JSInterpreter('') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + + @unittest.skip('Test not yet implemented: missing code and ast') + def test_label(self): + # TODO label (break, continue) statement test + # might be combined with another + jsi = JSInterpreter('') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + @unittest.skip('Test not yet implemented: missing code and ast') def test_funct_expr(self): # TODO function expression test @@ -777,6 +785,13 @@ class TestJSInterpreterParser(unittest.TestCase): ast = [] self.assertEqual(list(jsi.statements()), ast) + @unittest.skip('Test not yet implemented: missing code and ast') + def test_object(self): + # TODO object literal test + jsi = JSInterpreter('') + ast = [] + self.assertEqual(list(jsi.statements()), ast) + @unittest.skip('Test not yet implemented: missing code and ast') def test_try(self): # TODO try statement test @@ -793,9 +808,8 @@ class TestJSInterpreterParser(unittest.TestCase): self.assertEqual(list(jsi.statements()), ast) @unittest.skip('Test not yet implemented: missing code and ast') - def test_label(self): - # TODO label (break, continue) statement test - # might be combined with another + def test_with(self): + # TODO with statement test jsi = JSInterpreter('') ast = [] self.assertEqual(list(jsi.statements()), ast) diff --git a/youtube_dl/jsinterp/jsgrammar.py b/youtube_dl/jsinterp/jsgrammar.py index 60bdedcab..87cba7869 100644 --- a/youtube_dl/jsinterp/jsgrammar.py +++ b/youtube_dl/jsinterp/jsgrammar.py @@ -17,7 +17,7 @@ _token_keys = ('COPEN', 'CCLOSE', 'POPEN', 'PCLOSE', 'SOPEN', 'SCLOSE', 'BLOCK', 'VAR', 'EXPR', 'IF', 'ITER', 'CONTINUE', 'BREAK', 'RETURN', 'WITH', 'LABEL', 'SWITCH', 'THROW', 'TRY', 'DEBUG', 'ASSIGN', 'MEMBER', 'FIELD', 'ELEM', 'CALL', 'ARRAY', 'COND', 'OPEXPR', - 'PROPGET', 'PROPSET', 'PROPVALUE' + 'PROPGET', 'PROPSET', 'PROPVALUE', 'RSV') Token = namedtuple('Token', _token_keys)._make(_token_keys) diff --git a/youtube_dl/jsinterp/jsinterp.py b/youtube_dl/jsinterp/jsinterp.py index b3b700783..4921e1732 100644 --- a/youtube_dl/jsinterp/jsinterp.py +++ b/youtube_dl/jsinterp/jsinterp.py @@ -64,12 +64,12 @@ class JSInterpreter(object): statement = None token_id, token_value, token_pos = token_stream.peek() - if token_id in (Token.CCLOSE, Token.END): + if token_id is Token.END: # empty statement goes straight here token_stream.pop() return statement - if token_id is Token.ID and token_value == 'function': + elif token_id is Token.ID and token_value == 'function': # FIXME allowed only in program and function body # main, function expr, object literal (set, get), function declaration statement = self._function(token_stream, stack_top - 1) @@ -139,9 +139,55 @@ class JSInterpreter(object): false_expr = self._next_statement(token_stream, stack_top - 1) statement = (Token.IF, cond_expr, true_expr, false_expr) - elif token_value in ('for', 'do', 'while'): - # ASAP parse iter statement - raise ExtractorError('Loops is not yet supported at %d' % token_pos) + elif token_value is 'for': + # ASAP parse for loop statement + + + + raise ExtractorError('For loop is not yet supported at %d' % token_pos) + + elif token_value is 'do': + token_stream.pop() + body = self._next_statement(token_stream, stack_top) + + token_id, token_value, token_pos = token_stream.pop() + if token_id is not Token.ID and token_value != 'while': + raise ExtractorError('''Expected 'while' at %d''' % token_pos) + + token_id, token_value, token_pos = token_stream.pop() + if token_id is not Token.POPEN: + raise ExtractorError('''Expected '(' at %d''' % token_pos) + + expr = self._expression(token_stream, stack_top - 1) + + token_id, token_value, token_pos = token_stream.pop() + if token_id is not Token.PCLOSE: + raise ExtractorError('''Expected ')' at %d''' % token_pos) + + statement = (Token.DO, expr, body) + + peek_id, peek_value, peek_pos = token_stream.peek() + if peek_id is not Token.END: + # FIXME automatic end insertion + raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos)) + else: + token_stream.pop() + + elif token_value is 'while': + token_stream.pop() + + token_id, token_value, token_pos = token_stream.pop() + if token_id is not Token.POPEN: + raise ExtractorError('''Expected '(' at %d''' % token_pos) + + expr = self._expression(token_stream, stack_top - 1) + + token_id, token_value, token_pos = token_stream.pop() + if token_id is not Token.PCLOSE: + raise ExtractorError('''Expected ')' at %d''' % token_pos) + + body = self._next_statement(token_stream, stack_top) + statement = (Token.DO, expr, body) elif token_value in ('break', 'continue'): token_stream.pop() @@ -293,6 +339,7 @@ class JSInterpreter(object): else: token_stream.pop() else: + # XXX possible refactoring (this is the only branch not poping) token_id, token_value, token_pos = token_stream.peek(2) if token_id is Token.COLON: token_id, label_name, token_pos = token_stream.pop(2) @@ -304,8 +351,8 @@ class JSInterpreter(object): expr_list = [] has_another = True while has_another: - # ASAP check specs is it just the first AssignmentExpression can't be FunctionExpression? peek_id, peek_value, peek_pos = token_stream.peek() + # XXX this check can be abandoned, it's only here to mirror the grammar if not (peek_id is Token.COPEN and peek_id is Token.ID and peek_value == 'function'): expr_list.append(self._assign_expression(token_stream, stack_top - 1)) peek_id, peek_value, peek_pos = token_stream.peek() @@ -435,7 +482,7 @@ class JSInterpreter(object): elif peek_id is Token.SOPEN: return self._array_literal(token_stream, stack_top - 1) # object - elif peek_id is Token.SCLOSE: + elif peek_id is Token.COPEN: token_stream.pop() open_pos = peek_pos property_list = [] @@ -444,7 +491,7 @@ class JSInterpreter(object): if token_id.CCLOSE: token_stream.pop() break - # XXX consider refactoring + # ASAP refactor elif token_value == 'get': token_id, token_value, token_pos = token_stream.pop() if token_id not in (Token.ID, Token.STR, Token.INT, Token.FLOAT): @@ -467,8 +514,10 @@ class JSInterpreter(object): token_id, token_value, token_pos = token_stream.pop() if token_id is not Token.POPEN: raise ExtractorError('''Expected '(' at %d''' % token_pos) + token_stream.chk_id() token_id, arg, token_pos = token_stream.pop() + token_id, token_value, token_pos = token_stream.pop() if token_id is not Token.PCLOSE: raise ExtractorError('''Expected ')' at %d''' % token_pos) @@ -501,10 +550,8 @@ class JSInterpreter(object): raise ExtractorError('Unbalanced parentheses at %d' % open_pos) token_stream.pop() return expr - # empty (probably) else: - # XXX check specs what to do here - return None + raise ExtractorError('Syntax error at %d' % peek_pos) def _function(self, token_stream, stack_top, is_expr=False): token_stream.pop()