From 61fe8d219f92eaa67567700063582963e6cf5fd4 Mon Sep 17 00:00:00 2001 From: sulyi Date: Sun, 22 Jan 2017 14:26:45 +0100 Subject: [PATCH] [jsbuilt-ins] premerge --- test/js2tests/__init__.py | 65 + test/{jstests => js2tests}/array_access.py | 4 +- test/{jstests => js2tests}/assignments.py | 4 +- test/{jstests => js2tests}/basic.py | 2 +- test/{jstests => js2tests}/branch.py | 4 +- test/{jstests => js2tests}/calc.py | 4 +- test/{jstests => js2tests}/call.py | 4 +- test/{jstests => js2tests}/comments.py | 4 +- test/{jstests => js2tests}/debug.py | 2 +- test/{jstests => js2tests}/do_loop.py | 4 +- test/{jstests => js2tests}/empty_return.py | 2 +- test/{jstests => js2tests}/for_empty.py | 4 +- test/{jstests => js2tests}/for_in.py | 4 +- test/{jstests => js2tests}/for_loop.py | 4 +- test/{jstests => js2tests}/func_expr.py | 4 +- test/{jstests => js2tests}/getfield.py | 2 +- test/{jstests => js2tests}/label.py | 2 +- test/{jstests => js2tests}/morespace.py | 4 +- test/{jstests => js2tests}/object_literal.py | 4 +- test/{jstests => js2tests}/operators.py | 4 +- test/{jstests => js2tests}/parens.py | 4 +- test/{jstests => js2tests}/precedence.py | 4 +- test/{jstests => js2tests}/strange_chars.py | 4 +- test/{jstests => js2tests}/stringprototype.py | 0 test/{jstests => js2tests}/switch.py | 4 +- test/{jstests => js2tests}/try_statement.py | 2 +- test/{jstests => js2tests}/unary.py | 0 test/{jstests => js2tests}/unshift.py | 0 test/{jstests => js2tests}/while_loop.py | 4 +- test/{jstests => js2tests}/with_statement.py | 2 +- test/jstests/__init__.py | 48 - test/test_jsinterp.py | 4 +- test/test_jsinterp_parse.py | 4 +- youtube_dl/jsinterp/jsinterp.py | 1135 ----------------- .../{jsinterp => jsinterp2}/__init__.py | 0 .../jsbuilt_ins/__init__.py | 0 .../jsbuilt_ins/base.py | 0 .../jsbuilt_ins/internals.py | 0 .../jsbuilt_ins/jsarray.py | 0 .../jsbuilt_ins/jsboolean.py | 0 .../jsbuilt_ins/jsfunction.py | 0 .../jsbuilt_ins/jsnumber.py | 0 .../jsbuilt_ins/jsobject.py | 0 
.../jsbuilt_ins/jsstring.py | 0 .../jsbuilt_ins/utils.py | 0 .../{jsinterp => jsinterp2}/jsgrammar.py | 0 youtube_dl/jsinterp2/jsinterp.py | 316 +++++ youtube_dl/jsinterp2/jsparser.py | 835 ++++++++++++ youtube_dl/{jsinterp => jsinterp2}/tstream.py | 1 + 49 files changed, 1266 insertions(+), 1232 deletions(-) create mode 100644 test/js2tests/__init__.py rename test/{jstests => js2tests}/array_access.py (96%) rename test/{jstests => js2tests}/assignments.py (91%) rename test/{jstests => js2tests}/basic.py (91%) rename test/{jstests => js2tests}/branch.py (91%) rename test/{jstests => js2tests}/calc.py (87%) rename test/{jstests => js2tests}/call.py (97%) rename test/{jstests => js2tests}/comments.py (95%) rename test/{jstests => js2tests}/debug.py (83%) rename test/{jstests => js2tests}/do_loop.py (92%) rename test/{jstests => js2tests}/empty_return.py (89%) rename test/{jstests => js2tests}/for_empty.py (92%) rename test/{jstests => js2tests}/for_in.py (92%) rename test/{jstests => js2tests}/for_loop.py (92%) rename test/{jstests => js2tests}/func_expr.py (95%) rename test/{jstests => js2tests}/getfield.py (91%) rename test/{jstests => js2tests}/label.py (82%) rename test/{jstests => js2tests}/morespace.py (88%) rename test/{jstests => js2tests}/object_literal.py (95%) rename test/{jstests => js2tests}/operators.py (92%) rename test/{jstests => js2tests}/parens.py (97%) rename test/{jstests => js2tests}/precedence.py (96%) rename test/{jstests => js2tests}/strange_chars.py (89%) rename test/{jstests => js2tests}/stringprototype.py (100%) rename test/{jstests => js2tests}/switch.py (95%) rename test/{jstests => js2tests}/try_statement.py (82%) rename test/{jstests => js2tests}/unary.py (100%) rename test/{jstests => js2tests}/unshift.py (100%) rename test/{jstests => js2tests}/while_loop.py (92%) rename test/{jstests => js2tests}/with_statement.py (82%) delete mode 100644 test/jstests/__init__.py delete mode 100644 youtube_dl/jsinterp/jsinterp.py rename 
youtube_dl/{jsinterp => jsinterp2}/__init__.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/__init__.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/base.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/internals.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/jsarray.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/jsboolean.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/jsfunction.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/jsnumber.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/jsobject.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/jsstring.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsbuilt_ins/utils.py (100%) rename youtube_dl/{jsinterp => jsinterp2}/jsgrammar.py (100%) create mode 100644 youtube_dl/jsinterp2/jsinterp.py create mode 100644 youtube_dl/jsinterp2/jsparser.py rename youtube_dl/{jsinterp => jsinterp2}/tstream.py (99%) diff --git a/test/js2tests/__init__.py b/test/js2tests/__init__.py new file mode 100644 index 000000000..ecca434de --- /dev/null +++ b/test/js2tests/__init__.py @@ -0,0 +1,65 @@ +# """ +# This package contains templates for `test_jsinterp` and `test_interp_parse` to create test methods. +# These modules will create a test method for each module in this package. A test method consist of one or more subtest. +# Each subtest initializes an instance of the tested class and runs one or more assertion. +# +# Any module should have a `list` of `dict` named ``tests`` and optionally a `dict` named ``skip``. +# +# Each `dict` in ``tests`` may have the following keys: +# +# code: If missing subtest is skipped, Otherwise it's value is used as code to initialize the tested class. +# globals: Optional. Used only by `test_jsinterp`. If set used as argument `variables` initializing `JSInterperter`. +# asserts: Used only by `test_jsinterp`. 
If this is missing subtest is skipped, Should be a list of `dict`, each used +# as an assertion for the initialized `JSInterpreter`. Each `dict` may have the following keys: +# value: If missing assertion is skipped. Otherwise it's value is used as expected value in +# an `assertEqual` call. +# call: Optional. If set used as arguments of a `call_function` call of the initialized `JSInterpreter` +# and the actual value of the created `assertEqual` call will be the return value of it. +# Otherwise the actual value will be the return value of the `run` call. +# ast: Used only by `test_interp_parse`. If missing subtest is skipped, Otherwise it's value is used as +# expected value in an `assertEqual` call. The actual value will be the return value of the `parse` call +# converted to `list`. Both on expected anc actual value `traverse` is called first to flatten and handle `zip` +# objects. +# +# In the `dict` named ``skip`` is optional and may have the following keys: +# interpret +# parse +# Both used as the argument of `skipTest` decorator of the created test method in `test_jsinterp` +# and `test_jsinterp_parse` respectably. Unless they're value is `True`, that case the test method is skipped entirely, +# or `False`, which is the default value. 
+# +# Example: +# This is not a functional template, rather a skeleton: +# +# skip = {'interpret': 'Test not yet implemented', +# 'parse': 'Test not yet implemented'} +# +# tests = [ +# { +# 'code': '', +# 'globals': {}, +# 'asserts': [{'value': 0, 'call': ('f',)}], +# 'ast': [] +# } +# ] +# """ + +from __future__ import unicode_literals + + +def gettestcases(): + import os + + modules = [module[:-3] for module in os.listdir(os.path.dirname(__file__)) + if module != '__init__.py' and module[-3:] == '.py'] + me = __import__(__name__, globals(), locals(), modules) + + for module_name in modules: + module = getattr(me, module_name) + if hasattr(module, 'tests'): + case = { + 'name': module.__name__[len(__name__) + 1:], + 'subtests': module.tests, + 'skip': getattr(module, 'skip', {}) + } + yield case diff --git a/test/jstests/array_access.py b/test/js2tests/array_access.py similarity index 96% rename from test/jstests/array_access.py rename to test/js2tests/array_access.py index 12eae6fed..72d089c15 100644 --- a/test/jstests/array_access.py +++ b/test/js2tests/array_access.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS tests = [ {'code': 'var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;', diff --git a/test/jstests/assignments.py b/test/js2tests/assignments.py similarity index 91% rename from test/jstests/assignments.py rename to test/js2tests/assignments.py index 3565b315f..1705f9e02 100644 --- a/test/jstests/assignments.py +++ b/test/js2tests/assignments.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _OPERATORS, _ASSIGN_OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _OPERATORS, _ASSIGN_OPERATORS tests = [ { diff --git a/test/jstests/basic.py 
b/test/js2tests/basic.py similarity index 91% rename from test/jstests/basic.py rename to test/js2tests/basic.py index 3f99528c4..c6790109b 100644 --- a/test/jstests/basic.py +++ b/test/js2tests/basic.py @@ -1,4 +1,4 @@ -from youtube_dl.jsinterp.jsgrammar import Token +from youtube_dl.jsinterp2.jsgrammar import Token tests = [ { diff --git a/test/jstests/branch.py b/test/js2tests/branch.py similarity index 91% rename from test/jstests/branch.py rename to test/js2tests/branch.py index bd1d38da6..6398f7d89 100644 --- a/test/jstests/branch.py +++ b/test/js2tests/branch.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _RELATIONS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _RELATIONS skip = {'interpret': 'Interpreting if statement not yet implemented'} diff --git a/test/jstests/calc.py b/test/js2tests/calc.py similarity index 87% rename from test/jstests/calc.py rename to test/js2tests/calc.py index 6e9fd8774..f987973eb 100644 --- a/test/jstests/calc.py +++ b/test/js2tests/calc.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _OPERATORS tests = [ {'code': 'return 2*a+1;', diff --git a/test/jstests/call.py b/test/js2tests/call.py similarity index 97% rename from test/jstests/call.py rename to test/js2tests/call.py index ac0fdbb94..2c3d55c95 100644 --- a/test/jstests/call.py +++ b/test/js2tests/call.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _OPERATORS tests = [ { diff --git a/test/jstests/comments.py b/test/js2tests/comments.py similarity index 95% rename from test/jstests/comments.py rename to test/js2tests/comments.py index 
0f297bcde..729e769ac 100644 --- a/test/jstests/comments.py +++ b/test/js2tests/comments.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _OPERATORS tests = [ { diff --git a/test/jstests/debug.py b/test/js2tests/debug.py similarity index 83% rename from test/jstests/debug.py rename to test/js2tests/debug.py index a998cb68e..aa81f8fd9 100644 --- a/test/jstests/debug.py +++ b/test/js2tests/debug.py @@ -1,4 +1,4 @@ -from youtube_dl.jsinterp.jsgrammar import Token +from youtube_dl.jsinterp2.jsgrammar import Token skip = {'interpret': 'Interpreting debugger statement not yet implemented', 'parse': 'Test not yet implemented: missing code and ast'} diff --git a/test/jstests/do_loop.py b/test/js2tests/do_loop.py similarity index 92% rename from test/jstests/do_loop.py rename to test/js2tests/do_loop.py index 6d419b0ca..04d7e0d01 100644 --- a/test/jstests/do_loop.py +++ b/test/js2tests/do_loop.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS, _RELATIONS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS, _RELATIONS skip = {'interpret': 'Interpreting do loop not yet implemented'} diff --git a/test/jstests/empty_return.py b/test/js2tests/empty_return.py similarity index 89% rename from test/jstests/empty_return.py rename to test/js2tests/empty_return.py index 283073fbe..643c38e66 100644 --- a/test/jstests/empty_return.py +++ b/test/js2tests/empty_return.py @@ -1,4 +1,4 @@ -from youtube_dl.jsinterp.jsgrammar import Token +from youtube_dl.jsinterp2.jsgrammar import Token tests = [ {'code': 'return; y()', diff --git a/test/jstests/for_empty.py b/test/js2tests/for_empty.py similarity index 92% rename from test/jstests/for_empty.py rename to 
test/js2tests/for_empty.py index 6a99e5b3f..ba90184fa 100644 --- a/test/jstests/for_empty.py +++ b/test/js2tests/for_empty.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS, _RELATIONS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS, _RELATIONS skip = {'interpret': 'Interpreting for empty loop not yet implemented'} diff --git a/test/jstests/for_in.py b/test/js2tests/for_in.py similarity index 92% rename from test/jstests/for_in.py rename to test/js2tests/for_in.py index af385f007..b5c111a0e 100644 --- a/test/jstests/for_in.py +++ b/test/js2tests/for_in.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS skip = {'interpret': 'Interpreting for in loop not yet implemented'} diff --git a/test/jstests/for_loop.py b/test/js2tests/for_loop.py similarity index 92% rename from test/jstests/for_loop.py rename to test/js2tests/for_loop.py index f45958fe5..60cb03600 100644 --- a/test/jstests/for_loop.py +++ b/test/js2tests/for_loop.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS, _RELATIONS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS, _RELATIONS skip = {'interpret': 'Interpreting for loop not yet implemented'} diff --git a/test/jstests/func_expr.py b/test/js2tests/func_expr.py similarity index 95% rename from test/jstests/func_expr.py rename to test/js2tests/func_expr.py index da43137b7..68e6fa6eb 100644 --- a/test/jstests/func_expr.py +++ b/test/js2tests/func_expr.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from 
youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS skip = {'interpret': 'Interpreting function expression not yet implemented'} diff --git a/test/jstests/getfield.py b/test/js2tests/getfield.py similarity index 91% rename from test/jstests/getfield.py rename to test/js2tests/getfield.py index 39dc1d5b5..3b63ce415 100644 --- a/test/jstests/getfield.py +++ b/test/js2tests/getfield.py @@ -1,4 +1,4 @@ -from youtube_dl.jsinterp.jsgrammar import Token +from youtube_dl.jsinterp2.jsgrammar import Token tests = [ { diff --git a/test/jstests/label.py b/test/js2tests/label.py similarity index 82% rename from test/jstests/label.py rename to test/js2tests/label.py index 91170bdb9..441abbba1 100644 --- a/test/jstests/label.py +++ b/test/js2tests/label.py @@ -1,4 +1,4 @@ -from youtube_dl.jsinterp.jsgrammar import Token +from youtube_dl.jsinterp2.jsgrammar import Token skip = {'interpret': 'Interpreting label not yet implemented', 'parse': 'Test not yet implemented: missing code and ast'} diff --git a/test/jstests/morespace.py b/test/js2tests/morespace.py similarity index 88% rename from test/jstests/morespace.py rename to test/js2tests/morespace.py index 327e46192..2a18235b8 100644 --- a/test/jstests/morespace.py +++ b/test/js2tests/morespace.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS tests = [ { diff --git a/test/jstests/object_literal.py b/test/js2tests/object_literal.py similarity index 95% rename from test/jstests/object_literal.py rename to test/js2tests/object_literal.py index 683128352..ce651eb32 100644 --- a/test/jstests/object_literal.py +++ b/test/js2tests/object_literal.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from 
youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS, _OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS, _OPERATORS skip = {'interpret': 'Interpreting object literals not yet implemented'} diff --git a/test/jstests/operators.py b/test/js2tests/operators.py similarity index 92% rename from test/jstests/operators.py rename to test/js2tests/operators.py index c95a8baca..757cef523 100644 --- a/test/jstests/operators.py +++ b/test/js2tests/operators.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _OPERATORS tests = [ { diff --git a/test/jstests/parens.py b/test/js2tests/parens.py similarity index 97% rename from test/jstests/parens.py rename to test/js2tests/parens.py index 52eef903f..fe433a09b 100644 --- a/test/jstests/parens.py +++ b/test/js2tests/parens.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _OPERATORS tests = [ { diff --git a/test/jstests/precedence.py b/test/js2tests/precedence.py similarity index 96% rename from test/jstests/precedence.py rename to test/js2tests/precedence.py index e8b042e70..47a80fd28 100644 --- a/test/jstests/precedence.py +++ b/test/js2tests/precedence.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS, _OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS, _OPERATORS skip = {'interpret': 'Interpreting built-in fields not yet implemented'} diff --git a/test/jstests/strange_chars.py b/test/js2tests/strange_chars.py similarity index 89% rename from test/jstests/strange_chars.py rename to 
test/js2tests/strange_chars.py index 96355eaed..3d3c9b1ad 100644 --- a/test/jstests/strange_chars.py +++ b/test/js2tests/strange_chars.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _OPERATORS tests = [ { diff --git a/test/jstests/stringprototype.py b/test/js2tests/stringprototype.py similarity index 100% rename from test/jstests/stringprototype.py rename to test/js2tests/stringprototype.py diff --git a/test/jstests/switch.py b/test/js2tests/switch.py similarity index 95% rename from test/jstests/switch.py rename to test/js2tests/switch.py index 22ac2f590..66fed25a9 100644 --- a/test/jstests/switch.py +++ b/test/js2tests/switch.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS skip = {'interpret': 'Interpreting switch statement not yet implemented'} diff --git a/test/jstests/try_statement.py b/test/js2tests/try_statement.py similarity index 82% rename from test/jstests/try_statement.py rename to test/js2tests/try_statement.py index 961ab9ff3..8e93ee398 100644 --- a/test/jstests/try_statement.py +++ b/test/js2tests/try_statement.py @@ -1,4 +1,4 @@ -from youtube_dl.jsinterp.jsgrammar import Token +from youtube_dl.jsinterp2.jsgrammar import Token skip = {'interpret': 'Interpreting try statement not yet implemented', 'parse': 'Test not yet implemented: missing code and ast'} diff --git a/test/jstests/unary.py b/test/js2tests/unary.py similarity index 100% rename from test/jstests/unary.py rename to test/js2tests/unary.py diff --git a/test/jstests/unshift.py b/test/js2tests/unshift.py similarity index 100% rename from test/jstests/unshift.py rename to test/js2tests/unshift.py diff --git 
a/test/jstests/while_loop.py b/test/js2tests/while_loop.py similarity index 92% rename from test/jstests/while_loop.py rename to test/js2tests/while_loop.py index 5a4bc39ee..39078a11b 100644 --- a/test/jstests/while_loop.py +++ b/test/js2tests/while_loop.py @@ -1,5 +1,5 @@ -from youtube_dl.jsinterp.jsgrammar import Token -from youtube_dl.jsinterp.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS, _RELATIONS +from youtube_dl.jsinterp2.jsgrammar import Token +from youtube_dl.jsinterp2.tstream import _ASSIGN_OPERATORS, _UNARY_OPERATORS, _RELATIONS skip = {'interpret': 'Interpreting while loop not yet implemented'} diff --git a/test/jstests/with_statement.py b/test/js2tests/with_statement.py similarity index 82% rename from test/jstests/with_statement.py rename to test/js2tests/with_statement.py index c84aec1c5..84ed25069 100644 --- a/test/jstests/with_statement.py +++ b/test/js2tests/with_statement.py @@ -1,4 +1,4 @@ -from youtube_dl.jsinterp.jsgrammar import Token +from youtube_dl.jsinterp2.jsgrammar import Token skip = {'interpret': 'Interpreting with statement not yet implemented', 'parse': 'Test not yet implemented: missing code and ast'} diff --git a/test/jstests/__init__.py b/test/jstests/__init__.py deleted file mode 100644 index 5c670287b..000000000 --- a/test/jstests/__init__.py +++ /dev/null @@ -1,48 +0,0 @@ -from . 
import ( - basic, - calc, - empty_return, - morespace, - strange_chars, - operators, - unary, - array_access, - parens, - assignments, - comments, - precedence, - call, - getfield, - branch, - switch, - for_loop, - for_empty, - for_in, - do_loop, - while_loop, - label, - func_expr, - object_literal, - try_statement, - with_statement, - debug, - unshift, - stringprototype -) - - -modules = [basic, calc, empty_return, morespace, strange_chars, operators, unary, array_access, parens, assignments, - comments, precedence, call, getfield, branch, switch, for_loop, for_empty, for_in, do_loop, while_loop, - label, func_expr, object_literal, try_statement, with_statement, debug, unshift, stringprototype] - - -def gettestcases(): - for module in modules: - if hasattr(module, 'tests'): - case = {'name': module.__name__[len(__name__) + 1:], 'subtests': [], 'skip': {}} - for test in getattr(module, 'tests'): - if 'code' in test: - case['subtests'].append(test) - if hasattr(module, 'skip'): - case['skip'] = getattr(module, 'skip') - yield case diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 36b6b7cb0..d818c51f4 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -13,8 +13,8 @@ else: import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.jsinterp import JSInterpreter -from test.jstests import gettestcases +from youtube_dl.jsinterp2 import JSInterpreter +from test.js2tests import gettestcases defs = gettestcases() # set level to logging.DEBUG to see messages about missing assertions diff --git a/test/test_jsinterp_parse.py b/test/test_jsinterp_parse.py index 9aaf3f44b..3cb827b8c 100644 --- a/test/test_jsinterp_parse.py +++ b/test/test_jsinterp_parse.py @@ -14,8 +14,8 @@ else: import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.jsinterp import JSInterpreter -from .jstests import gettestcases +from youtube_dl.jsinterp2 import 
JSInterpreter +from .js2tests import gettestcases def traverse(node, tree_types=(list, tuple)): diff --git a/youtube_dl/jsinterp/jsinterp.py b/youtube_dl/jsinterp/jsinterp.py deleted file mode 100644 index fdcb7bf65..000000000 --- a/youtube_dl/jsinterp/jsinterp.py +++ /dev/null @@ -1,1135 +0,0 @@ -from __future__ import unicode_literals - -import re - -from . import jsbuilt_ins -from .jsgrammar import Token, token_keys -from .tstream import TokenStream, convert_to_unary -from ..compat import compat_str -from ..utils import ExtractorError - - -class Context(object): - def __init__(self, variables=None, ended=False): - self.ended = ended - self.no_in = True - self.local_vars = {} - if variables is not None: - for k, v in dict(variables).items(): - # XXX validate identifiers - self.local_vars[k] = Reference(v, (self.local_vars, k)) - - -class Reference(object): - def __init__(self, value, parent=None): - self._value = value - self._parent = parent - - def getvalue(self, deep=False): - value = self._value - if deep: - if isinstance(self._value, (list, tuple)): - # TODO test nested arrays - value = [elem.getvalue() for elem in self._value] - elif isinstance(self._value, dict): - value = {} - for key, prop in self._value.items(): - value[key] = prop.getvalue() - - return value - - def putvalue(self, value): - if self._parent is None: - raise ExtractorError('Trying to set a read-only reference') - parent, key = self._parent - if not hasattr(parent, '__setitem__'): - raise ExtractorError('Unknown reference') - parent.__setitem__(key, Reference(value, (parent, key))) - self._value = value - return value - - def __repr__(self): - if self._parent is not None: - parent, key = self._parent - return '' % ( - str(self._value), parent.__class__.__name__, id(parent), key) - return '' % (self._value, None) - - -class JSInterpreter(object): - # TODO support json - - def __init__(self, code, variables=None): - self.code = code - self.global_vars = {} - if variables is not None: - for 
k, v in dict(variables).items(): - # XXX validate identifiers - self.global_vars[k] = self.create_reference(v, (self.global_vars, k)) - self._context = Context() - self._context_stack = [] - - @property - def this(self): - return self._context.local_vars - - def parse(self, code=None, pos=0, stack_size=100): - if code is None: - code = self.code - ts = TokenStream(code, pos) - while not ts.ended: - yield self._source_element(ts, stack_size) - raise StopIteration - - def create_reference(self, value, parent_key): - if isinstance(value, dict): - o = {} - for k, v in value.items(): - o[k] = self.create_reference(v, (o, k)) - elif isinstance(value, (list, tuple, set)): - o = [] - for k, v in enumerate(value): - o[k] = self.create_reference(v, (o, k)) - elif isinstance(value, (int, float, compat_str, bool, re._pattern_type)) or value is None: - o = value - else: - raise ExtractorError('Unsupported type, %s in variables' % type(value)) - - return Reference(o, parent_key) - - def _source_element(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.ID and token_value == 'function': - source_element = self._function(token_stream, stack_top - 1) - else: - source_element = self._statement(token_stream, stack_top - 1) - - return source_element - - def _statement(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - statement = None - token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.END: - # empty statement goes straight here - token_stream.pop() - return statement - - # block - elif token_id is Token.COPEN: - # XXX refactor will deprecate some _statement calls - open_pos = token_pos - token_stream.pop() - block = [] - while True: - token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.CCLOSE: - token_stream.pop() - break - elif token_id is 
Token.END and token_stream.ended: - raise ExtractorError('Unbalanced parentheses at %d' % open_pos) - block.append(self._statement(token_stream, stack_top - 1)) - - statement = (Token.BLOCK, block) - - elif token_id is Token.ID: - if token_value == 'var': - token_stream.pop() - variables = [] - init = [] - has_another = True - while has_another: - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.ID: - raise ExtractorError('Missing variable name at %d' % token_pos) - token_stream.chk_id(last=True) - variables.append(token_value) - - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.AOP: - token_stream.pop() - init.append(self._assign_expression(token_stream, stack_top - 1)) - peek_id, peek_value, peek_pos = token_stream.peek() - else: - init.append(jsbuilt_ins.undefined) - - if peek_id is Token.END: - if self._context.no_in: - token_stream.pop() - has_another = False - elif peek_id is Token.COMMA: - pass - else: - # FIXME automatic end insertion - # - token_id is Token.CCLOSE - # - check line terminator - # - restricted token - raise ExtractorError('Unexpected sequence at %d' % peek_pos) - statement = (Token.VAR, zip(variables, init)) - - elif token_value == 'if': - statement = self._if_statement(token_stream, stack_top - 1) - - elif token_value == 'for': - statement = self._for_loop(token_stream, stack_top - 1) - - elif token_value == 'do': - statement = self._do_loop(token_stream, stack_top - 1) - - elif token_value == 'while': - statement = self._while_loop(token_stream, stack_top - 1) - - elif token_value in ('break', 'continue'): - token_stream.pop() - token = {'break': Token.BREAK, 'continue': Token.CONTINUE}[token_value] - peek_id, peek_value, peek_pos = token_stream.peek() - # XXX no line break here - label_name = None - if peek_id is not Token.END: - token_stream.chk_id() - label_name = peek_value - token_stream.pop() - statement = (token, label_name) - peek_id, peek_value, peek_pos = 
token_stream.peek() - if peek_id is Token.END: - token_stream.pop() - else: - # FIXME automatic end insertion - raise ExtractorError('Unexpected sequence at %d' % peek_pos) - - elif token_value == 'return': - statement = self._return_statement(token_stream, stack_top - 1) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.END: - token_stream.pop() - else: - # FIXME automatic end insertion - raise ExtractorError('Unexpected sequence at %d' % peek_pos) - - elif token_value == 'with': - statement = self._with_statement(token_stream, stack_top - 1) - - elif token_value == 'switch': - statement = self._switch_statement(token_stream, stack_top - 1) - - elif token_value == 'throw': - token_stream.pop() - # XXX no line break here - expr = self._expression(token_stream, stack_top - 1) - statement = (Token.RETURN, expr) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.END: - token_stream.pop() - else: - # FIXME automatic end insertion - raise ExtractorError('Unexpected sequence at %d' % peek_pos) - - elif token_value == 'try': - statement = self._try_statement(token_stream, stack_top - 1) - - elif token_value == 'debugger': - token_stream.pop() - statement = (Token.DEBUG) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.END: - token_stream.pop() - else: - # FIXME automatic end insertion - raise ExtractorError('Unexpected sequence at %d' % peek_pos) - else: # label - # XXX possible refactoring (this is the only branch not poping) - token_id, token_value, token_pos = token_stream.peek(2) - if token_id is Token.COLON: - token_id, label_name, token_pos = token_stream.pop(2) - token_stream.chk_id(last=True) - statement = (Token.LABEL, label_name, self._statement(token_stream, stack_top - 1)) - - # expr - if statement is None: - statement = self._expression(token_stream, stack_top - 1) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.END: - token_stream.pop() - else: - # FIXME 
automatic end insertion - raise ExtractorError('Unexpected sequence at %d' % peek_pos) - - return statement - - def _if_statement(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_stream.pop() - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.POPEN: - raise ExtractorError('Missing condition at %d' % token_pos) - cond_expr = self._expression(token_stream, stack_top - 1) - token_stream.pop() # Token.PCLOSE - true_stmt = self._statement(token_stream, stack_top - 1) - false_stmt = None - token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.ID and token_value == 'else': - token_stream.pop() - false_stmt = self._statement(token_stream, stack_top - 1) - return (Token.IF, cond_expr, true_stmt, false_stmt) - - def _for_loop(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_stream.pop() - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.POPEN: - raise ExtractorError('''Expected '(' at %d''' % token_pos) - - # FIXME set infor True (checked by variable declaration and relation expression) - self._context.no_in = False - token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.END: - init = None - elif token_id is Token.ID and token_value == 'var': - # XXX change it on refactoring variable declaration list - init = self._statement(token_stream, stack_top - 1) - else: - init = self._expression(token_stream, stack_top - 1) - self._context.no_in = True - - token_id, token_value, token_pos = token_stream.pop() - if token_id is Token.ID and token_value == 'in': - cond = self._expression(token_stream, stack_top - 1) - # FIXME further processing of operator 'in' needed for interpretation - incr = None - # NOTE ES6 has 'of' operator - elif token_id is Token.END: - token_id, token_value, token_pos = token_stream.peek() - cond = None if token_id is 
Token.END else self._expression(token_stream, stack_top - 1) - - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.END: - raise ExtractorError('''Expected ';' at %d''' % token_pos) - - token_id, token_value, token_pos = token_stream.peek() - incr = None if token_id is Token.END else self._expression(token_stream, stack_top - 1) - else: - raise ExtractorError('Invalid condition in for loop initialization at %d' % token_pos) - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.PCLOSE: - raise ExtractorError('''Expected ')' at %d''' % token_pos) - body = self._statement(token_stream, stack_top - 1) - return (Token.FOR, init, cond, incr, body) - - def _do_loop(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_stream.pop() - body = self._statement(token_stream, stack_top - 1) - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.ID and token_value != 'while': - raise ExtractorError('''Expected 'while' at %d''' % token_pos) - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.POPEN: - raise ExtractorError('''Expected '(' at %d''' % token_pos) - expr = self._expression(token_stream, stack_top - 1) - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.PCLOSE: - raise ExtractorError('''Expected ')' at %d''' % token_pos) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.END: - token_stream.pop() - else: - # FIXME automatic end insertion - raise ExtractorError('''Expected ';' at %d''' % peek_pos) - return (Token.DO, expr, body) - - def _while_loop(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_stream.pop() - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.POPEN: - raise ExtractorError('''Expected '(' at %d''' % token_pos) - expr = 
self._expression(token_stream, stack_top - 1) - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.PCLOSE: - raise ExtractorError('''Expected ')' at %d''' % token_pos) - body = self._statement(token_stream, stack_top) - return (Token.WHILE, expr, body) - - def _return_statement(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_stream.pop() - peek_id, peek_value, peek_pos = token_stream.peek() - # XXX no line break here - expr = self._expression(token_stream, stack_top - 1) if peek_id is not Token.END else None - return (Token.RETURN, expr) - - def _with_statement(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_stream.pop() - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.POPEN: - raise ExtractorError('Missing expression at %d' % token_pos) - expr = self._expression(token_stream, stack_top - 1) - token_stream.pop() # Token.PCLOSE - return (Token.WITH, expr, self._statement(token_stream, stack_top - 1)) - - def _switch_statement(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_stream.pop() - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.POPEN: - raise ExtractorError('Missing expression at %d' % token_pos) - discriminant = self._expression(token_stream, stack_top - 1) - token_stream.pop() # Token.PCLOSE - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.COPEN: - raise ExtractorError('Missing case block at %d' % token_pos) - open_pos = token_pos - has_default = False - block = [] - while True: - token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.CCLOSE: - break - elif token_id is Token.ID and token_value == 'case': - token_stream.pop() - expr = self._expression(token_stream, stack_top - 1) - - elif token_id is Token.ID and 
token_value == 'default': - if has_default: - raise ExtractorError('Multiple default clause') - token_stream.pop() - has_default = True - expr = None - - elif token_id is Token.END and token_stream.ended: - raise ExtractorError('Unbalanced parentheses at %d' % open_pos) - else: - raise ExtractorError('Unexpected sequence at %d, default or case clause is expected' % - token_pos) - - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.COLON: - raise ExtractorError('''Unexpected sequence at %d, ':' is expected''' % token_pos) - - statement_list = [] - while True: - token_id, token_value, token_pos = token_stream.peek() - if token_id == Token.CCLOSE or (token_id is Token.ID and (token_value in ('default', 'case'))): - break - elif token_id is Token.END and token_stream.ended: - raise ExtractorError('Unbalanced parentheses at %d' % open_pos) - statement_list.append(self._statement(token_stream, stack_top - 1)) - - block.append((expr, statement_list)) - token_stream.pop() - return (Token.SWITCH, discriminant, block) - - def _try_statement(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_stream.pop() - token_id, token_value, token_pos = token_stream.peek() - if token_id is not Token.COPEN: - raise ExtractorError('Block is expected at %d' % token_pos) - try_block = self._statement(token_stream, stack_top - 1) - token_id, token_value, token_pos = token_stream.pop() - catch_block = None - if token_id is Token.ID and token_value == 'catch': - token_id, token_value, token_pos = token_stream.peek() - if token_id is not Token.POPEN: - raise ExtractorError('Catch clause is missing an identifier at %d' % token_pos) - token_stream.pop() - token_stream.chk_id() - token_id, error_name, token_pos = token_stream.pop() - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.PCLOSE: - raise ExtractorError('Catch clause expects a single identifier at %d' % token_pos) - 
token_id, token_value, token_pos = token_stream.peek() - if token_id is not Token.COPEN: - raise ExtractorError('Block is expected at %d' % token_pos) - catch_block = (error_name, self._statement(token_stream, stack_top - 1)) - finally_block = None - if token_id is Token.ID and token_value == 'finally': - token_id, token_value, token_pos = token_stream.peek() - if token_id is not Token.COPEN: - raise ExtractorError('Block is expected at %d' % token_pos) - finally_block = self._statement(token_stream, stack_top - 1) - if catch_block is None and finally_block is None: - raise ExtractorError('Try statement is expecting catch or finally at %d' % token_pos) - return (Token.TRY, try_block, catch_block, finally_block) - - def _expression(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - expr_list = [] - has_another = True - while has_another: - expr_list.append(self._assign_expression(token_stream, stack_top - 1)) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.COMMA: - token_stream.pop() - elif peek_id is Token.ID and peek_value == 'yield': - # TODO parse yield - raise ExtractorError('Yield statement is not yet supported at %d' % peek_pos) - else: - has_another = False - return (Token.EXPR, expr_list) - - def _assign_expression(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - left = self._conditional_expression(token_stream, stack_top - 1) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.AOP: - token_stream.pop() - _, op = peek_value - right = self._assign_expression(token_stream, stack_top - 1) - else: - op = None - right = None - return (Token.ASSIGN, op, left, right) - - def _member_expression(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.ID and peek_value == 'new': - 
token_stream.pop() - target = self._member_expression(token_stream, stack_top - 1) - args = self._arguments(token_stream, stack_top - 1) - # Rhino has check for args length - # Rhino has experimental syntax allowing an object literal to follow a new expression - else: - target = self._primary_expression(token_stream, stack_top) - args = None - - return (Token.MEMBER, target, args, self._member_tail(token_stream, stack_top - 1)) - - def _member_tail(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.DOT: - token_stream.pop() - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.DOT: - token_stream.pop() - peek_id, peek_value, peek_pos = token_stream.peek() - elif peek_id is Token.POPEN: - # TODO parse field query - raise ExtractorError('Field query is not yet supported at %d' % peek_pos) - - if peek_id is Token.ID: - token_stream.pop() - return (Token.FIELD, peek_value, self._member_tail(token_stream, stack_top - 1)) - else: - raise ExtractorError('Identifier name expected at %d' % peek_pos) - elif peek_id is Token.SOPEN: - token_stream.pop() - index = self._expression(token_stream, stack_top - 1) - token_id, token_value, token_pos = token_stream.pop() - if token_id is Token.SCLOSE: - return (Token.ELEM, index, self._member_tail(token_stream, stack_top - 1)) - else: - raise ExtractorError('Unexpected sequence at %d' % token_pos) - elif peek_id is Token.POPEN: - args = self._arguments(token_stream, stack_top - 1) - return (Token.CALL, args, self._member_tail(token_stream, stack_top - 1)) - else: - return None - - def _primary_expression(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - # TODO support let - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id in token_keys: - if peek_id is Token.ID: - # this - if peek_value == 'this': - 
token_stream.pop() - return (Token.RSV, 'this') - # function expr - elif peek_value == 'function': - return self._function(token_stream, stack_top - 1, True) - # id - else: - token_stream.chk_id() - token_stream.pop() - return (Token.ID, peek_value) - # literals - else: - token_stream.pop() - return (peek_id, peek_value) - # array - elif peek_id is Token.SOPEN: - return self._array_literal(token_stream, stack_top - 1) - # object - elif peek_id is Token.COPEN: - return self._object_literal(token_stream, stack_top) - # expr - elif peek_id is Token.POPEN: - token_stream.pop() - open_pos = peek_pos - expr = self._expression(token_stream, stack_top - 1) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is not Token.PCLOSE: - raise ExtractorError('Unbalanced parentheses at %d' % open_pos) - token_stream.pop() - return expr - else: - raise ExtractorError('Syntax error at %d' % peek_pos) - - def _function(self, token_stream, stack_top, is_expr=False): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_stream.pop() - token_id, token_value, token_pos = token_stream.peek() - - name = None - if token_id is Token.ID: - token_stream.chk_id() - token_id, name, token_pos = token_stream.pop() - token_id, token_value, token_pos = token_stream.peek() - elif not is_expr: - raise ExtractorError('Function declaration at %d is missing identifier' % token_pos) - - if token_id is not Token.POPEN: - raise ExtractorError('Expected argument list at %d' % token_pos) - - # args - token_stream.pop() - open_pos = token_pos - args = [] - while True: - token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.PCLOSE: - token_stream.pop() - break - token_stream.chk_id() - token_stream.pop() - args.append(token_value) - token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.COMMA: - token_stream.pop() - elif token_id is Token.PCLOSE: - pass - elif token_id is Token.END and token_stream.ended: - raise 
ExtractorError('Unbalanced parentheses at %d' % open_pos) - else: - raise ExtractorError('Expected , separator at %d' % token_pos) - - token_id, token_value, token_pos = token_stream.peek() - if token_id is not Token.COPEN: - raise ExtractorError('Expected function body at %d' % token_pos) - - return (Token.FUNC, name, args, (self._function_body(token_stream, stack_top - 1))) - - def _function_body(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_id, token_value, open_pos = token_stream.pop() - body = [] - while True: - token_id, token_value, token_pos = token_stream.peek() - if token_id is Token.CCLOSE: - token_stream.pop() - break - elif token_id is Token.END and token_stream.ended: - raise ExtractorError('Unbalanced parentheses at %d' % open_pos) - body.append(self._source_element(token_stream, stack_top - 1)) - - return body - - def _arguments(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.POPEN: - token_stream.pop() - open_pos = peek_pos - else: - return None - args = [] - while True: - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.PCLOSE: - token_stream.pop() - return args - # FIXME handle infor - args.append(self._assign_expression(token_stream, stack_top - 1)) - # TODO parse generator expression - peek_id, peek_value, peek_pos = token_stream.peek() - - if peek_id is Token.COMMA: - token_stream.pop() - elif peek_id is Token.PCLOSE: - pass - elif peek_id is Token.END and token_stream.ended: - raise ExtractorError('Unbalanced parentheses at %d' % open_pos) - else: - raise ExtractorError('''Expected ',' separator at %d''' % peek_pos) - - def _array_literal(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - # XXX check no linebreak here - peek_id, peek_value, peek_pos = 
token_stream.peek() - if peek_id is not Token.SOPEN: - raise ExtractorError('Array expected at %d' % peek_pos) - token_stream.pop() - elements = [] - - has_another = True - while has_another: - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.COMMA: - token_stream.pop() - elements.append(None) - elif peek_id is Token.SCLOSE: - token_stream.pop() - has_another = False - elif peek_id is Token.ID and peek_value == 'for': - # TODO parse array comprehension - raise ExtractorError('Array comprehension is not yet supported at %d' % peek_pos) - else: - elements.append(self._assign_expression(token_stream, stack_top - 1)) - peek_id, peek_value, peek_pos = token_stream.pop() - if peek_id is Token.SCLOSE: - has_another = False - elif peek_id is not Token.COMMA: - raise ExtractorError('''Expected ',' after element at %d''' % peek_pos) - - return (Token.ARRAY, elements) - - def _object_literal(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - token_id, token_value, open_pos = token_stream.pop() - property_list = [] - while True: - token_id, token_value, token_pos = token_stream.pop() - if token_id is Token.CCLOSE: - break - elif token_id is Token.COMMA: - continue - elif token_id is Token.ID and token_value in ('get', 'set'): - is_set = token_id is Token.ID and token_value == 'set' - - token_id, token_value, token_pos = token_stream.pop() - if token_id not in (Token.ID, Token.STR, Token.INT, Token.FLOAT): - raise ExtractorError('Property name is expected at %d' % token_pos) - property_name = token_value - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.POPEN: - raise ExtractorError('''Expected '(' at %d''' % token_pos) - - if is_set: - token_stream.chk_id() - token_id, arg, token_pos = token_stream.pop() - - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.PCLOSE: - raise ExtractorError('''Expected ')' at %d''' % token_pos) - - if 
is_set: - desc = (Token.PROPSET, arg, self._function_body(token_stream, stack_top - 1)) - else: - desc = (Token.PROPGET, self._function_body(token_stream, stack_top - 1)) - - elif token_id in (Token.ID, Token.STR, Token.INT, Token.FLOAT): - property_name = token_value - token_id, token_value, token_pos = token_stream.pop() - if token_id is not Token.COLON: - raise ExtractorError('Property name is expected at %d' % token_pos) - - desc = (Token.PROPVALUE, self._assign_expression(token_stream, stack_top - 1)) - - elif token_stream.ended: - raise ExtractorError('Unmatched parentheses at %d' % open_pos) - else: - raise ExtractorError('Property assignment is expected at %d' % token_pos) - - property_list.append((property_name, desc)) - - return (Token.OBJECT, property_list) - - def _conditional_expression(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - expr = self._operator_expression(token_stream, stack_top - 1) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.HOOK: - hook_pos = peek_pos - true_expr = self._assign_expression(token_stream, stack_top - 1) - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.COLON: - false_expr = self._assign_expression(token_stream, stack_top - 1) - else: - raise ExtractorError('Missing : in conditional expression at %d' % hook_pos) - return (Token.COND, expr, true_expr, false_expr) - return expr - - def _operator_expression(self, token_stream, stack_top): - if stack_top < 0: - raise ExtractorError('Recursion limit reached') - - # --<---------------------------------<-- op --<--------------------------<---- - # | | - # | --<-- prefix --<-- -->-- postfix -->-- | - # | | ^ ^ | ^ - # v v | | v | - # ->------------>----------->-- lefthand-side expression -->----------->------------>---| - # - # 20 grouping - # ... 
# handled by lefthandside_expression - # 17 postfix - # 16 unary - # 15 exponentiation # not yet found in grammar - # 14 mul - # 13 add - # 12 shift - # 11 rel - # 10 eq - # 9 band - # 8 bxor - # 7 bor - # 6 land - # 5 lor - # 4 cond # handled by conditional_expression - - out = [] - stack = [] - - while True: - had_inc = False - has_prefix = True - while has_prefix: - peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id is Token.OP and peek_value[0] in (Token.ADD, Token.SUB): - # any binary operators will be consumed later - peek_id = Token.UOP - peek_value = convert_to_unary(peek_value) - if peek_id is Token.UOP: - name, op = peek_value - had_inc = name in (Token.INC, Token.DEC) - if had_inc: - peek_id = Token.PREFIX - while stack and stack[-1][0] > 16: - _, stack_id, stack_op = stack.pop() - out.append((stack_id, stack_op)) - stack.append((16, peek_id, op)) - token_stream.pop() - peek_id, peek_value, peek_pos = token_stream.peek() - if had_inc and peek_id is not Token.ID: - raise ExtractorError('Prefix operator has to be followed by an identifier at %d' % peek_pos) - has_prefix = peek_id is Token.UOP - else: - has_prefix = False - - left = self._member_expression(token_stream, stack_top - 1) - out.append(left) - - peek_id, peek_value, peek_pos = token_stream.peek() - # postfix - if peek_id is Token.UOP: - if had_inc: - raise ExtractorError('''Can't have prefix and postfix operator at the same time at %d''' % peek_pos) - name, op = peek_value - if name in (Token.INC, Token.DEC): - peek_id = Token.POSTFIX - prec = 17 - else: - raise ExtractorError('Unexpected operator at %d' % peek_pos) - while stack and stack[-1][0] >= 17: - _, stack_id, stack_op = stack.pop() - out.append((stack_id, stack_op)) - stack.append((prec, peek_id, op)) - token_stream.pop() - peek_id, peek_value, peek_pos = token_stream.peek() - - if peek_id is Token.REL: - name, op = peek_value - prec = 11 - elif peek_id is Token.OP: - name, op = peek_value - if name in (Token.MUL, 
Token.DIV, Token.MOD): - prec = 14 - elif name in (Token.ADD, Token.SUB): - prec = 13 - elif name in (Token.RSHIFT, Token.LSHIFT, Token.URSHIFT): - prec = 12 - elif name is Token.BAND: - prec = 9 - elif name is Token.BXOR: - prec = 8 - elif name is Token.BOR: - prec = 7 - else: - raise ExtractorError('Unexpected operator at %d' % peek_pos) - elif peek_id is Token.LOP: - name, op = peek_value - prec = {Token.OR: 5, Token.AND: 6}[name] - else: - op = None - prec = 4 # empties stack - - while stack and stack[-1][0] >= prec: - _, stack_id, stack_op = stack.pop() - out.append((stack_id, stack_op)) - if op is None: - break - else: - stack.append((prec, peek_id, op)) - token_stream.pop() - - return (Token.OPEXPR, out) - - def interpret_statement(self, stmt): - if stmt is None: - return None - - name = stmt[0] - ref = None - if name == Token.FUNC: - name, args, body = stmt[1:] - if name is not None: - if self._context_stack: - self.this[name] = Reference(self.build_function(args, body), (self.this, name)) - else: - self.global_vars[name] = Reference(self.build_function(args, body), (self.this, name)) - else: - raise ExtractorError('Function expression is not yet implemented') - elif name is Token.BLOCK: - block = stmt[1] - for stmt in block: - s = self.interpret_statement(stmt) - if s is not None: - ref = s.getvalue() - elif name is Token.VAR: - for name, value in stmt[1]: - self.this[name] = Reference(self.interpret_expression(value).getvalue(), - (self.this, name)) - elif name is Token.EXPR: - for expr in stmt[1]: - ref = self.interpret_expression(expr) - # if - # continue, break - elif name is Token.RETURN: - ref = self.interpret_statement(stmt[1]) - self._context.ended = True - # with - # label - # switch - # throw - # try - # debugger - else: - raise ExtractorError('''Can't interpret statement called %s''' % name) - return ref - - def interpret_expression(self, expr): - if expr is None: - return - name = expr[0] - - if name is Token.ASSIGN: - op, left, right = 
expr[1:] - if op is None: - ref = self.interpret_expression(left) - else: - try: - leftref = self.interpret_expression(left) - except KeyError: - lname = left[0] - key = None - if lname is Token.OPEXPR and len(left[1]) == 1: - lname = left[1][0][0] - if lname is Token.MEMBER: - lid, args, tail = left[1][0][1:] - if lid[0] is Token.ID and args is None and tail is None: - key = lid[1] - if key is not None: - u = Reference(jsbuilt_ins.undefined, (self.this, key)) - leftref = self.this[key] = u - else: - raise ExtractorError('Invalid left-hand side in assignment') - leftvalue = leftref.getvalue() - rightvalue = self.interpret_expression(right).getvalue() - leftref.putvalue(op(leftvalue, rightvalue)) - # XXX check specs what to return - ref = leftref - - elif name is Token.EXPR: - ref = self.interpret_statement(expr) - - elif name is Token.OPEXPR: - stack = [] - postfix = [] - rpn = expr[1][:] - # FIXME support pre- and postfix operators - while rpn: - token = rpn.pop(0) - # XXX relation 'in' 'instanceof' - if token[0] in (Token.OP, Token.AOP, Token.LOP, Token.REL): - right = stack.pop() - left = stack.pop() - stack.append(Reference(token[1](left.getvalue(), right.getvalue()))) - # XXX add unary operator 'delete', 'void', 'instanceof' - elif token[0] is Token.UOP: - right = stack.pop() - stack.append(Reference(token[1](right.getvalue()))) - elif token[0] is Token.PREFIX: - right = stack.pop() - stack.append(Reference(right.putvalue(token[1](right.getvalue())))) - elif token[0] is Token.POSTFIX: - postfix.append((stack[-1], token[1])) - else: - stack.append(self.interpret_expression(token)) - result = stack.pop() - if not stack: - for operand, op in postfix: - operand.putvalue(op(operand.getvalue())) - ref = result - else: - raise ExtractorError('Expression has too many values') - - elif name is Token.MEMBER: - # TODO interpret member - target, args, tail = expr[1:] - target = self.interpret_expression(target) - if args is not None: - # TODO interpret NewExpression - 
pass - while tail is not None: - tail_name, tail_value, tail = tail - if tail_name is Token.FIELD: - target = target.getvalue()[tail_value] - elif tail_name is Token.ELEM: - index = self.interpret_expression(tail_value).getvalue() - target = target.getvalue()[index] - elif tail_name is Token.CALL: - args = (self.interpret_expression(arg).getvalue() for arg in tail_value) - target = Reference(target.getvalue()(*args)) - ref = target - - elif name is Token.ID: - # XXX error handling (unknown id) - ref = (self.this[expr[1]] if expr[1] in self.this else - self.global_vars[expr[1]]) - - # literal - elif name in token_keys: - ref = Reference(expr[1]) - - elif name is Token.ARRAY: - array = [] - for key, elem in enumerate(expr[1]): - value = self.interpret_expression(elem).getvalue() - array.append(Reference(value, (array, key))) - ref = Reference(array) - - else: - raise ExtractorError('''Can't interpret expression called %s''' % name) - - return ref - - def extract_object(self, objname): - obj = {} - obj_m = re.search( - (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + - r'\s*(?P([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' + - r'\}\s*;', - self.code) - fields = obj_m.group('fields') - # Currently, it only supports function definitions - fields_m = re.finditer( - r'(?P[a-zA-Z$0-9]+)\s*:\s*function' - r'\((?P[a-z,]+)\){(?P[^}]+)}', - fields) - for f in fields_m: - argnames = f.group('args').split(',') - obj[f.group('key')] = self.build_function(argnames, self.parse(f.group('code'))) - - return obj - - def extract_function(self, funcname): - func_m = re.search( - r'''(?x) - (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* - \((?P[^)]*)\)\s* - \{(?P[^}]+)\}''' % ( - re.escape(funcname), re.escape(funcname), re.escape(funcname)), - self.code) - if func_m is None: - raise ExtractorError('Could not find JS function %r' % funcname) - argnames = func_m.group('args').split(',') - - return self.build_function(argnames, 
self.parse(func_m.group('code'))) - - def push_context(self, cx): - self._context_stack.append(self._context) - self._context = cx - - def pop_context(self): - # XXX check underflow - self._context = self._context_stack.pop() - - def call_function(self, funcname, *args): - f = (self.this[funcname] if funcname in self.this else - self.global_vars[funcname] if funcname in self.global_vars else - self.extract_function(funcname)) - return f(*args) - - def build_function(self, argnames, ast): - def resf(*args): - self.push_context(Context(dict(zip(argnames, args)))) - res = None - for stmt in ast: - res = self.interpret_statement(stmt) - res = None if res is None else res.getvalue(deep=True) - if self._context.ended: - self.pop_context() - break - return res - return resf - - def run(self, cx=None): - if cx is not None: - self.push_context(cx) - res = None - for stmt in self.parse(): - res = self.interpret_statement(stmt) - res = None if res is None else res.getvalue(deep=True) - if self._context.ended: - if cx is not None: - self.pop_context() - break - return res diff --git a/youtube_dl/jsinterp/__init__.py b/youtube_dl/jsinterp2/__init__.py similarity index 100% rename from youtube_dl/jsinterp/__init__.py rename to youtube_dl/jsinterp2/__init__.py diff --git a/youtube_dl/jsinterp/jsbuilt_ins/__init__.py b/youtube_dl/jsinterp2/jsbuilt_ins/__init__.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/__init__.py rename to youtube_dl/jsinterp2/jsbuilt_ins/__init__.py diff --git a/youtube_dl/jsinterp/jsbuilt_ins/base.py b/youtube_dl/jsinterp2/jsbuilt_ins/base.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/base.py rename to youtube_dl/jsinterp2/jsbuilt_ins/base.py diff --git a/youtube_dl/jsinterp/jsbuilt_ins/internals.py b/youtube_dl/jsinterp2/jsbuilt_ins/internals.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/internals.py rename to youtube_dl/jsinterp2/jsbuilt_ins/internals.py diff --git 
a/youtube_dl/jsinterp/jsbuilt_ins/jsarray.py b/youtube_dl/jsinterp2/jsbuilt_ins/jsarray.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/jsarray.py rename to youtube_dl/jsinterp2/jsbuilt_ins/jsarray.py diff --git a/youtube_dl/jsinterp/jsbuilt_ins/jsboolean.py b/youtube_dl/jsinterp2/jsbuilt_ins/jsboolean.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/jsboolean.py rename to youtube_dl/jsinterp2/jsbuilt_ins/jsboolean.py diff --git a/youtube_dl/jsinterp/jsbuilt_ins/jsfunction.py b/youtube_dl/jsinterp2/jsbuilt_ins/jsfunction.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/jsfunction.py rename to youtube_dl/jsinterp2/jsbuilt_ins/jsfunction.py diff --git a/youtube_dl/jsinterp/jsbuilt_ins/jsnumber.py b/youtube_dl/jsinterp2/jsbuilt_ins/jsnumber.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/jsnumber.py rename to youtube_dl/jsinterp2/jsbuilt_ins/jsnumber.py diff --git a/youtube_dl/jsinterp/jsbuilt_ins/jsobject.py b/youtube_dl/jsinterp2/jsbuilt_ins/jsobject.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/jsobject.py rename to youtube_dl/jsinterp2/jsbuilt_ins/jsobject.py diff --git a/youtube_dl/jsinterp/jsbuilt_ins/jsstring.py b/youtube_dl/jsinterp2/jsbuilt_ins/jsstring.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/jsstring.py rename to youtube_dl/jsinterp2/jsbuilt_ins/jsstring.py diff --git a/youtube_dl/jsinterp/jsbuilt_ins/utils.py b/youtube_dl/jsinterp2/jsbuilt_ins/utils.py similarity index 100% rename from youtube_dl/jsinterp/jsbuilt_ins/utils.py rename to youtube_dl/jsinterp2/jsbuilt_ins/utils.py diff --git a/youtube_dl/jsinterp/jsgrammar.py b/youtube_dl/jsinterp2/jsgrammar.py similarity index 100% rename from youtube_dl/jsinterp/jsgrammar.py rename to youtube_dl/jsinterp2/jsgrammar.py diff --git a/youtube_dl/jsinterp2/jsinterp.py b/youtube_dl/jsinterp2/jsinterp.py new file mode 100644 index 000000000..0a30907da --- /dev/null +++ 
b/youtube_dl/jsinterp2/jsinterp.py @@ -0,0 +1,316 @@ +from __future__ import unicode_literals + +import re + +from ..compat import compat_str +from ..utils import ExtractorError +from .jsparser import Parser +from .jsgrammar import Token, token_keys + + +class Context(object): + def __init__(self, variables=None, ended=False): + super(Context, self).__init__() + self.ended = ended + self.no_in = True + self.local_vars = {} + if variables is not None: + for k, v in dict(variables).items(): + # XXX validate identifiers + self.local_vars[k] = Reference(v, (self.local_vars, k)) + + +class Reference(object): + def __init__(self, value, parent=None): + super(Reference, self).__init__() + self._value = value + self._parent = parent + + def getvalue(self, deep=False): + value = self._value + if deep: + if isinstance(self._value, (list, tuple)): + # TODO test nested arrays + value = [elem.getvalue() for elem in self._value] + elif isinstance(self._value, dict): + value = {} + for key, prop in self._value.items(): + value[key] = prop.getvalue() + + return value + + def putvalue(self, value): + if self._parent is None: + raise ExtractorError('Trying to set a read-only reference') + parent, key = self._parent + if not hasattr(parent, '__setitem__'): + raise ExtractorError('Unknown reference') + parent.__setitem__(key, Reference(value, (parent, key))) + self._value = value + return value + + def __repr__(self): + if self._parent is not None: + parent, key = self._parent + return '' % ( + str(self._value), parent.__class__.__name__, id(parent), key) + return '' % (self._value, None) + + +class JSInterpreter(object): + # TODO support json + + undefined = object() + + def __init__(self, code, variables=None): + super(JSInterpreter, self).__init__() + self.code = code + self.global_vars = {} + if variables is not None: + for k, v in dict(variables).items(): + # XXX validate identifiers + self.global_vars[k] = self.create_reference(v, (self.global_vars, k)) + self._context = 
Context() + self._context_stack = [] + + @property + def this(self): + return self._context.local_vars + + def create_reference(self, value, parent_key): + if isinstance(value, dict): + o = {} + for k, v in value.items(): + o[k] = self.create_reference(v, (o, k)) + elif isinstance(value, (list, tuple, set)): + o = [] + for k, v in enumerate(value): + o[k] = self.create_reference(v, (o, k)) + elif isinstance(value, (int, float, compat_str, bool, re._pattern_type)) or value is None: + o = value + else: + raise ExtractorError('Unsupported type, %s in variables' % type(value)) + + return Reference(o, parent_key) + + def interpret_statement(self, stmt): + if stmt is None: + return None + + name = stmt[0] + ref = None + if name == Token.FUNC: + name, args, body = stmt[1:] + if name is not None: + if self._context_stack: + self.this[name] = Reference(self.build_function(args, body), (self.this, name)) + else: + self.global_vars[name] = Reference(self.build_function(args, body), (self.this, name)) + else: + raise ExtractorError('Function expression is not yet implemented') + elif name is Token.BLOCK: + block = stmt[1] + for stmt in block: + s = self.interpret_statement(stmt) + if s is not None: + ref = s.getvalue() + elif name is Token.VAR: + for name, value in stmt[1]: + value = self.interpret_expression(value).getvalue() if value is not None else self.undefined + self.this[name] = Reference(value, (self.this, name)) + elif name is Token.EXPR: + for expr in stmt[1]: + ref = self.interpret_expression(expr) + # if + # continue, break + elif name is Token.RETURN: + ref = self.interpret_statement(stmt[1]) + self._context.ended = True + # with + # label + # switch + # throw + # try + # debugger + else: + raise ExtractorError('''Can't interpret statement called %s''' % name) + return ref + + def interpret_expression(self, expr): + if expr is None: + return + name = expr[0] + + if name is Token.ASSIGN: + op, left, right = expr[1:] + if op is None: + ref = 
self.interpret_expression(left) + else: + try: + leftref = self.interpret_expression(left) + except KeyError: + lname = left[0] + key = None + if lname is Token.OPEXPR and len(left[1]) == 1: + lname = left[1][0][0] + if lname is Token.MEMBER: + lid, args, tail = left[1][0][1:] + if lid[0] is Token.ID and args is None and tail is None: + key = lid[1] + if key is not None: + u = Reference(self.undefined, (self.this, key)) + leftref = self.this[key] = u + else: + raise ExtractorError('Invalid left-hand side in assignment') + leftvalue = leftref.getvalue() + rightvalue = self.interpret_expression(right).getvalue() + leftref.putvalue(op(leftvalue, rightvalue)) + # XXX check specs what to return + ref = leftref + + elif name is Token.EXPR: + ref = self.interpret_statement(expr) + + elif name is Token.OPEXPR: + stack = [] + postfix = [] + rpn = expr[1][:] + # FIXME support pre- and postfix operators + while rpn: + token = rpn.pop(0) + # XXX relation 'in' 'instanceof' + if token[0] in (Token.OP, Token.AOP, Token.LOP, Token.REL): + right = stack.pop() + left = stack.pop() + stack.append(Reference(token[1](left.getvalue(), right.getvalue()))) + # XXX add unary operator 'delete', 'void', 'instanceof' + elif token[0] is Token.UOP: + right = stack.pop() + stack.append(Reference(token[1](right.getvalue()))) + elif token[0] is Token.PREFIX: + right = stack.pop() + stack.append(Reference(right.putvalue(token[1](right.getvalue())))) + elif token[0] is Token.POSTFIX: + postfix.append((stack[-1], token[1])) + else: + stack.append(self.interpret_expression(token)) + result = stack.pop() + if not stack: + for operand, op in postfix: + operand.putvalue(op(operand.getvalue())) + ref = result + else: + raise ExtractorError('Expression has too many values') + + elif name is Token.MEMBER: + # TODO interpret member + target, args, tail = expr[1:] + target = self.interpret_expression(target) + if args is not None: + # TODO interpret NewExpression + pass + while tail is not None: + tail_name, 
tail_value, tail = tail + if tail_name is Token.FIELD: + target = target.getvalue()[tail_value] + elif tail_name is Token.ELEM: + index = self.interpret_expression(tail_value).getvalue() + target = target.getvalue()[index] + elif tail_name is Token.CALL: + args = (self.interpret_expression(arg).getvalue() for arg in tail_value) + target = Reference(target.getvalue()(*args)) + ref = target + + elif name is Token.ID: + # XXX error handling (unknown id) + ref = (self.this[expr[1]] if expr[1] in self.this else + self.global_vars[expr[1]]) + + # literal + elif name in token_keys: + ref = Reference(expr[1]) + + elif name is Token.ARRAY: + array = [] + for key, elem in enumerate(expr[1]): + value = self.interpret_expression(elem).getvalue() + array.append(Reference(value, (array, key))) + ref = Reference(array) + + else: + raise ExtractorError('''Can't interpret expression called %s''' % name) + + return ref + + def extract_object(self, objname): + obj = {} + obj_m = re.search( + (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + + r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' + + r'\}\s*;', + self.code) + fields = obj_m.group('fields') + # Currently, it only supports function definitions + fields_m = re.finditer( + r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function' + r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', + fields) + for f in fields_m: + argnames = f.group('args').split(',') + obj[f.group('key')] = self.build_function(argnames, Parser(f.group('code')).parse()) + + return obj + + def extract_function(self, funcname): + func_m = re.search( + r'''(?x) + (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* + \((?P<args>[^)]*)\)\s* + \{(?P<code>[^}]+)\}''' % ( + re.escape(funcname), re.escape(funcname), re.escape(funcname)), + self.code) + if func_m is None: + raise ExtractorError('Could not find JS function %r' % funcname) + argnames = func_m.group('args').split(',') + + return self.build_function(argnames, Parser(func_m.group('code')).parse()) + + def 
push_context(self, cx): + self._context_stack.append(self._context) + self._context = cx + + def pop_context(self): + # XXX check underflow + self._context = self._context_stack.pop() + + def call_function(self, funcname, *args): + f = (self.this[funcname] if funcname in self.this else + self.global_vars[funcname] if funcname in self.global_vars else + self.extract_function(funcname)) + return f(*args) + + def build_function(self, argnames, ast): + def resf(*args): + self.push_context(Context(dict(zip(argnames, args)))) + res = None + for stmt in ast: + res = self.interpret_statement(stmt) + res = None if res is None else res.getvalue(deep=True) + if self._context.ended: + self.pop_context() + break + return res + return resf + + def run(self, cx=None): + if cx is not None: + self.push_context(cx) + res = None + for stmt in Parser(self.code).parse(): + res = self.interpret_statement(stmt) + res = None if res is None else res.getvalue(deep=True) + if self._context.ended: + if cx is not None: + self.pop_context() + break + return res diff --git a/youtube_dl/jsinterp2/jsparser.py b/youtube_dl/jsinterp2/jsparser.py new file mode 100644 index 000000000..3564d4713 --- /dev/null +++ b/youtube_dl/jsinterp2/jsparser.py @@ -0,0 +1,835 @@ +from __future__ import unicode_literals + +from ..utils import ExtractorError +from .jsgrammar import Token, token_keys +from .tstream import TokenStream, convert_to_unary + + +class Parser(object): + + def __init__(self, code, pos=0, stack_size=100): + super(Parser, self).__init__() + self.token_stream = TokenStream(code, pos) + self.stack_top = stack_size + self._no_in = True + + def parse(self): + while not self.token_stream.ended: + yield self._source_element(self.stack_top) + raise StopIteration + + def _source_element(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is Token.ID and token_value == 'function': + 
source_element = self._function(stack_top - 1) + else: + source_element = self._statement(stack_top - 1) + + return source_element + + def _statement(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + statement = None + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is Token.END: + # empty statement goes straight here + self.token_stream.pop() + return statement + + # block + elif token_id is Token.COPEN: + # XXX refactor will deprecate some _statement calls + open_pos = token_pos + self.token_stream.pop() + block = [] + while True: + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is Token.CCLOSE: + self.token_stream.pop() + break + elif token_id is Token.END and self.token_stream.ended: + raise ExtractorError('Unbalanced parentheses at %d' % open_pos) + block.append(self._statement(stack_top - 1)) + + statement = (Token.BLOCK, block) + + elif token_id is Token.ID: + if token_value == 'var': + self.token_stream.pop() + variables = [] + init = [] + has_another = True + while has_another: + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.ID: + raise ExtractorError('Missing variable name at %d' % token_pos) + self.token_stream.chk_id(last=True) + variables.append(token_value) + + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.AOP: + self.token_stream.pop() + init.append(self._assign_expression(stack_top - 1)) + peek_id, peek_value, peek_pos = self.token_stream.peek() + else: + init.append(None) + + if peek_id is Token.END: + if self._no_in: + self.token_stream.pop() + has_another = False + elif peek_id is Token.COMMA: + # TODO for not NoIn + pass + else: + # FIXME automatic end insertion + # - token_id is Token.CCLOSE + # - check line terminator + # - restricted token + raise ExtractorError('Unexpected sequence at %d' % peek_pos) + statement = (Token.VAR, zip(variables, init)) + + elif token_value == 'if': 
+ statement = self._if_statement(stack_top - 1) + + elif token_value == 'for': + statement = self._for_loop(stack_top - 1) + + elif token_value == 'do': + statement = self._do_loop(stack_top - 1) + + elif token_value == 'while': + statement = self._while_loop(stack_top - 1) + + elif token_value in ('break', 'continue'): + self.token_stream.pop() + token = {'break': Token.BREAK, 'continue': Token.CONTINUE}[token_value] + peek_id, peek_value, peek_pos = self.token_stream.peek() + # XXX no line break here + label_name = None + if peek_id is not Token.END: + self.token_stream.chk_id() + label_name = peek_value + self.token_stream.pop() + statement = (token, label_name) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.END: + self.token_stream.pop() + else: + # FIXME automatic end insertion + raise ExtractorError('Unexpected sequence at %d' % peek_pos) + + elif token_value == 'return': + statement = self._return_statement(stack_top - 1) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.END: + self.token_stream.pop() + else: + # FIXME automatic end insertion + raise ExtractorError('Unexpected sequence at %d' % peek_pos) + + elif token_value == 'with': + statement = self._with_statement(stack_top - 1) + + elif token_value == 'switch': + statement = self._switch_statement(stack_top - 1) + + elif token_value == 'throw': + self.token_stream.pop() + # XXX no line break here + expr = self._expression(stack_top - 1) + statement = (Token.RETURN, expr) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.END: + self.token_stream.pop() + else: + # FIXME automatic end insertion + raise ExtractorError('Unexpected sequence at %d' % peek_pos) + + elif token_value == 'try': + statement = self._try_statement(stack_top - 1) + + elif token_value == 'debugger': + self.token_stream.pop() + statement = (Token.DEBUG) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.END: + 
self.token_stream.pop() + else: + # FIXME automatic end insertion + raise ExtractorError('Unexpected sequence at %d' % peek_pos) + else: # label + # XXX possible refactoring (this is the only branch not poping) + token_id, token_value, token_pos = self.token_stream.peek(2) + if token_id is Token.COLON: + token_id, label_name, token_pos = self.token_stream.pop(2) + self.token_stream.chk_id(last=True) + statement = (Token.LABEL, label_name, self._statement(stack_top - 1)) + + # expr + if statement is None: + statement = self._expression(stack_top - 1) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.END: + self.token_stream.pop() + else: + # FIXME automatic end insertion + raise ExtractorError('Unexpected sequence at %d' % peek_pos) + + return statement + + def _if_statement(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + self.token_stream.pop() + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.POPEN: + raise ExtractorError('Missing condition at %d' % token_pos) + cond_expr = self._expression(stack_top - 1) + self.token_stream.pop() # Token.PCLOSE + true_stmt = self._statement(stack_top - 1) + false_stmt = None + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is Token.ID and token_value == 'else': + self.token_stream.pop() + false_stmt = self._statement(stack_top - 1) + return (Token.IF, cond_expr, true_stmt, false_stmt) + + def _for_loop(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + self.token_stream.pop() + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.POPEN: + raise ExtractorError('''Expected '(' at %d''' % token_pos) + + # FIXME set infor True (checked by variable declaration and relation expression) + self._no_in = False + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is Token.END: + init = None + elif token_id 
is Token.ID and token_value == 'var': + # XXX change it on refactoring variable declaration list + init = self._statement(stack_top - 1) + else: + init = self._expression(stack_top - 1) + self._no_in = True + + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is Token.ID and token_value == 'in': + cond = self._expression(stack_top - 1) + # FIXME further processing of operator 'in' needed for interpretation + incr = None + # NOTE ES6 has 'of' operator + elif token_id is Token.END: + token_id, token_value, token_pos = self.token_stream.peek() + cond = None if token_id is Token.END else self._expression(stack_top - 1) + + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.END: + raise ExtractorError('''Expected ';' at %d''' % token_pos) + + token_id, token_value, token_pos = self.token_stream.peek() + incr = None if token_id is Token.END else self._expression(stack_top - 1) + else: + raise ExtractorError('Invalid condition in for loop initialization at %d' % token_pos) + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.PCLOSE: + raise ExtractorError('''Expected ')' at %d''' % token_pos) + body = self._statement(stack_top - 1) + return (Token.FOR, init, cond, incr, body) + + def _do_loop(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + self.token_stream.pop() + body = self._statement(stack_top - 1) + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.ID and token_value != 'while': + raise ExtractorError('''Expected 'while' at %d''' % token_pos) + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.POPEN: + raise ExtractorError('''Expected '(' at %d''' % token_pos) + expr = self._expression(stack_top - 1) + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.PCLOSE: + raise ExtractorError('''Expected ')' at %d''' % token_pos) + 
peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.END: + self.token_stream.pop() + else: + # FIXME automatic end insertion + raise ExtractorError('''Expected ';' at %d''' % peek_pos) + return (Token.DO, expr, body) + + def _while_loop(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + self.token_stream.pop() + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.POPEN: + raise ExtractorError('''Expected '(' at %d''' % token_pos) + expr = self._expression(stack_top - 1) + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.PCLOSE: + raise ExtractorError('''Expected ')' at %d''' % token_pos) + body = self._statement(stack_top) + return (Token.WHILE, expr, body) + + def _return_statement(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + self.token_stream.pop() + peek_id, peek_value, peek_pos = self.token_stream.peek() + # XXX no line break here + expr = self._expression(stack_top - 1) if peek_id is not Token.END else None + return (Token.RETURN, expr) + + def _with_statement(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + self.token_stream.pop() + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.POPEN: + raise ExtractorError('Missing expression at %d' % token_pos) + expr = self._expression(stack_top - 1) + self.token_stream.pop() # Token.PCLOSE + return (Token.WITH, expr, self._statement(stack_top - 1)) + + def _switch_statement(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + self.token_stream.pop() + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.POPEN: + raise ExtractorError('Missing expression at %d' % token_pos) + discriminant = self._expression(stack_top - 1) + self.token_stream.pop() # Token.PCLOSE + token_id, token_value, 
token_pos = self.token_stream.pop() + if token_id is not Token.COPEN: + raise ExtractorError('Missing case block at %d' % token_pos) + open_pos = token_pos + has_default = False + block = [] + while True: + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is Token.CCLOSE: + break + elif token_id is Token.ID and token_value == 'case': + self.token_stream.pop() + expr = self._expression(stack_top - 1) + + elif token_id is Token.ID and token_value == 'default': + if has_default: + raise ExtractorError('Multiple default clause') + self.token_stream.pop() + has_default = True + expr = None + + elif token_id is Token.END and self.token_stream.ended: + raise ExtractorError('Unbalanced parentheses at %d' % open_pos) + else: + raise ExtractorError('Unexpected sequence at %d, default or case clause is expected' % + token_pos) + + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.COLON: + raise ExtractorError('''Unexpected sequence at %d, ':' is expected''' % token_pos) + + statement_list = [] + while True: + token_id, token_value, token_pos = self.token_stream.peek() + if token_id == Token.CCLOSE or (token_id is Token.ID and (token_value in ('default', 'case'))): + break + elif token_id is Token.END and self.token_stream.ended: + raise ExtractorError('Unbalanced parentheses at %d' % open_pos) + statement_list.append(self._statement(stack_top - 1)) + + block.append((expr, statement_list)) + self.token_stream.pop() + return (Token.SWITCH, discriminant, block) + + def _try_statement(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + self.token_stream.pop() + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is not Token.COPEN: + raise ExtractorError('Block is expected at %d' % token_pos) + try_block = self._statement(stack_top - 1) + token_id, token_value, token_pos = self.token_stream.pop() + catch_block = None + if token_id is Token.ID and token_value 
== 'catch': + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is not Token.POPEN: + raise ExtractorError('Catch clause is missing an identifier at %d' % token_pos) + self.token_stream.pop() + self.token_stream.chk_id() + token_id, error_name, token_pos = self.token_stream.pop() + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.PCLOSE: + raise ExtractorError('Catch clause expects a single identifier at %d' % token_pos) + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is not Token.COPEN: + raise ExtractorError('Block is expected at %d' % token_pos) + catch_block = (error_name, self._statement(stack_top - 1)) + finally_block = None + if token_id is Token.ID and token_value == 'finally': + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is not Token.COPEN: + raise ExtractorError('Block is expected at %d' % token_pos) + finally_block = self._statement(stack_top - 1) + if catch_block is None and finally_block is None: + raise ExtractorError('Try statement is expecting catch or finally at %d' % token_pos) + return (Token.TRY, try_block, catch_block, finally_block) + + def _expression(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + expr_list = [] + has_another = True + while has_another: + expr_list.append(self._assign_expression(stack_top - 1)) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.COMMA: + self.token_stream.pop() + elif peek_id is Token.ID and peek_value == 'yield': + # TODO parse yield + raise ExtractorError('Yield statement is not yet supported at %d' % peek_pos) + else: + has_another = False + return (Token.EXPR, expr_list) + + def _assign_expression(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + left = self._conditional_expression(stack_top - 1) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is 
Token.AOP: + self.token_stream.pop() + _, op = peek_value + right = self._assign_expression(stack_top - 1) + else: + op = None + right = None + return (Token.ASSIGN, op, left, right) + + def _member_expression(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.ID and peek_value == 'new': + self.token_stream.pop() + target = self._member_expression(stack_top - 1) + args = self._arguments(stack_top - 1) + # Rhino has check for args length + # Rhino has experimental syntax allowing an object literal to follow a new expression + else: + target = self._primary_expression(stack_top) + args = None + + return (Token.MEMBER, target, args, self._member_tail(stack_top - 1)) + + def _member_tail(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.DOT: + self.token_stream.pop() + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.DOT: + self.token_stream.pop() + peek_id, peek_value, peek_pos = self.token_stream.peek() + elif peek_id is Token.POPEN: + # TODO parse field query + raise ExtractorError('Field query is not yet supported at %d' % peek_pos) + + if peek_id is Token.ID: + self.token_stream.pop() + return (Token.FIELD, peek_value, self._member_tail(stack_top - 1)) + else: + raise ExtractorError('Identifier name expected at %d' % peek_pos) + elif peek_id is Token.SOPEN: + self.token_stream.pop() + index = self._expression(stack_top - 1) + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is Token.SCLOSE: + return (Token.ELEM, index, self._member_tail(stack_top - 1)) + else: + raise ExtractorError('Unexpected sequence at %d' % token_pos) + elif peek_id is Token.POPEN: + args = self._arguments(stack_top - 1) + return (Token.CALL, args, self._member_tail(stack_top - 1)) + else: + return None 
+ + def _primary_expression(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + # TODO support let + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id in token_keys: + if peek_id is Token.ID: + # this + if peek_value == 'this': + self.token_stream.pop() + return (Token.RSV, 'this') + # function expr + elif peek_value == 'function': + return self._function(stack_top - 1, True) + # id + else: + self.token_stream.chk_id() + self.token_stream.pop() + return (Token.ID, peek_value) + # literals + else: + self.token_stream.pop() + return (peek_id, peek_value) + # array + elif peek_id is Token.SOPEN: + return self._array_literal(stack_top - 1) + # object + elif peek_id is Token.COPEN: + return self._object_literal(stack_top) + # expr + elif peek_id is Token.POPEN: + self.token_stream.pop() + open_pos = peek_pos + expr = self._expression(stack_top - 1) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is not Token.PCLOSE: + raise ExtractorError('Unbalanced parentheses at %d' % open_pos) + self.token_stream.pop() + return expr + else: + raise ExtractorError('Syntax error at %d' % peek_pos) + + def _function(self, stack_top, is_expr=False): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + self.token_stream.pop() + token_id, token_value, token_pos = self.token_stream.peek() + + name = None + if token_id is Token.ID: + self.token_stream.chk_id() + token_id, name, token_pos = self.token_stream.pop() + token_id, token_value, token_pos = self.token_stream.peek() + elif not is_expr: + raise ExtractorError('Function declaration at %d is missing identifier' % token_pos) + + if token_id is not Token.POPEN: + raise ExtractorError('Expected argument list at %d' % token_pos) + + # args + self.token_stream.pop() + open_pos = token_pos + args = [] + while True: + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is Token.PCLOSE: + self.token_stream.pop() + 
break + self.token_stream.chk_id() + self.token_stream.pop() + args.append(token_value) + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is Token.COMMA: + self.token_stream.pop() + elif token_id is Token.PCLOSE: + pass + elif token_id is Token.END and self.token_stream.ended: + raise ExtractorError('Unbalanced parentheses at %d' % open_pos) + else: + raise ExtractorError('Expected , separator at %d' % token_pos) + + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is not Token.COPEN: + raise ExtractorError('Expected function body at %d' % token_pos) + + return (Token.FUNC, name, args, (self._function_body(stack_top - 1))) + + def _function_body(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + token_id, token_value, open_pos = self.token_stream.pop() + body = [] + while True: + token_id, token_value, token_pos = self.token_stream.peek() + if token_id is Token.CCLOSE: + self.token_stream.pop() + break + elif token_id is Token.END and self.token_stream.ended: + raise ExtractorError('Unbalanced parentheses at %d' % open_pos) + body.append(self._source_element(stack_top - 1)) + + return body + + def _arguments(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.POPEN: + self.token_stream.pop() + open_pos = peek_pos + else: + return None + args = [] + while True: + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.PCLOSE: + self.token_stream.pop() + return args + # FIXME handle infor + args.append(self._assign_expression(stack_top - 1)) + # TODO parse generator expression + peek_id, peek_value, peek_pos = self.token_stream.peek() + + if peek_id is Token.COMMA: + self.token_stream.pop() + elif peek_id is Token.PCLOSE: + pass + elif peek_id is Token.END and self.token_stream.ended: + raise ExtractorError('Unbalanced parentheses at %d' 
% open_pos) + else: + raise ExtractorError('''Expected ',' separator at %d''' % peek_pos) + + def _array_literal(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + # XXX check no linebreak here + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is not Token.SOPEN: + raise ExtractorError('Array expected at %d' % peek_pos) + self.token_stream.pop() + elements = [] + + has_another = True + while has_another: + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.COMMA: + self.token_stream.pop() + elements.append(None) + elif peek_id is Token.SCLOSE: + self.token_stream.pop() + has_another = False + elif peek_id is Token.ID and peek_value == 'for': + # TODO parse array comprehension + raise ExtractorError('Array comprehension is not yet supported at %d' % peek_pos) + else: + elements.append(self._assign_expression(stack_top - 1)) + peek_id, peek_value, peek_pos = self.token_stream.pop() + if peek_id is Token.SCLOSE: + has_another = False + elif peek_id is not Token.COMMA: + raise ExtractorError('''Expected ',' after element at %d''' % peek_pos) + + return (Token.ARRAY, elements) + + def _object_literal(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + token_id, token_value, open_pos = self.token_stream.pop() + property_list = [] + while True: + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is Token.CCLOSE: + break + elif token_id is Token.COMMA: + continue + elif token_id is Token.ID and token_value in ('get', 'set'): + is_set = token_id is Token.ID and token_value == 'set' + + token_id, token_value, token_pos = self.token_stream.pop() + if token_id not in (Token.ID, Token.STR, Token.INT, Token.FLOAT): + raise ExtractorError('Property name is expected at %d' % token_pos) + property_name = token_value + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.POPEN: + raise 
ExtractorError('''Expected '(' at %d''' % token_pos) + + if is_set: + self.token_stream.chk_id() + token_id, arg, token_pos = self.token_stream.pop() + + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.PCLOSE: + raise ExtractorError('''Expected ')' at %d''' % token_pos) + + if is_set: + desc = (Token.PROPSET, arg, self._function_body(stack_top - 1)) + else: + desc = (Token.PROPGET, self._function_body(stack_top - 1)) + + elif token_id in (Token.ID, Token.STR, Token.INT, Token.FLOAT): + property_name = token_value + token_id, token_value, token_pos = self.token_stream.pop() + if token_id is not Token.COLON: + raise ExtractorError('Property name is expected at %d' % token_pos) + + desc = (Token.PROPVALUE, self._assign_expression(stack_top - 1)) + + elif self.token_stream.ended: + raise ExtractorError('Unmatched parentheses at %d' % open_pos) + else: + raise ExtractorError('Property assignment is expected at %d' % token_pos) + + property_list.append((property_name, desc)) + + return (Token.OBJECT, property_list) + + def _conditional_expression(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + expr = self._operator_expression(stack_top - 1) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.HOOK: + hook_pos = peek_pos + true_expr = self._assign_expression(stack_top - 1) + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.COLON: + false_expr = self._assign_expression(stack_top - 1) + else: + raise ExtractorError('Missing : in conditional expression at %d' % hook_pos) + return (Token.COND, expr, true_expr, false_expr) + return expr + + def _operator_expression(self, stack_top): + if stack_top < 0: + raise ExtractorError('Recursion limit reached') + + # --<---------------------------------<-- op --<--------------------------<---- + # | | + # | --<-- prefix --<-- -->-- postfix -->-- | + # | | ^ ^ | ^ + # v v | | v | + # 
->------------>----------->-- lefthand-side expression -->----------->------------>---| + # + # 20 grouping + # ... # handled by lefthandside_expression + # 17 postfix + # 16 unary + # 15 exponentiation # not yet found in grammar + # 14 mul + # 13 add + # 12 shift + # 11 rel + # 10 eq + # 9 band + # 8 bxor + # 7 bor + # 6 land + # 5 lor + # 4 cond # handled by conditional_expression + + out = [] + stack = [] + + while True: + had_inc = False + has_prefix = True + while has_prefix: + peek_id, peek_value, peek_pos = self.token_stream.peek() + if peek_id is Token.OP and peek_value[0] in (Token.ADD, Token.SUB): + # any binary operators will be consumed later + peek_id = Token.UOP + peek_value = convert_to_unary(peek_value) + if peek_id is Token.UOP: + name, op = peek_value + had_inc = name in (Token.INC, Token.DEC) + if had_inc: + peek_id = Token.PREFIX + while stack and stack[-1][0] > 16: + _, stack_id, stack_op = stack.pop() + out.append((stack_id, stack_op)) + stack.append((16, peek_id, op)) + self.token_stream.pop() + peek_id, peek_value, peek_pos = self.token_stream.peek() + if had_inc and peek_id is not Token.ID: + raise ExtractorError('Prefix operator has to be followed by an identifier at %d' % peek_pos) + has_prefix = peek_id is Token.UOP + else: + has_prefix = False + + left = self._member_expression(stack_top - 1) + out.append(left) + + peek_id, peek_value, peek_pos = self.token_stream.peek() + # postfix + if peek_id is Token.UOP: + if had_inc: + raise ExtractorError('''Can't have prefix and postfix operator at the same time at %d''' % peek_pos) + name, op = peek_value + if name in (Token.INC, Token.DEC): + peek_id = Token.POSTFIX + prec = 17 + else: + raise ExtractorError('Unexpected operator at %d' % peek_pos) + while stack and stack[-1][0] >= 17: + _, stack_id, stack_op = stack.pop() + out.append((stack_id, stack_op)) + stack.append((prec, peek_id, op)) + self.token_stream.pop() + peek_id, peek_value, peek_pos = self.token_stream.peek() + + if peek_id is 
Token.REL: + name, op = peek_value + prec = 11 + elif peek_id is Token.OP: + name, op = peek_value + if name in (Token.MUL, Token.DIV, Token.MOD): + prec = 14 + elif name in (Token.ADD, Token.SUB): + prec = 13 + elif name in (Token.RSHIFT, Token.LSHIFT, Token.URSHIFT): + prec = 12 + elif name is Token.BAND: + prec = 9 + elif name is Token.BXOR: + prec = 8 + elif name is Token.BOR: + prec = 7 + else: + raise ExtractorError('Unexpected operator at %d' % peek_pos) + elif peek_id is Token.LOP: + name, op = peek_value + prec = {Token.OR: 5, Token.AND: 6}[name] + else: + op = None + prec = 4 # empties stack + + while stack and stack[-1][0] >= prec: + _, stack_id, stack_op = stack.pop() + out.append((stack_id, stack_op)) + if op is None: + break + else: + stack.append((prec, peek_id, op)) + self.token_stream.pop() + + return (Token.OPEXPR, out) diff --git a/youtube_dl/jsinterp/tstream.py b/youtube_dl/jsinterp2/tstream.py similarity index 99% rename from youtube_dl/jsinterp/tstream.py rename to youtube_dl/jsinterp2/tstream.py index 8a37b53c2..55bb87985 100644 --- a/youtube_dl/jsinterp/tstream.py +++ b/youtube_dl/jsinterp2/tstream.py @@ -104,6 +104,7 @@ def convert_to_unary(token_value): class TokenStream(object): def __init__(self, code, start=0): + super(TokenStream, self).__init__() self.code = code self.ended = False self.peeked = []