diff --git a/test/jstests/unshift.py b/test/jstests/unshift.py index de76f2cab..02ab96874 100644 --- a/test/jstests/unshift.py +++ b/test/jstests/unshift.py @@ -1,6 +1,4 @@ -skip = {'p': 'Signed integers not yet supported'} - tests = [ { 'code': ''' diff --git a/youtube_dl/jsinterp/jsgrammar.py b/youtube_dl/jsinterp/jsgrammar.py index b44714bcb..a306df770 100644 --- a/youtube_dl/jsinterp/jsgrammar.py +++ b/youtube_dl/jsinterp/jsgrammar.py @@ -6,7 +6,7 @@ from collections import namedtuple _token_keys = ('COPEN', 'CCLOSE', 'POPEN', 'PCLOSE', 'SOPEN', 'SCLOSE', 'DOT', 'END', 'COMMA', 'HOOK', 'COLON', - 'AND', 'OR', 'INC', 'DEC', 'NOT', 'BNOT', 'DEL', 'VOID', 'TYPE', + 'AND', 'OR', 'PLUS', 'NEG', 'INC', 'DEC', 'NOT', 'BNOT', 'DEL', 'VOID', 'TYPE', 'LT', 'GT', 'LE', 'GE', 'EQ', 'NE', 'SEQ', 'SNE', 'IN', 'INSTANCEOF', 'BOR', 'BXOR', 'BAND', 'RSHIFT', 'LSHIFT', 'URSHIFT', 'SUB', 'ADD', 'MOD', 'DIV', 'MUL', 'OP', 'AOP', 'UOP', 'LOP', 'REL', @@ -48,7 +48,6 @@ _SINGLE_QUOTED_RE = r"""'(?:(?:\\'|\n)|[^'\n])*'""" _DOUBLE_QUOTED_RE = r'''"(?:(?:\\"|\n)|[^"\n])*"''' _STRING_RE = r'(?:%s)|(?:%s)' % (_SINGLE_QUOTED_RE, _DOUBLE_QUOTED_RE) -# FIXME signed values _INTEGER_RE = r'(?:%(hex)s)|(?:%(dec)s)|(?:%(oct)s)' % {'hex': __HEXADECIMAL_RE, 'dec': __DECIMAL_RE, 'oct': __OCTAL_RE} _FLOAT_RE = r'(?:(?:%(dec)s\.[0-9]*)|(?:\.[0-9]+))(?:[eE][+-]?[0-9]+)?' % {'dec': __DECIMAL_RE} @@ -62,15 +61,10 @@ _NULL_RE = r'null' _REGEX_FLAGS_RE = r'(?![gimy]*(?P[gimy])[gimy]*(?P=reflag))(?P<%s>[gimy]{0,4}\b)' % 'REFLAGS' _REGEX_RE = r'/(?!\*)(?P<%s>(?:[^/\n]|(?:\\/))*)/(?:(?:%s)|(?:\s|$))' % ('REBODY', _REGEX_FLAGS_RE) -_TOKENS = [ - (Token.NULL, _NULL_RE), - (Token.BOOL, _BOOL_RE), - (Token.ID, _NAME_RE), - (Token.STR, _STRING_RE), - (Token.INT, _INTEGER_RE), - (Token.FLOAT, _FLOAT_RE), - (Token.REGEX, _REGEX_RE) -] +token_keys = Token.NULL, Token.BOOL, Token.ID, Token.STR, Token.INT, Token.FLOAT, Token.REGEX + +_TOKENS = zip(token_keys, (_NULL_RE, _BOOL_RE, _NAME_RE, _STRING_RE, _INTEGER_RE, _FLOAT_RE, _REGEX_RE)) + COMMENT_RE = r'(?P<%s>/\*(?:(?!\*/)(?:\n|.))*\*/)' % Token.COMMENT TOKENS_RE = r'|'.join('(?P<%(id)s>%(value)s)' % {'id': name, 'value': value} diff --git a/youtube_dl/jsinterp/jsinterp.py b/youtube_dl/jsinterp/jsinterp.py index 52e3dc2bf..dae614945 100644 --- a/youtube_dl/jsinterp/jsinterp.py +++ b/youtube_dl/jsinterp/jsinterp.py @@ -4,9 +4,7 @@ import re from ..utils import ExtractorError from .tstream import TokenStream -from .jsgrammar import Token - -_token_keys = set((Token.NULL, Token.BOOL, Token.ID, Token.STR, Token.INT, Token.FLOAT, Token.REGEX)) +from .jsgrammar import Token, token_keys class Context(object): @@ -497,7 +495,7 @@ class JSInterpreter(object): # TODO support let peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id in _token_keys: + if peek_id in token_keys: if peek_id is Token.ID: # this if peek_value == 'this': @@ -924,7 +922,7 @@ class JSInterpreter(object): self.global_vars[expr[1]]) # literal - elif name in _token_keys: + elif name in token_keys: ref = Reference(expr[1]) elif name is Token.ARRAY: diff --git a/youtube_dl/jsinterp/tstream.py b/youtube_dl/jsinterp/tstream.py index 36a22d09e..50c137894 100644 --- a/youtube_dl/jsinterp/tstream.py +++ b/youtube_dl/jsinterp/tstream.py @@ -13,7 +13,8 @@ from .jsgrammar import ( RELATIONS_RE, ASSIGN_OPERATORS_RE, OPERATORS_RE, - Token + Token, + token_keys ) _PUNCTUATIONS = { @@ -34,10 +35,12 @@ _LOGICAL_OPERATORS = { '||': (Token.OR, lambda cur, right: cur or right) } _UNARY_OPERATORS = { + '+': (Token.PLUS, lambda cur: cur), + '-': (Token.NEG, lambda cur: cur * -1), '++': (Token.INC, lambda cur: cur + 1), '--': (Token.DEC, lambda cur: cur - 1), '!': (Token.NOT, operator.not_), - '~': (Token.BNOT, lambda cur: cur ^ -1), + '~': (Token.BNOT, operator.invert), # XXX define these operators 'delete': (Token.DEL, None), 'void': (Token.VOID, None), @@ -53,9 +56,8 @@ _RELATIONS = { '!=': (Token.NE, operator.ne), '===': (Token.SEQ, lambda cur, right: cur == right and type(cur) == type(right)), '!==': (Token.SNE, lambda cur, right: not cur == right or not type(cur) == type(right)), - # XXX define instanceof and in operators - 'in': (Token.IN, None), - 'instanceof': (Token.INSTANCEOF, None) + 'in': (Token.IN, operator.contains), + 'instanceof': (Token.INSTANCEOF, lambda cur, right: isinstance(cur, right)) } _OPERATORS = { '|': (Token.BOR, operator.or_), @@ -124,7 +126,6 @@ class TokenStream(object): elif token_id is Token.STR: yield (token_id, token_value[1:-1], pos) elif token_id is Token.INT: - # FIXME signed values root = ((16 if len(token_value) > 2 and token_value[1] in 'xX' else 8) if token_value.startswith('0') else 10) yield (token_id, int(token_value, root), pos) @@ -137,6 +138,10 @@ class TokenStream(object): elif token_id is Token.ID: yield (token_id, token_value, pos) elif token_id in _operator_lookup: + # FIXME signed values + if (token_id is Token.OP and token_value in ('-', '+') and + self._last[0] not in token_keys and self._last[0] is not Token.PCLOSE): + token_id = Token.UOP yield (token_id if token_value != 'in' else Token.IN, _operator_lookup[token_id][token_value], pos)