[jsinterp] Token class for tokens

2016-12-07 07:28:09 +01:00 · 2016-12-07 07:28:09 +01:00 · ce4a616c4a
commit ce4a616c4a
parent d422aefc03
3 changed files with 157 additions and 148 deletions
--- a/youtube_dl/jsinterp/jsgrammar.py
+++ b/youtube_dl/jsinterp/jsgrammar.py
@ -1,6 +1,20 @@
 from __future__ import unicode_literals

 import re
+from enum import Enum
+
+
+class Token(Enum):
+    COPEN, CCLOSE, POPEN, PCLOSE, SOPEN, SCLOSE = range(0, 6)
+    DOT, END, COMMA, HOOK, COLON = range(6, 11)
+    AND, OR, INC, DEC, NOT, BNOT, DEL, VOID, TYPE = range(11, 20)
+    LT, GT, LE, GE, EQ, NE, SEQ, SNE = range(20, 28)
+    BOR, BXOR, BAND, RSHIFT, LSHIFT, URSHIFT, SUB, ADD, MOD, DIV, MUL = range(28, 39)
+    OP, AOP, UOP, LOP, REL = range(39, 44)
+    COMMENT, TOKEN, PUNCT = range(44, 47)
+    NULL, BOOL, ID, STR, INT, FLOAT, REGEX = range(47, 54)
+    reflag, rebody = 54, 55
+

 __DECIMAL_RE = r'(?:[1-9][0-9]*)|0'
 __OCTAL_RE = r'0[0-7]+'
@ -41,23 +55,24 @@ _REGEX_FLAGS_RE = r'(?![gimy]*(?P<reflag>[gimy])[gimy]*(?P=reflag))(?P<reflags>[
 _REGEX_RE = r'/(?!\*)(?P<rebody>(?:[^/\n]|(?:\\/))*)/(?:(?:%s)|(?:\s|$))' % _REGEX_FLAGS_RE

 _TOKENS = [
-    ('null', _NULL_RE),
-    ('bool', _BOOL_RE),
-    ('id', _NAME_RE),
-    ('str', _STRING_RE),
-    ('int', _INTEGER_RE),
-    ('float', _FLOAT_RE),
-    ('regex', _REGEX_RE)
+    (Token.NULL, _NULL_RE),
+    (Token.BOOL, _BOOL_RE),
+    (Token.ID, _NAME_RE),
+    (Token.STR, _STRING_RE),
+    (Token.INT, _INTEGER_RE),
+    (Token.FLOAT, _FLOAT_RE),
+    (Token.REGEX, _REGEX_RE)
 ]

-COMMENT_RE = r'(?P<comment>/\*(?:(?!\*/)(?:\n|.))*\*/)'
-TOKENS_RE = r'|'.join('(?P<%(id)s>%(value)s)' % {'id': name, 'value': value}
+COMMENT_RE = r'(?P<%s>/\*(?:(?!\*/)(?:\n|.))*\*/)' % Token.COMMENT.name
+TOKENS_RE = r'|'.join('(?P<%(id)s>%(value)s)' % {'id': name.name, 'value': value}
                      for name, value in _TOKENS)

-LOGICAL_OPERATORS_RE = r'(?P<lop>%s)' % r'|'.join(re.escape(value) for value in _logical_operator)
-UNARY_OPERATORS_RE = r'(?P<uop>%s)' % r'|'.join(re.escape(value) for value in _unary_operator)
-ASSIGN_OPERATORS_RE = r'(?P<aop>%s)' % r'|'.join(re.escape(value) if value != '=' else re.escape(value) + r'(?!\=)'
-                                                 for value in _assign_operator)
-OPERATORS_RE = r'(?P<op>%s)' % r'|'.join(re.escape(value) for value in _operator)
-RELATIONS_RE = r'(?P<rel>{0:s})'.format(r'|'.join(re.escape(value) for value in _relation))
-PUNCTUATIONS_RE = r'(?P<punc>%s)' % r'|'.join(re.escape(value) for value in _punctuations)
+LOGICAL_OPERATORS_RE = r'(?P<%s>%s)' % (Token.LOP.name, r'|'.join(re.escape(value) for value in _logical_operator))
+UNARY_OPERATORS_RE = r'(?P<%s>%s)' % (Token.UOP.name, r'|'.join(re.escape(value) for value in _unary_operator))
+ASSIGN_OPERATORS_RE = r'(?P<%s>%s)' % (Token.AOP.name,
+                                       r'|'.join(re.escape(value) if value != '=' else re.escape(value) + r'(?!\=)'
+                                                 for value in _assign_operator))
+OPERATORS_RE = r'(?P<%s>%s)' % (Token.OP.name, r'|'.join(re.escape(value) for value in _operator))
+RELATIONS_RE = r'(?P<%s>%s)' % (Token.REL.name, r'|'.join(re.escape(value) for value in _relation))
+PUNCTUATIONS_RE = r'(?P<%s>%s)' % (Token.PUNCT.name, r'|'.join(re.escape(value) for value in _punctuations))
--- a/youtube_dl/jsinterp/jsinterp.py
+++ b/youtube_dl/jsinterp/jsinterp.py
@ -4,8 +4,9 @@ import re

 from ..utils import ExtractorError
 from .tstream import TokenStream
+from .jsgrammar import Token

-_token_keys = 'null', 'bool', 'id', 'str', 'int', 'float', 'regex'
+_token_keys = Token.NULL, Token.BOOL, Token.ID, Token.STR, Token.INT, Token.FLOAT, Token.REGEX


 class JSInterpreter(object):
@ -26,24 +27,24 @@ class JSInterpreter(object):
        statement = None

        token_id, token_value, token_pos = token_stream.peek()
-        if token_id in ('cclose', 'end'):
+        if token_id in (Token.CCLOSE, Token.END):
            # empty statement goes straight here
            return statement
-        if token_id == 'id' and token_value == 'function':
+        if token_id is Token.ID and token_value == 'function':
            # TODO parse funcdecl
            raise ExtractorError('Function declaration is not yet supported at %d' % token_pos)
-        elif token_id == 'copen':
+        elif token_id is Token.COPEN:
            # block
            token_stream.pop()
            statement_list = []
            for s in self.statements(token_stream, stack_top - 1):
                statement_list.append(s)
                token_id, token_value, token_pos = token_stream.peek()
-                if token_id == 'cclose':
+                if token_id is Token.CCLOSE:
                    token_stream.pop()
                    break
            statement = ('block', statement_list)
-        elif token_id == 'id':
+        elif token_id is Token.ID:
            # TODO parse label
            if token_value == 'var':
                token_stream.pop()
@ -52,26 +53,26 @@ class JSInterpreter(object):
                has_another = True
                while has_another:
                    token_id, token_value, token_pos = token_stream.pop()
-                    if token_id != 'id':
+                    if token_id is not Token.ID:
                        raise ExtractorError('Missing variable name at %d' % token_pos)
                    token_stream.chk_id(last=True)
                    variables.append(token_value)

                    peek_id, peek_value, peek_pos = token_stream.peek()
-                    if peek_id == 'aop':
+                    if peek_id is Token.AOP:
                        token_stream.pop()
                        init.append(self._assign_expression(token_stream, stack_top - 1))
                        peek_id, peek_value, peek_pos = token_stream.peek()
                    else:
                        init.append(JSInterpreter.undefined)

-                    if peek_id == 'end':
+                    if peek_id is Token.END:
                        has_another = False
-                    elif peek_id == 'comma':
+                    elif peek_id is Token.COMMA:
                        pass
                    else:
                        # FIXME automatic end insertion
-                        # - token_id == cclose
+                        # - token_id is Token.CCLOSE
                        # - check line terminator
                        # - restricted token
                        raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
@ -89,7 +90,7 @@ class JSInterpreter(object):
                token_stream.pop()
                statement = ('return', self._expression(token_stream, stack_top - 1))
                peek_id, peek_value, peek_pos = token_stream.peek()
-                if peek_id != 'end':
+                if peek_id is not Token.END:
                    # FIXME automatic end insertion
                    raise ExtractorError('Unexpected sequence %s at %d' % (peek_value, peek_pos))
            elif token_value == 'with':
@ -113,13 +114,13 @@ class JSInterpreter(object):
            has_another = True
            while has_another:
                peek_id, peek_value, peek_pos = token_stream.peek()
-                if not (peek_id == 'copen' and peek_id == 'id' and peek_value == 'function'):
+                if not (peek_id is Token.COPEN and peek_id is Token.ID and peek_value == 'function'):
                    expr_list.append(self._assign_expression(token_stream, stack_top - 1))
                    peek_id, peek_value, peek_pos = token_stream.peek()

-                if peek_id == 'end':
+                if peek_id is Token.END:
                    has_another = False
-                elif peek_id == 'comma':
+                elif peek_id is Token.COMMA:
                    pass
                else:
                    # FIXME automatic end insertion
@ -144,9 +145,9 @@ class JSInterpreter(object):
        while has_another:
            exprs.append(self._assign_expression(token_stream, stack_top - 1))
            peek_id, peek_value, peek_pos = token_stream.peek()
-            if peek_id == 'comma':
+            if peek_id is Token.COMMA:
                token_stream.pop()
-            elif peek_id == 'id' and peek_value == 'yield':
+            elif peek_id is Token.ID and peek_value == 'yield':
                # TODO parse yield
                raise ExtractorError('Yield statement is not yet supported at %d' % peek_pos)
            else:
@ -159,7 +160,7 @@ class JSInterpreter(object):

        left = self._conditional_expression(token_stream, stack_top - 1)
        peek_id, peek_value, peek_pos = token_stream.peek()
-        if peek_id == 'aop':
+        if peek_id is Token.AOP:
            token_stream.pop()
            _, op = peek_value
            right = self._assign_expression(token_stream, stack_top - 1)
@ -170,7 +171,7 @@ class JSInterpreter(object):

    def _member_expression(self, token_stream, stack_top):
        peek_id, peek_value, peek_pos = token_stream.peek()
-        if peek_id == 'id' and peek_value == 'new':
+        if peek_id is Token.ID and peek_value == 'new':
            token_stream.pop()
            target = self._member_expression(token_stream, stack_top - 1)
            args = self._arguments(token_stream, stack_top - 1)
@ -187,30 +188,30 @@ class JSInterpreter(object):
            raise ExtractorError('Recursion limit reached')

        peek_id, peek_value, peek_pos = token_stream.peek()
-        if peek_id == 'dot':
+        if peek_id is Token.DOT:
            token_stream.pop()
            peek_id, peek_value, peek_pos = token_stream.peek()
-            if peek_id == 'dot':
+            if peek_id is Token.DOT:
                token_stream.pop()
                peek_id, peek_value, peek_pos = token_stream.peek()
-            elif peek_id == 'popen':
+            elif peek_id is Token.POPEN:
                # TODO handle field query
                raise ExtractorError('Field querry is not yet supported at %d' % peek_pos)

-            if peek_id == 'id':
+            if peek_id is Token.ID:
                token_stream.pop()
                return ('field', peek_value, self._member_tail(token_stream, stack_top - 1))
            else:
                raise ExtractorError('Identifier name expected at %d' % peek_pos)
-        elif peek_id == 'sopen':
+        elif peek_id is Token.SOPEN:
            token_stream.pop()
            index = self._expression(token_stream, stack_top - 1)
            token_id, token_value, token_pos = token_stream.pop()
-            if token_id == 'sclose':
+            if token_id is Token.SCLOSE:
                return ('element', index, self._member_tail(token_stream, stack_top - 1))
            else:
                raise ExtractorError('Unexpected sequence at %d' % token_pos)
-        elif peek_id == 'popen':
+        elif peek_id is Token.POPEN:
            args = self._arguments(token_stream, stack_top - 1)
            return ('call', args, self._member_tail(token_stream, stack_top - 1))
        else:
@ -224,7 +225,7 @@ class JSInterpreter(object):
        peek_id, peek_value, peek_pos = token_stream.peek()
        if peek_id in _token_keys:
            token_stream.pop()
-            if peek_id == 'id':
+            if peek_id is Token.ID:
                # this
                if peek_value == 'this':
                    return ('rsv', 'this')
@ -235,24 +236,24 @@ class JSInterpreter(object):
                # id
                else:
                    token_stream.chk_id(last=True)
-                    return ('id', peek_value)
+                    return (Token.ID, peek_value)
            # literals
            else:
                return (peek_id, peek_value)
        # array
-        elif peek_id == 'sopen':
+        elif peek_id is Token.SOPEN:
            return self._array_literal(token_stream, stack_top - 1)
        # object
-        elif peek_id == 'copen':
+        elif peek_id is Token.COPEN:
            # TODO parse object
            raise ExtractorError('Object literals is not yet supported at %d' % peek_pos)
        # expr
-        elif peek_id == 'popen':
+        elif peek_id is Token.POPEN:
            token_stream.pop()
            open_pos = peek_pos
            expr = self._expression(token_stream, stack_top - 1)
            peek_id, peek_value, peek_pos = token_stream.peek()
-            if peek_id != 'pclose':
+            if peek_id is not Token.PCLOSE:
                raise ExtractorError('Unbalanced parentheses at %d' % open_pos)
            token_stream.pop()
            return ('expr', expr)
@ -265,7 +266,7 @@ class JSInterpreter(object):
            raise ExtractorError('Recursion limit reached')

        peek_id, peek_value, peek_pos = token_stream.peek()
-        if peek_id == 'popen':
+        if peek_id is Token.POPEN:
            token_stream.pop()
            open_pos = peek_pos
        else:
@ -273,7 +274,7 @@ class JSInterpreter(object):
        args = []
        while True:
            peek_id, peek_value, peek_pos = token_stream.peek()
-            if peek_id == 'pclose':
+            if peek_id is Token.PCLOSE:
                token_stream.pop()
                return args
            # FIXME handle infor
@ -281,7 +282,7 @@ class JSInterpreter(object):
            # TODO parse generator expression
            peek_id, peek_value, peek_pos = token_stream.peek()

-            if peek_id not in ('comma', 'pclose'):
+            if peek_id not in (Token.COMMA, Token.PCLOSE):
                raise ExtractorError('Unbalanced parentheses at %d' % open_pos)

    def _array_literal(self, token_stream, stack_top):
@ -290,7 +291,7 @@ class JSInterpreter(object):

        # TODO check no linebreak
        peek_id, peek_value, peek_pos = token_stream.peek()
-        if peek_id != 'sopen':
+        if peek_id is not Token.SOPEN:
            raise ExtractorError('Array expected at %d' % peek_pos)
        token_stream.pop()
        elements = []
@ -298,21 +299,21 @@ class JSInterpreter(object):
        has_another = True
        while has_another:
            peek_id, peek_value, peek_pos = token_stream.peek()
-            if peek_id == 'comma':
+            if peek_id is Token.COMMA:
                token_stream.pop()
                elements.append(None)
-            elif peek_id == 'sclose':
+            elif peek_id is Token.SCLOSE:
                token_stream.pop()
                has_another = False
-            elif peek_id == 'id' and peek_value == 'for':
+            elif peek_id is Token.ID and peek_value == 'for':
                # TODO parse array comprehension
                raise ExtractorError('Array comprehension is not yet supported at %d' % peek_pos)
            else:
                elements.append(self._assign_expression(token_stream, stack_top - 1))
                peek_id, peek_value, peek_pos = token_stream.pop()
-                if peek_id == 'sclose':
+                if peek_id is Token.SCLOSE:
                    has_another = False
-                elif peek_id != 'comma':
+                elif peek_id is not Token.COMMA:
                    raise ExtractorError('Expected , after element at %d' % peek_pos)

        return ('array', elements)
@ -323,11 +324,11 @@ class JSInterpreter(object):

        expr = self._operator_expression(token_stream, stack_top - 1)
        peek_id, peek_value, peek_pos = token_stream.peek()
-        if peek_id == 'hook':
+        if peek_id is Token.HOOK:
            hook_pos = peek_pos
            true_expr = self._assign_expression(token_stream, stack_top - 1)
            peek_id, peek_value, peek_pos = token_stream.peek()
-            if peek_id == 'colon':
+            if peek_id is Token.COLON:
                false_expr = self._assign_expression(token_stream, stack_top - 1)
            else:
                raise ExtractorError('Missing : in conditional expression at %d' % hook_pos)
@ -371,18 +372,18 @@ class JSInterpreter(object):
            has_prefix = True
            while has_prefix:
                peek_id, peek_value, peek_pos = token_stream.peek()
-                if peek_id == 'uop':
+                if peek_id is Token.UOP:
                    name, op = peek_value
-                    had_inc = name in ('inc', 'dec')
+                    had_inc = name in (Token.INC, Token.DEC)
                    while stack and stack[-1][0] > 16:
                        _, stack_id, stack_op = stack.pop()
                        out.append((stack_id, stack_op))
                    stack.append((16, peek_id, op))
                    token_stream.pop()
                    peek_id, peek_value, peek_pos = token_stream.peek()
-                    if had_inc and peek_id != 'id':
+                    if had_inc and peek_id is not Token.ID:
                        raise ExtractorError('Prefix operator has to be followed by an identifier at %d' % peek_pos)
-                    has_prefix = peek_id == 'uop'
+                    has_prefix = peek_id is Token.UOP
                else:
                    has_prefix = False

@ -391,11 +392,11 @@ class JSInterpreter(object):

            peek_id, peek_value, peek_pos = token_stream.peek()
            # postfix
-            if peek_id == 'uop':
+            if peek_id is Token.UOP:
                if had_inc:
                    raise ExtractorError('''Can't have prefix and postfix operator at the same time at %d''' % peek_pos)
                name, op = peek_value
-                if name in ('inc', 'dec'):
+                if name in (Token.INC, Token.DEC):
                    prec = 17
                else:
                    raise ExtractorError('Unexpected operator at %d' % peek_pos)
@ -406,27 +407,27 @@ class JSInterpreter(object):
                token_stream.pop()
                peek_id, peek_value, peek_pos = token_stream.peek()

-            if peek_id == 'rel':
+            if peek_id is Token.REL:
                name, op = peek_value
-            elif peek_id == 'op':
+            elif peek_id is Token.OP:
                name, op = peek_value
-                if name in ('mul', 'div', 'mod'):
+                if name in (Token.MUL, Token.DIV, Token.MOD):
                    prec = 14
-                elif name in ('add', 'sub'):
+                elif name in (Token.ADD, Token.SUB):
                    prec = 13
-                elif name.endswith('shift'):
+                elif name in (Token.RSHIFT, Token.LSHIFT, Token.URSHIFT):
                    prec = 12
-                elif name == 'band':
+                elif name is Token.BAND:
                    prec = 9
-                elif name == 'bxor':
+                elif name is Token.BXOR:
                    prec = 8
-                elif name == 'bor':
+                elif name is Token.BOR:
                    prec = 7
                else:
                    raise ExtractorError('Unexpected operator at %d' % peek_pos)
-            elif peek_id == 'lop':
+            elif peek_id is Token.LOP:
                name, op = peek_value
-                prec = {'or': 5, 'and': 6}[name]
+                prec = {Token.OR: 5, Token.AND: 6}[name]
            else:
                has_another = False
                prec = 4  # empties stack
@ -441,12 +442,12 @@ class JSInterpreter(object):
        return ('rpn', out)

    # TODO use context instead local_vars in argument
-
+    
    def getvalue(self, ref, local_vars):
-        if ref is None:
-            return None
+        if ref is None or ref is self.undefined or isinstance(ref, (int, float, str)):  # not Token
+            return ref
        ref_id, ref_value = ref
-        if ref_id == 'id':
+        if ref_id is Token.ID:
            return local_vars[ref_value]
        elif ref_id in _token_keys:
            return ref_value
@ -512,20 +513,11 @@ class JSInterpreter(object):
            rpn = expr[1]
            while rpn:
                token = rpn.pop(0)
-                if token[0] in ('op', 'aop', 'lop', 'rel'):
+                if token[0] in (Token.OP, Token.AOP, Token.LOP, Token.REL):
                    right = stack.pop()
                    left = stack.pop()
-                    result = token[1](self.getvalue(left, local_vars), self.getvalue(right, local_vars))
-                    if type(result) == int:
-                        type_id = 'int'
-                    elif type(result) == float:
-                        type_id = 'float'
-                    elif type(result) == str:
-                        type_id = 'str'
-                    else:
-                        type_id = str(type(result))
-                    stack.append((type_id, result))
-                elif token[0] == 'uop':
+                    stack.append(token[1](self.getvalue(left, local_vars), self.getvalue(right, local_vars)))
+                elif token[0] is Token.UOP:
                    right = stack.pop()
                    stack.append(token[1](self.getvalue(right, local_vars)))
                else:
@ -551,7 +543,7 @@ class JSInterpreter(object):
                    # TODO interpret call
                    raise ExtractorError('''Can't interpret expression called %s''' % tail_name)
            return target
-        elif name == 'id':
+        elif name is Token.ID:
            return local_vars[expr[1]]

        # literal
--- a/youtube_dl/jsinterp/tstream.py
+++ b/youtube_dl/jsinterp/tstream.py
@ -12,71 +12,72 @@ from .jsgrammar import (
    UNARY_OPERATORS_RE,
    RELATIONS_RE,
    ASSIGN_OPERATORS_RE,
-    OPERATORS_RE
+    OPERATORS_RE,
+    Token
 )


 _PUNCTUATIONS = {
-    '{': 'copen',
-    '}': 'cclose',
-    '(': 'popen',
-    ')': 'pclose',
-    '[': 'sopen',
-    ']': 'sclose',
-    '.': 'dot',
-    ';': 'end',
-    ',': 'comma',
-    '?': 'hook',
-    ':': 'colon'
+    '{': Token.COPEN,
+    '}': Token.CCLOSE,
+    '(': Token.POPEN,
+    ')': Token.PCLOSE,
+    '[': Token.SOPEN,
+    ']': Token.SCLOSE,
+    '.': Token.DOT,
+    ';': Token.END,
+    ',': Token.COMMA,
+    '?': Token.HOOK,
+    ':': Token.COLON
 }
 _LOGICAL_OPERATORS = {
-    '&&': ('and', lambda cur, right: cur and right),
-    '||': ('or', lambda cur, right: cur or right)
+    '&&': (Token.AND, lambda cur, right: cur and right),
+    '||': (Token.OR, lambda cur, right: cur or right)
 }
 _UNARY_OPERATORS = {
-    '++': ('inc', lambda cur: cur + 1),
-    '--': ('dec', lambda cur: cur - 1),
-    '!': ('not', operator.not_),
-    '~': ('bnot', lambda cur: cur ^ -1),
+    '++': (Token.INC, lambda cur: cur + 1),
+    '--': (Token.DEC, lambda cur: cur - 1),
+    '!': (Token.NOT, operator.not_),
+    '~': (Token.BNOT, lambda cur: cur ^ -1),
    # XXX define these operators
-    'delete': ('del', None),
-    'void': ('void', None),
-    'typeof': ('type', lambda cur: type(cur))
+    'delete': (Token.DEL, None),
+    'void': (Token.VOID, None),
+    'typeof': (Token.TYPE, lambda cur: type(cur))
 }
 _RELATIONS = {
-    '<': ('lt', operator.lt),
-    '>': ('gt', operator.gt),
-    '<=': ('le', operator.le),
-    '>=': ('ge', operator.ge),
+    '<': (Token.LT, operator.lt),
+    '>': (Token.GT, operator.gt),
+    '<=': (Token.LE, operator.le),
+    '>=': (Token.GE, operator.ge),
    # XXX check python and JavaScript equality difference
-    '==': ('eq', operator.eq),
-    '!=': ('ne', operator.ne),
-    '===': ('seq', lambda cur, right: cur == right and type(cur) == type(right)),
-    '!==': ('sne', lambda cur, right: not cur == right or not type(cur) == type(right))
+    '==': (Token.EQ, operator.eq),
+    '!=': (Token.NE, operator.ne),
+    '===': (Token.SEQ, lambda cur, right: cur == right and type(cur) == type(right)),
+    '!==': (Token.SNE, lambda cur, right: not cur == right or not type(cur) == type(right))
 }
 _OPERATORS = {
-    '|': ('bor', operator.or_),
-    '^': ('bxor', operator.xor),
-    '&': ('band', operator.and_),
+    '|': (Token.BOR, operator.or_),
+    '^': (Token.BXOR, operator.xor),
+    '&': (Token.BAND, operator.and_),
    # NOTE convert to int before shift float
-    '>>': ('rshift', operator.rshift),
-    '<<': ('lshift', operator.lshift),
-    '>>>': ('urshift', lambda cur, right: cur >> right if cur >= 0 else (cur + 0x100000000) >> right),
-    '-': ('sub', operator.sub),
-    '+': ('add', operator.add),
-    '%': ('mod', operator.mod),
-    '/': ('div', operator.truediv),
-    '*': ('mul', operator.mul)
+    '>>': (Token.RSHIFT, operator.rshift),
+    '<<': (Token.LSHIFT, operator.lshift),
+    '>>>': (Token.URSHIFT, lambda cur, right: cur >> right if cur >= 0 else (cur + 0x100000000) >> right),
+    '-': (Token.SUB, operator.sub),
+    '+': (Token.ADD, operator.add),
+    '%': (Token.MOD, operator.mod),
+    '/': (Token.DIV, operator.truediv),
+    '*': (Token.MUL, operator.mul)
 }
 _ASSIGN_OPERATORS = dict((op + '=', ('set_%s' % token[0], token[1])) for op, token in _OPERATORS.items())
 _ASSIGN_OPERATORS['='] = ('set', lambda cur, right: right)

 _operator_lookup = {
-    'op': _OPERATORS,
-    'aop': _ASSIGN_OPERATORS,
-    'uop': _UNARY_OPERATORS,
-    'lop': _LOGICAL_OPERATORS,
-    'rel': _RELATIONS
+    Token.OP: _OPERATORS,
+    Token.AOP: _ASSIGN_OPERATORS,
+    Token.UOP: _UNARY_OPERATORS,
+    Token.LOP: _LOGICAL_OPERATORS,
+    Token.REL: _RELATIONS
 }
 # only to check ids
 _reserved_words = ('break', 'case', 'catch', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'finally',
@ -109,29 +110,30 @@ class TokenStream(object):
                token_id = feed_m.lastgroup
                token_value = feed_m.group(token_id)
                pos = feed_m.start(token_id)
+                token_id = Token[token_id]
                self.ended = feed_m.end() >= len(self.code)  # because how yield works
-                if token_id == 'comment':
+                if token_id is Token.COMMENT:
                    pass
                # TODO date
-                elif token_id == 'null':
+                elif token_id is Token.NULL:
                    yield (token_id, None, pos)
-                elif token_id == 'bool':
+                elif token_id is Token.BOOL:
                    yield (token_id, {'true': True, 'false': False}[token_value], pos)
-                elif token_id == 'str':
+                elif token_id is Token.STR:
                    yield (token_id, token_value, pos)
-                elif token_id == 'int':
+                elif token_id is Token.INT:
                    yield (token_id, int(token_value), pos)
-                elif token_id == 'float':
+                elif token_id is Token.FLOAT:
                    yield (token_id, float(token_value), pos)
-                elif token_id == 'regex':
+                elif token_id is Token.REGEX:
                    # TODO error handling
                    regex = re.compile(feed_m.group('rebody'))
                    yield (token_id, (regex, feed_m.group('reflags')), pos)
-                elif token_id == 'id':
+                elif token_id is Token.ID:
                    yield (token_id, token_value, pos)
                elif token_id in _operator_lookup:
                    yield (token_id, _operator_lookup[token_id][token_value], pos)
-                elif token_id == 'punc':
+                elif token_id is Token.PUNCT:
                    yield (_PUNCTUATIONS[token_value], token_value, pos)
                else:
                    raise ExtractorError('Unexpected token at %d' % pos)
@ -145,14 +147,14 @@ class TokenStream(object):
            name, value, pos = self._last
        else:
            name, value, pos = self.peek()
-        if name != 'id' or value in _reserved_words:
+        if name is not Token.ID or value in _reserved_words:
            raise ExtractorError('Invalid identifier at %d' % pos)

    def peek(self, count=1):
        for _ in range(count - len(self.peeked)):
            token = next(self._ts, None)
            if token is None:
-                self.peeked.append(('end', ';', len(self.code)))
+                self.peeked.append((Token.END, ';', len(self.code)))
            else:
                self.peeked.append(token)
        return self.peeked[count - 1]