diff --git a/youtube_dl/jsinterp/jsgrammar.py b/youtube_dl/jsinterp/jsgrammar.py index f26d5a45c..d9daf3362 100644 --- a/youtube_dl/jsinterp/jsgrammar.py +++ b/youtube_dl/jsinterp/jsgrammar.py @@ -2,28 +2,23 @@ from __future__ import unicode_literals import re +from collections import namedtuple -class T(object): +_token_keys = ('COPEN', 'CCLOSE', 'POPEN', 'PCLOSE', 'SOPEN', 'SCLOSE', + 'DOT', 'END', 'COMMA', 'HOOK', 'COLON', + 'AND', 'OR', 'INC', 'DEC', 'NOT', 'BNOT', 'DEL', 'VOID', 'TYPE', + 'LT', 'GT', 'LE', 'GE', 'EQ', 'NE', 'SEQ', 'SNE', + 'BOR', 'BXOR', 'BAND', 'RSHIFT', 'LSHIFT', 'URSHIFT', 'SUB', 'ADD', 'MOD', 'DIV', 'MUL', + 'OP', 'AOP', 'UOP', 'LOP', 'REL', + 'COMMENT', 'TOKEN', 'PUNCT', + 'NULL', 'BOOL', 'ID', 'STR', 'INT', 'FLOAT', 'REGEX', + 'REFLAGS', 'REBODY', + 'BLOCK', 'VAR', 'EXPR', 'IF', 'ITER', 'CONTINUE', 'BREAK', 'RETURN', 'WITH', 'LABEL', 'SWITCH', + 'THROW', 'TRY', 'DEBUG', + 'ASSIGN', 'MEMBER', 'FIELD', 'ELEM', 'CALL', 'ARRAY', 'COND', 'OPEXPR', + 'RSV') - COPEN, CCLOSE, POPEN, PCLOSE, SOPEN, SCLOSE = range(0,6) - DOT, END, COMMA, HOOK, COLON = range(6, 11) - AND, OR, INC, DEC, NOT, BNOT, DEL, VOID, TYPE = range(11, 20) - LT, GT, LE, GE, EQ, NE, SEQ, SNE = range(20, 28) - BOR, BXOR, BAND, RSHIFT, LSHIFT, URSHIFT, SUB, ADD, MOD, DIV, MUL = range(28, 39) - - OP, AOP, UOP, LOP, REL = range(39, 44) - COMMENT, TOKEN, PUNCT = range(44, 47) - NULL, BOOL, ID, STR, INT, FLOAT, REGEX = range(47, 54) - REFLAGS, REBODY = 54, 55 - - BLOCK, VAR, EXPR, IF, ITER, CONTINUE, BREAK, RETURN, WITH, LABEL, SWITCH, THROW, TRY, DEBUG = range(56, 70) - ASSIGN, MEMBER, FIELD, ELEM, CALL, ARRAY, COND, OPEXPR = range(70, 78) - RSV = 78 - - def __getitem__(self, item): - return self.__getattribute__(item) - -Token = T() +Token = namedtuple('Token', _token_keys)._make(_token_keys) __DECIMAL_RE = r'(?:[1-9][0-9]*)|0' __OCTAL_RE = r'0[0-7]+' @@ -64,24 +59,24 @@ _REGEX_FLAGS_RE = r'(?![gimy]*(?P[gimy])[gimy]*(?P=reflag))(?P<%s>[gimy] _REGEX_RE = r'/(?!\*)(?P<%s>(?:[^/\n]|(?:\\/))*)/(?:(?:%s)|(?:\s|$))' % ('REBODY', _REGEX_FLAGS_RE) _TOKENS = [ - ('NULL', _NULL_RE), - ('BOOL', _BOOL_RE), - ('ID', _NAME_RE), - ('STR', _STRING_RE), - ('INT', _INTEGER_RE), - ('FLOAT', _FLOAT_RE), - ('REGEX', _REGEX_RE) + (Token.NULL, _NULL_RE), + (Token.BOOL, _BOOL_RE), + (Token.ID, _NAME_RE), + (Token.STR, _STRING_RE), + (Token.INT, _INTEGER_RE), + (Token.FLOAT, _FLOAT_RE), + (Token.REGEX, _REGEX_RE) ] -COMMENT_RE = r'(?P<%s>/\*(?:(?!\*/)(?:\n|.))*\*/)' % 'COMMENT' +COMMENT_RE = r'(?P<%s>/\*(?:(?!\*/)(?:\n|.))*\*/)' % Token.COMMENT TOKENS_RE = r'|'.join('(?P<%(id)s>%(value)s)' % {'id': name, 'value': value} for name, value in _TOKENS) -LOGICAL_OPERATORS_RE = r'(?P<%s>%s)' % ('LOP', r'|'.join(re.escape(value) for value in _logical_operator)) -UNARY_OPERATORS_RE = r'(?P<%s>%s)' % ('UOP', r'|'.join(re.escape(value) for value in _unary_operator)) -ASSIGN_OPERATORS_RE = r'(?P<%s>%s)' % ('AOP', +LOGICAL_OPERATORS_RE = r'(?P<%s>%s)' % (Token.LOP, r'|'.join(re.escape(value) for value in _logical_operator)) +UNARY_OPERATORS_RE = r'(?P<%s>%s)' % (Token.UOP, r'|'.join(re.escape(value) for value in _unary_operator)) +ASSIGN_OPERATORS_RE = r'(?P<%s>%s)' % (Token.AOP, r'|'.join(re.escape(value) if value != '=' else re.escape(value) + r'(?!\=)' for value in _assign_operator)) -OPERATORS_RE = r'(?P<%s>%s)' % ('OP', r'|'.join(re.escape(value) for value in _operator)) -RELATIONS_RE = r'(?P<%s>%s)' % ('REL', r'|'.join(re.escape(value) for value in _relation)) -PUNCTUATIONS_RE = r'(?P<%s>%s)' % ('PUNCT', r'|'.join(re.escape(value) for value in _punctuations)) +OPERATORS_RE = r'(?P<%s>%s)' % (Token.OP, r'|'.join(re.escape(value) for value in _operator)) +RELATIONS_RE = r'(?P<%s>%s)' % (Token.REL, r'|'.join(re.escape(value) for value in _relation)) +PUNCTUATIONS_RE = r'(?P<%s>%s)' % (Token.PUNCT, r'|'.join(re.escape(value) for value in _punctuations)) diff --git a/youtube_dl/jsinterp/jsinterp.py b/youtube_dl/jsinterp/jsinterp.py index f5c2fd9af..bb7d5e572 100644 --- a/youtube_dl/jsinterp/jsinterp.py +++ b/youtube_dl/jsinterp/jsinterp.py @@ -239,7 +239,8 @@ class JSInterpreter(object): return (Token.ID, peek_value) # literals else: - return (peek_id, peek_value) + # TODO use tuple if CONST + return [peek_id, peek_value] # array elif peek_id is Token.SOPEN: return self._array_literal(token_stream, stack_top - 1) @@ -462,10 +463,13 @@ class JSInterpreter(object): else: raise ExtractorError('Unable to get value of reference type %s' % ref_id) - def putvalue(self, ref, value, local_vars): + @staticmethod + def putvalue(ref, value, local_vars): ref_id, ref_value = ref if ref_id is Token.ID: local_vars[ref_value] = value + elif ref_id in _token_keys: + ref[1] = value def interpret_statement(self, stmt, local_vars): if stmt is None: @@ -515,18 +519,17 @@ class JSInterpreter(object): if name is Token.ASSIGN: op, left, right = expr[1:] if op is None: - return self.interpret_expression(left, local_vars) + ref = self.interpret_expression(left, local_vars) else: # TODO handle undeclared variables (create propery) leftref = self.interpret_expression(left, local_vars) leftvalue = self.getvalue(leftref, local_vars) rightvalue = self.getvalue(self.interpret_expression(right, local_vars), local_vars) # TODO set array element - leftref = op(leftvalue, rightvalue) - return leftref + self.putvalue(leftref, op(leftvalue, rightvalue), local_vars) + ref = leftref elif name is Token.EXPR: ref, _ = self.interpret_statement(expr, local_vars) - return ref elif name is Token.OPEXPR: stack = [] rpn = expr[1][:] @@ -543,7 +546,7 @@ class JSInterpreter(object): stack.append(self.interpret_expression(token, local_vars)) result = stack.pop() if not stack: - return result + ref = result else: raise ExtractorError('Expression has too many values') @@ -565,16 +568,18 @@ class JSInterpreter(object): elif tail_name is Token.CALL: # TODO interpret call raise ExtractorError('''Can't interpret expression called %s''' % tail_name) - return target + ref = target elif name in (Token.ID, Token.ARRAY): - return self.getvalue(expr, local_vars) + ref = self.getvalue(expr, local_vars) # literal elif name in _token_keys: - return expr + ref = expr else: raise ExtractorError('''Can't interpret expression called %s''' % name) + return ref + def extract_object(self, objname): obj = {} obj_m = re.search( diff --git a/youtube_dl/jsinterp/tstream.py b/youtube_dl/jsinterp/tstream.py index 23fd2054e..4d456ccc5 100644 --- a/youtube_dl/jsinterp/tstream.py +++ b/youtube_dl/jsinterp/tstream.py @@ -110,7 +110,7 @@ class TokenStream(object): token_id = feed_m.lastgroup token_value = feed_m.group(token_id) pos = feed_m.start(token_id) - token_id = Token[token_id] + token_id = Token[Token.index(token_id)] self.ended = feed_m.end() >= len(self.code) # because how yield works if token_id is Token.COMMENT: pass