diff --git a/youtube_dl/jsinterp/__init__.py b/youtube_dl/jsinterp/__init__.py index 700ab03db..a7c3cf17b 100644 --- a/youtube_dl/jsinterp/__init__.py +++ b/youtube_dl/jsinterp/__init__.py @@ -1,3 +1,5 @@ from .jsinterp import JSInterpreter +from .jsgrammar import _NAME_RE -__all__ = ['JSInterpreter'] \ No newline at end of file +# ALERT stop usage of _NAME_RE! +__all__ = ['JSInterpreter', '_NAME_RE'] diff --git a/youtube_dl/jsinterp/jsinterp.py b/youtube_dl/jsinterp/jsinterp.py index 3ff0fc7bc..2d014a2f5 100644 --- a/youtube_dl/jsinterp/jsinterp.py +++ b/youtube_dl/jsinterp/jsinterp.py @@ -7,7 +7,6 @@ from .tstream import TokenStream _token_keys = 'null', 'bool', 'id', 'str', 'int', 'float', 'regex' - # TODO support json class JSInterpreter(object): undefined = object() @@ -59,7 +58,7 @@ class JSInterpreter(object): variables.append(token_value) peek_id, peek_value, peek_pos = token_stream.peek() - if peek_id == 'assign': + if peek_id == 'aop': token_stream.pop() init.append(self._assign_expression(token_stream, stack_top - 1)) peek_id, peek_value, peek_pos = token_stream.peek() @@ -86,7 +85,6 @@ class JSInterpreter(object): elif token_value in ('break', 'continue'): raise ExtractorError('Flow control is not yet supported at %d' % token_pos) elif token_value == 'return': - token_stream.pop() statement = ('return', self._expression(token_stream, stack_top - 1)) peek_id, peek_value, peek_pos = token_stream.peek() if peek_id != 'end': @@ -228,7 +226,7 @@ class JSInterpreter(object): raise ExtractorError('Function expression is not yet supported at %d' % peek_pos) # id else: - token_stream.chk_id() + token_stream.chk_id(last=True) return ('id', peek_value) # literals else: @@ -314,7 +312,7 @@ class JSInterpreter(object): else: raise ExtractorError('Missing : in conditional expression at %d' % hook_pos) return ('cond', expr, true_expr, false_expr) - return ('rpn', expr) + return expr def _operator_expression(self, token_stream, stack_top): # --<---------------------------------<-- op --<--------------------------<---- @@ -351,12 +349,12 @@ class JSInterpreter(object): while has_prefix: peek_id, peek_value, peek_pos = token_stream.peek() if peek_id == 'uop': - had_inc = peek_value in ('inc', 'dec') + name, op = peek_value + had_inc = name in ('inc', 'dec') while stack and stack[-1][0] < 16: - _, stack_op = stack.pop() - out.append(('op', stack_op)) - _, op = peek_value - stack.append((16, op)) + _, stack_id, stack_op = stack.pop() + out.append((stack_id, stack_op)) + stack.append((16, peek_id, op)) token_stream.pop() peek_id, peek_value, peek_pos = token_stream.peek() if had_inc and peek_id != 'id': @@ -379,9 +377,9 @@ class JSInterpreter(object): else: raise ExtractorError('Unexpected operator at %d' % peek_pos) while stack and stack[-1][0] <= 17: - _, stack_op = stack.pop() - out.append(('op', stack_op)) - stack.append((prec, op)) + _, stack_id, stack_op = stack.pop() + out.append((stack_id, stack_op)) + stack.append((prec, peek_id, op)) token_stream.pop() peek_id, peek_value, peek_pos = token_stream.peek() @@ -411,10 +409,10 @@ class JSInterpreter(object): prec = 21 # empties stack while stack and stack[-1][0] <= prec: - _, stack_op = stack.pop() - out.append(('op', stack_op)) + _, stack_id, stack_op = stack.pop() + out.append((stack_id, stack_op)) if has_another: - stack.append((prec, op)) + stack.append((prec, peek_id, op)) token_stream.pop() return ('rpn', out) @@ -466,8 +464,9 @@ class JSInterpreter(object): def resf(args): local_vars = dict(zip(argnames, args)) for stmt in self.statements(code): - res, abort = self.interpret_statement(stmt, local_vars) - if abort: - break - return res + pass + # res, abort = self.interpret_statement(stmt, local_vars) + # if abort: + # break + # return res return resf diff --git a/youtube_dl/jsinterp/tstream.py b/youtube_dl/jsinterp/tstream.py index fd4ec99cf..b899d7de6 100644 --- a/youtube_dl/jsinterp/tstream.py +++ b/youtube_dl/jsinterp/tstream.py @@ -71,12 +71,17 @@ _OPERATORS = { _ASSIGN_OPERATORS = dict((op + '=', ('set_%s' % token[0], token[1])) for op, token in _OPERATORS.items()) _ASSIGN_OPERATORS['='] = ('set', lambda cur, right: right) +_operator_lookup = { + 'op': _OPERATORS, + 'aop': _ASSIGN_OPERATORS, + 'uop': _UNARY_OPERATORS, + 'lop': _LOGICAL_OPERATORS, + 'rel': _RELATIONS +} # only to check ids -_RESERVED_WORDS = ( 'break', 'case', 'catch', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'finally', - 'for', 'function', 'if', 'in', 'instanceof', 'new', 'return', 'switch', 'this', 'throw', 'try', - 'typeof', 'var', 'void', 'while', 'with') - - +_reserved_words = ('break', 'case', 'catch', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'finally', + 'for', 'function', 'if', 'in', 'instanceof', 'new', 'return', 'switch', 'this', 'throw', 'try', + 'typeof', 'var', 'void', 'while', 'with') _input_element = re.compile(r'\s*(?:%(comment)s|%(token)s|%(punct)s|%(lop)s|%(uop)s|%(rel)s|%(aop)s|%(op)s)\s*' % { 'comment': COMMENT_RE, 'token': TOKENS_RE, @@ -98,12 +103,13 @@ class TokenStream(object): self._last = None def _next_token(self, pos=0): - while pos < len(self.code): + while not self.ended: feed_m = _input_element.match(self.code, pos) if feed_m is not None: token_id = feed_m.lastgroup token_value = feed_m.group(token_id) pos = feed_m.start(token_id) + self.ended = feed_m.end() >= len(self.code) # because how yield works if token_id == 'comment': pass # TODO date @@ -123,18 +129,10 @@ class TokenStream(object): yield (token_id, {'re': regex, 'flags': feed_m.group('reflags')}, pos) elif token_id == 'id': yield (token_id, token_value, pos) - elif token_id == 'op': - yield (token_id, _OPERATORS[token_value]) - elif token_id == 'aop': - yield (token_id, _ASSIGN_OPERATORS[token_value]) - elif token_id == 'rel': - yield (token_id, _RELATIONS[token_value]) - elif token_id == 'uop': - yield (token_id, _UNARY_OPERATORS[token_value]) - elif token_id == 'lop': - yield (token_id, _LOGICAL_OPERATORS[token_value]) + elif token_id in _operator_lookup: + yield (token_id, _operator_lookup[token_id][token_value], pos) elif token_id == 'punc': - yield (token_id, _PUNCTUATIONS[token_value], pos) + yield (_PUNCTUATIONS[token_value], token_value, pos) else: raise ExtractorError('Unexpected token at %d' % pos) pos = feed_m.end() @@ -147,14 +145,13 @@ class TokenStream(object): name, value, pos = self._last else: name, value, pos = self.peek() - if name in _RESERVED_WORDS: + if name != 'id' or value in _reserved_words: raise ExtractorError('Invalid identifier at %d' % pos) def peek(self, count=1): for _ in range(count - len(self.peeked)): token = next(self._ts, None) if token is None: - self.ended = True self.peeked.append(('end', ';', len(self.code))) else: self.peeked.append(token) @@ -167,4 +164,4 @@ class TokenStream(object): return self._last def last(self): - return self._last \ No newline at end of file + return self._last