diff --git a/youtube_dl/jsinterp/jsgrammar.py b/youtube_dl/jsinterp/jsgrammar.py index b50a21691..f26d5a45c 100644 --- a/youtube_dl/jsinterp/jsgrammar.py +++ b/youtube_dl/jsinterp/jsgrammar.py @@ -1,12 +1,10 @@ from __future__ import unicode_literals import re -from enum import Enum -# ALERT enum34 package dependency -# it's backported -class Token(Enum): +class T(object): + COPEN, CCLOSE, POPEN, PCLOSE, SOPEN, SCLOSE = range(0,6) DOT, END, COMMA, HOOK, COLON = range(6, 11) AND, OR, INC, DEC, NOT, BNOT, DEL, VOID, TYPE = range(11, 20) @@ -22,6 +20,10 @@ class Token(Enum): ASSIGN, MEMBER, FIELD, ELEM, CALL, ARRAY, COND, OPEXPR = range(70, 78) RSV = 78 + def __getitem__(self, item): + return self.__getattribute__(item) + +Token = T() __DECIMAL_RE = r'(?:[1-9][0-9]*)|0' __OCTAL_RE = r'0[0-7]+' @@ -58,28 +60,28 @@ _NULL_RE = r'null' # r'''/(?!\*) # (?:(?:\\(?:[tnvfr0.\\+*?^$\[\]{}()|/]|[0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|c[A-Z]|))|[^/\n])* # /(?:(?![gimy]*(?P<flag>[gimy])[gimy]*(?P=flag))[gimy]{0,4}\b|\s|$)''' -_REGEX_FLAGS_RE = r'(?![gimy]*(?P<reflag>[gimy])[gimy]*(?P=reflag))(?P<%s>[gimy]{0,4}\b)' % Token.REFLAGS.name -_REGEX_RE = r'/(?!\*)(?P<%s>(?:[^/\n]|(?:\\/))*)/(?:(?:%s)|(?:\s|$))' % (Token.REBODY.name, _REGEX_FLAGS_RE) +_REGEX_FLAGS_RE = r'(?![gimy]*(?P<reflag>[gimy])[gimy]*(?P=reflag))(?P<%s>[gimy]{0,4}\b)' % 'REFLAGS' +_REGEX_RE = r'/(?!\*)(?P<%s>(?:[^/\n]|(?:\\/))*)/(?:(?:%s)|(?:\s|$))' % ('REBODY', _REGEX_FLAGS_RE) _TOKENS = [ - (Token.NULL, _NULL_RE), - (Token.BOOL, _BOOL_RE), - (Token.ID, _NAME_RE), - (Token.STR, _STRING_RE), - (Token.INT, _INTEGER_RE), - (Token.FLOAT, _FLOAT_RE), - (Token.REGEX, _REGEX_RE) + ('NULL', _NULL_RE), + ('BOOL', _BOOL_RE), + ('ID', _NAME_RE), + ('STR', _STRING_RE), + ('INT', _INTEGER_RE), + ('FLOAT', _FLOAT_RE), + ('REGEX', _REGEX_RE) ] -COMMENT_RE = r'(?P<%s>/\*(?:(?!\*/)(?:\n|.))*\*/)' % 
'COMMENT' +TOKENS_RE = r'|'.join('(?P<%(id)s>%(value)s)' % {'id': name, 'value': value} for name, value in _TOKENS) -LOGICAL_OPERATORS_RE = r'(?P<%s>%s)' % (Token.LOP.name, r'|'.join(re.escape(value) for value in _logical_operator)) -UNARY_OPERATORS_RE = r'(?P<%s>%s)' % (Token.UOP.name, r'|'.join(re.escape(value) for value in _unary_operator)) -ASSIGN_OPERATORS_RE = r'(?P<%s>%s)' % (Token.AOP.name, +LOGICAL_OPERATORS_RE = r'(?P<%s>%s)' % ('LOP', r'|'.join(re.escape(value) for value in _logical_operator)) +UNARY_OPERATORS_RE = r'(?P<%s>%s)' % ('UOP', r'|'.join(re.escape(value) for value in _unary_operator)) +ASSIGN_OPERATORS_RE = r'(?P<%s>%s)' % ('AOP', r'|'.join(re.escape(value) if value != '=' else re.escape(value) + r'(?!\=)' for value in _assign_operator)) -OPERATORS_RE = r'(?P<%s>%s)' % (Token.OP.name, r'|'.join(re.escape(value) for value in _operator)) -RELATIONS_RE = r'(?P<%s>%s)' % (Token.REL.name, r'|'.join(re.escape(value) for value in _relation)) -PUNCTUATIONS_RE = r'(?P<%s>%s)' % (Token.PUNCT.name, r'|'.join(re.escape(value) for value in _punctuations)) +OPERATORS_RE = r'(?P<%s>%s)' % ('OP', r'|'.join(re.escape(value) for value in _operator)) +RELATIONS_RE = r'(?P<%s>%s)' % ('REL', r'|'.join(re.escape(value) for value in _relation)) +PUNCTUATIONS_RE = r'(?P<%s>%s)' % ('PUNCT', r'|'.join(re.escape(value) for value in _punctuations)) diff --git a/youtube_dl/jsinterp/jsinterp.py b/youtube_dl/jsinterp/jsinterp.py index 5f0a7b247..f5c2fd9af 100644 --- a/youtube_dl/jsinterp/jsinterp.py +++ b/youtube_dl/jsinterp/jsinterp.py @@ -444,7 +444,7 @@ class JSInterpreter(object): # TODO use context instead local_vars in argument def getvalue(self, ref, local_vars): - if ref is None or ref is self.undefined or isinstance(ref, (int, float, str)): # not Token + if ref is None or ref is self.undefined or isinstance(ref, (int, float, str)): return ref ref_id, ref_value = ref if ref_id is Token.ID: @@ -452,8 +452,20 @@ class JSInterpreter(object): elif ref_id in _token_keys: 
return ref_value elif ref_id is Token.EXPR: - ref, abort = self.interpret_statement(ref_value, local_vars) + ref, _ = self.interpret_statement(ref_value, local_vars) return self.getvalue(ref, local_vars) + elif ref_id is Token.ARRAY: + array = [] + for expr in ref_value: + array.append(self.interpret_expression(expr, local_vars)) + return array + else: + raise ExtractorError('Unable to get value of reference type %s' % ref_id) + + def putvalue(self, ref, value, local_vars): + ref_id, ref_value = ref + if ref_id is Token.ID: + local_vars[ref_value] = value def interpret_statement(self, stmt, local_vars): if stmt is None: @@ -483,6 +495,10 @@ class JSInterpreter(object): # TODO use context instead returning abort ref, abort = self.interpret_statement(stmt[1], local_vars) ref = self.getvalue(ref, local_vars) + if isinstance(ref, list): + # TODO deal with nested arrays + ref = [self.getvalue(elem, local_vars) for elem in ref] + abort = True # with # label @@ -501,16 +517,19 @@ class JSInterpreter(object): if op is None: return self.interpret_expression(left, local_vars) else: - left = self.interpret_expression(left, local_vars) # TODO handle undeclared variables (create propery) - leftvalue = self.getvalue(left, local_vars) + leftref = self.interpret_expression(left, local_vars) + leftvalue = self.getvalue(leftref, local_vars) rightvalue = self.getvalue(self.interpret_expression(right, local_vars), local_vars) - local_vars[left[1]] = op(leftvalue, rightvalue) - return left - + # TODO set array element + leftref = op(leftvalue, rightvalue) + return leftref + elif name is Token.EXPR: + ref, _ = self.interpret_statement(expr, local_vars) + return ref elif name is Token.OPEXPR: stack = [] - rpn = expr[1] + rpn = expr[1][:] while rpn: token = rpn.pop(0) if token[0] in (Token.OP, Token.AOP, Token.UOP, Token.LOP, Token.REL): @@ -538,24 +557,20 @@ class JSInterpreter(object): raise ExtractorError('''Can't interpret expression called %s''' % tail_name) elif tail_name is 
Token.ELEM: # TODO interpret element - raise ExtractorError('''Can't interpret expression called %s''' % tail_name) + # raise ExtractorError('''Can't interpret expression called %s''' % tail_name) + ret, _ = self.interpret_statement(tail_value, local_vars) + index = self.getvalue(ret, local_vars) + target = self.getvalue(target, local_vars) + target = self.interpret_expression((Token.MEMBER, target[index], args, tail), local_vars) elif tail_name is Token.CALL: # TODO interpret call raise ExtractorError('''Can't interpret expression called %s''' % tail_name) return target - elif name is Token.ID: - return local_vars[expr[1]] - + elif name in (Token.ID, Token.ARRAY): + return self.getvalue(expr, local_vars) # literal elif name in _token_keys: - return expr[1] - - elif name is Token.ARRAY: - array = [] - elms = expr[1] - for expr in elms: - array.append(self.interpret_expression(expr, local_vars)) - return array + return expr else: raise ExtractorError('''Can't interpret expression called %s''' % name)