From 86f7fcbe6975e97ca9b3c40c624a6df15e730556 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Tue, 20 Mar 2018 23:54:13 -0400 Subject: [PATCH] Numeric parser (for jsinterp) --- parse-numeric.py | 116 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 parse-numeric.py diff --git a/parse-numeric.py b/parse-numeric.py new file mode 100644 index 000000000..6c41f8c3d --- /dev/null +++ b/parse-numeric.py @@ -0,0 +1,116 @@ +from __future__ import print_function +import math +import re + +regex = re.compile(r'''(?:\s+)? # StrNumericLiteral.StrWhiteSpace (optional) + ( # StringNumericLitral.StrDecimalLiteral + (?P[\+\-])? # optional sign + (?PInfinity)| + [\+\-]?(?PNaN)| # not documented, but Node returns NaN + [\+\-]?(?: # DecimalDigits . DecimalDigits(opt) ExponentPart(opt) + (?:\d+)? # DecimalDigits + (?:\.) + (?:\d+)? # DecimalDigits + (?:[eE] # ExponentPart.ExponentIndicator + [\+\=]? # optional sign + (?:\d+) + )? + )| + 0(?P[bB])(?P[01]+)| + 0(?P[oO])(?P[01234567]+)| + 0(?P[xX])(?P[0-9a-fA-F]+) + )(?:\s+)?$''', re.X) + + +def getnan(): + try: + return math.nan + except AttributeError: + pass + return float('nan') + + +def getinf(mult=1): + try: + ret = math.inf + except AttributeError: + ret = float('inf') + return mult * ret + + +def can_be_int(inp, out): + return str(out)[-2:] == '.0' and ('.0' not in inp and not inp.endswith('.')) + + +def conditional_int(inp): + # Values with leading zeros like 01.0 and 01. should fail + if len(inp) > 1 and inp[0] == '0': + raise SyntaxError('unexpected number; given "{}"'.format(inp)) + out = float(inp) + return int(str(out)[:-2], 10) if can_be_int(inp, out) else out + + +def parse_numeric(x): + m = re.match(regex, x) + if not m: + raise SyntaxError('invalid nor unexpected token; given "{}"'.format(x)) + + ret = m.group(0).strip() + groups = m.groupdict() + + if groups['inf']: + sign = -1 if groups['inf_sign'] == '-' else 1 + ret = getinf(sign) + elif groups['nan']: + ret = getnan() + else: + base = key = None + if groups['binary']: + base = 2 + key = 'binary_number' + elif groups['octal']: + base = 8 + key = 'octal_number' + elif groups['hex']: + base = 16 + key = 'hex_number' + if key and base: + ret = int(groups[key], base) + else: + try: + ret = conditional_int(ret) + except ValueError as e: + raise SyntaxError('invalid or unexpected token: given "{}"'.format(x)) + print('{} -> {}'.format(x, ret)) + return ret + +parse_numeric('.1') +parse_numeric('1.') +parse_numeric('Infinity') +parse_numeric('-Infinity') +parse_numeric('+Infinity') +parse_numeric('NaN') +parse_numeric('-NaN') +parse_numeric('+NaN') +try: + parse_numeric('01.0') +except SyntaxError as e: + print('01.0 -> {}'.format(e)) +try: + parse_numeric('01.') +except SyntaxError as e: + print('01. -> {}'.format(e)) +parse_numeric('1.0') +parse_numeric('1.1') +parse_numeric('1.e5') +parse_numeric('1.E5') +parse_numeric('1.1e5') +parse_numeric('1.1E6') +parse_numeric('-1.1E6') +parse_numeric('+1.1') +parse_numeric('0b101') +parse_numeric('0B101') +parse_numeric('0o755') +parse_numeric('0O755') +parse_numeric('0xaf') +parse_numeric('0XDEADBEEF9')