From 976aa0d5350ae2c02089ceea4b0ae027f115a7af Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Tue, 27 Oct 2015 09:26:05 -0700
Subject: [PATCH 1/2] Implement a basic macro-assembler for test cases.

---
 ml-proto/.gitignore      |   1 +
 ml-proto/masm            |   9 ++
 ml-proto/masm.py         |  57 ++++++++++
 ml-proto/runtests.py     |  15 +++
 ml-proto/sexp.py         | 234 +++++++++++++++++++++++++++++++++++++++
 ml-proto/test/.gitignore |   3 +-
 ml-proto/test/fac.mast   |  21 ++++
 7 files changed, 339 insertions(+), 1 deletion(-)
 create mode 100755 ml-proto/masm
 create mode 100644 ml-proto/masm.py
 create mode 100644 ml-proto/sexp.py
 create mode 100644 ml-proto/test/fac.mast

diff --git a/ml-proto/.gitignore b/ml-proto/.gitignore
index 4cd5717649..a364441292 100644
--- a/ml-proto/.gitignore
+++ b/ml-proto/.gitignore
@@ -2,6 +2,7 @@
 *.byte
 *.opt
 *.zip
+*.pyc
 _build
 wasm
 unopt
diff --git a/ml-proto/masm b/ml-proto/masm
new file mode 100755
index 0000000000..5825c53ecd
--- /dev/null
+++ b/ml-proto/masm
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+
+# Standalone command-line interface for running the macro-assembler.
+
+import sys
+import masm
+
+for arg in sys.argv[1:]:
+    masm.transform_file(arg, arg.replace('.mast', '.generated.wast'))
diff --git a/ml-proto/masm.py b/ml-proto/masm.py
new file mode 100644
index 0000000000..ccfaf8eae6
--- /dev/null
+++ b/ml-proto/masm.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+import sexp
+
+class LowerWhileLoop(sexp.IdentityTransform):
+    """High-level-style top-test "while" loop expansion."""
+
+    def transform_list_postorder(self, x):
+        """Lower (while COND BODY) to an if-guarded loop; keep other lists."""
+        if not x.is_operator('while'):
+            return x
+        if x.num_operands() != 2:
+            raise ValueError("a while loop should have exactly 2 operands")
+        condition = x.get_operand(0)
+        body = x.get_operand(1)
+
+        # While the code may look scary at first, consider: First, top-test
+        # while loops are suboptimal, so what's really happening here is that
+        # the macro is optimizing the loop for you. Second, it'll be shorter
+        # once we get br_if. And third, there's probably a way to write a
+        # python API to make the actual python code here shorter. I'm fairly
+        # inexperienced in python and would welcome suggestions on how best to
+        # do that.
+        s = sexp.SExp
+        return s([s('if'),
+                  condition,
+                  s([s('loop'), s('$exit'),
+                     body,
+                     s([s('if'),
+                        s([s('i32.eq'),
+                           condition,
+                           s([s('i32.const'), s(0)]),
+                        ]),
+                        s([s('break'), s('$exit')])
+                     ])
+                  ])
+               ])
+
+def transform_expr(fromExpr):
+    """Return fromExpr with every macro lowered to plain wasm forms."""
+    return sexp.transform(fromExpr, LowerWhileLoop())
+
+def transform_string(fromStr):
+    """Expand the macros in .mast source text and return the .wast text."""
+    fromExpr = sexp.parse_toplevel(fromStr)
+    toExpr = transform_expr(fromExpr)
+    return sexp.toplevel_to_string_with_template(toExpr, fromStr)
+
+def transform_file(fromPath, toPath):
+    """Expand the macros in fromPath and write the result to toPath."""
+    with open(fromPath, 'r') as fromFile:
+        fromStr = fromFile.read()
+    # Transform before opening the output so a failure leaves toPath alone.
+    toStr = transform_string(fromStr)
+    with open(toPath, 'w') as toFile:
+        toFile.write(';; Generated by masm.py from ' + fromPath + '\n')
+        toFile.write(toStr)
diff --git a/ml-proto/runtests.py b/ml-proto/runtests.py
index f9bf5ecfc7..609dcdf8fc 100755
--- a/ml-proto/runtests.py
+++ b/ml-proto/runtests.py
@@ -6,9 +6,20 @@
 import subprocess
 import glob
 import sys
+import masm
 
 class RunTests(unittest.TestCase):
   def _runTestFile(self, shortName, fileName, interpreterPath):
+    if fileName.endswith('.mast'):
+      wastPath = fileName.replace(".mast", ".generated.wast")
+      try:
+        os.remove(wastPath)
+      except OSError:
+        pass
+
+      masm.transform_file(fileName, wastPath)
+      fileName = wastPath
+
     logPath = fileName.replace("test/", "test/output/").replace(".wast", ".wast.log")
     try:
       os.remove(logPath)
@@ -72,6 +83,10 @@ def rebuild_interpreter(path):
   else:
     find_interpreter(interpreterPath)
 
+  for generated in glob.glob("test/*.generated.wast"):
+    os.remove(generated)
+
   testFiles = glob.glob("test/*.wast")
+  testFiles += glob.glob("test/*.mast")
   generate_test_cases(RunTests, interpreterPath, testFiles)
   unittest.main()
diff --git a/ml-proto/sexp.py b/ml-proto/sexp.py
new file mode 100644
index 0000000000..d0128c87a8
--- /dev/null
+++ b/ml-proto/sexp.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python
+
+class SExp(object):
+    """S-expression node: a list/int/float/str plus optional source span."""
+    def __init__(self, thing, begin=-1, end=-1):
+        assert (begin == -1) == (end == -1)
+        assert not isinstance(thing, SExp)
+        self.thing = thing
+        self.begin = begin
+        self.end = end
+
+    def __repr__(self):
+        return str(self.thing) + ':' + str(self.begin) + ':' + str(self.end)
+
+    def __str__(self):
+        return str(self.thing)
+
+    def has_location(self):
+        return self.begin != -1
+
+    def is_operator(self, name):
+        return isinstance(self.thing, list) and len(self.thing) > 0 and self.thing[0].thing == name
+
+    def num_operands(self):
+        return len(self.thing) - 1
+
+    def get_operand(self, index):
+        return self.thing[index + 1]
+
+    def strip(self):
+        if isinstance(self.thing, list):
+            for item in self.thing:
+                item.strip()
+        self.begin = self.end = -1
+
+def isspace(c):
+    return c == ' ' or c == '\t' or c == '\n' or c == '\r'
+
+def isdigit(c):
+    return c >= '0' and c <= '9'
+
+def isalnum(c):
+    return isdigit(c) or (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z')
+
+def skip_whitespace_and_comments(s, i):
+    """Return the first index >= i that is not whitespace or a ;; comment."""
+    n = len(s)
+    while i < n:
+        if isspace(s[i]):
+            i += 1
+        elif s[i] == ';' and i+1 < n and s[i+1] == ';':
+            # Line comment: skip to the newline; the next iteration
+            # consumes the newline itself as ordinary whitespace.
+            i += 2
+            while i < n and s[i] != '\n':
+                i += 1
+        else:
+            break
+    return i
+
+def parse_string_literal(s, i):
+    """Parse the double-quoted literal at s[i]; return (SExp, end index)."""
+    assert s[i] == '"'
+    j = i + 1
+    while s[j] != '"':
+        # A backslash escapes the next character, including a quote.
+        if s[j] == '\\':
+            j += 1
+        j += 1
+    j += 1
+    return (SExp(s[i:j], i, j), j)
+
+def parse_number_literal(s, i):
+    j = i  # scan the maximal run of number characters, stopping at EOF
+    while j < len(s) and (isalnum(s[j]) or s[j] in ['.', '+', '-']):
+        j += 1
+    t = s[i:j]
+    if t.find('.') == -1:
+        x = int(t)
+    elif t.startswith('0x'):
+        x = float(t)
+    else:
+        x = float(t)
+    return (SExp(x, i, j), j)
+
+def parse_atom(s, i):
+    """Parse the string, number, or keyword at s[i]; return (SExp, end)."""
+    if s[i] == '"':
+        return parse_string_literal(s, i)
+    if isdigit(s[i]) or s[i] == '+' or s[i] == '-' or s[i:i+4] == 'nan(' or s[i:i+8] == 'infinity':
+        return parse_number_literal(s, i)
+    j = i
+    while j < len(s) and not isspace(s[j]) and s[j] != ')':
+        j += 1
+    if j == i:
+        raise ValueError("expected an atom at offset " + str(i))
+    return (SExp(s[i:j], i, j), j)
+
+def parse_list(s, i):
+    """Parse the parenthesized list at s[i]; return (SExp, end index)."""
+    assert s[i] == '('
+    items = []
+    # Skip trivia after '(' so "( )" parses as an empty list.
+    j = skip_whitespace_and_comments(s, i + 1)
+    while s[j] != ')':
+        (result, j) = parse_at_position(s, j)
+        items.append(result)
+        j = skip_whitespace_and_comments(s, j)
+    j += 1
+    return (SExp(items, i, j), j)
+
+def parse_at_position(s, i):
+    """Parse one list or atom at or after s[i]; return (SExp, end index)."""
+    i = skip_whitespace_and_comments(s, i)
+    if s[i] == '(':
+        return parse_list(s, i)
+    return parse_atom(s, i)
+
+def parse(s):
+    """Parse the first s-expression in s and return it."""
+    return parse_at_position(s, 0)[0]
+
+def parse_toplevel(s):
+    """Parse all top-level forms of s into one list SExp covering s."""
+    items = []
+    j = skip_whitespace_and_comments(s, 0)
+    while j < len(s):
+        (result, j) = parse_at_position(s, j)
+        items.append(result)
+        j = skip_whitespace_and_comments(s, j)
+    return SExp(items, 0, j)
+
+class IdentityTransform(object):
+    """Visitor whose hooks return each node unchanged; override to rewrite."""
+    def transform_list_preorder(self, sexp):
+        return sexp
+    def transform_list_postorder(self, sexp):
+        return sexp
+    def transform_int(self, sexp):
+        return sexp
+    def transform_float(self, sexp):
+        return sexp
+    def transform_str(self, sexp):
+        return sexp
+    def transform_keyword(self, sexp):
+        return sexp
+
+def transform(sexp, visitor):
+    """Rebuild sexp bottom-up, passing every node through visitor's hooks."""
+    if isinstance(sexp.thing, list):
+        sexp = visitor.transform_list_preorder(sexp)
+        new_list = [transform(item, visitor) for item in sexp.thing]
+        sexp = SExp(new_list, sexp.begin, sexp.end)
+        new_sexp = visitor.transform_list_postorder(sexp)
+    elif isinstance(sexp.thing, int):
+        new_sexp = visitor.transform_int(sexp)
+    elif isinstance(sexp.thing, float):
+        new_sexp = visitor.transform_float(sexp)
+    elif isinstance(sexp.thing, str):
+        if sexp.thing.startswith('"'):
+            new_sexp = visitor.transform_str(sexp)
+        else:
+            new_sexp = visitor.transform_keyword(sexp)
+    else:
+        raise TypeError("unknown thing to transform")
+    if new_sexp is not sexp:
+        # The visitor may return a node without begin/end set: clear the
+        # locations of any nodes it reused and adopt the replaced node's span.
+        new_sexp.strip()
+        new_sexp.begin = sexp.begin
+        new_sexp.end = sexp.end
+    return new_sexp
+
+class ToStringWithTemplateVisitor(object):
+    def __init__(self):
+        self.s = ''  # output text built so far
+        self.current = 0  # offset in the template consumed so far
+
+def to_string_with_template_recursion(sexp, s, visitor, toplevel):
+    """Append sexp's text to visitor.s, reusing trivia from template s."""
+    if sexp.has_location():
+        substr = s[visitor.current:sexp.begin]
+        visitor.s += substr
+        visitor.current += len(substr)
+    if isinstance(sexp.thing, list):
+        if not toplevel:
+            visitor.s += '('
+            visitor.current += 1
+        for item in sexp.thing:
+            to_string_with_template_recursion(item, s, visitor, False)
+            if not item.has_location():
+                visitor.s += ' '
+        # An empty list has no last item; treat it as having a located tail.
+        tail_located = not sexp.thing or sexp.thing[-1].has_location()
+        if not tail_located:
+            visitor.s = visitor.s[:-1]
+        if toplevel:
+            if tail_located and sexp.has_location():
+                visitor.s += s[visitor.current:sexp.end]
+        else:
+            if tail_located and sexp.has_location():
+                visitor.s += s[visitor.current:(sexp.end - 1)]
+            visitor.s += ')'
+            visitor.current += 1
+    elif isinstance(sexp.thing, (int, float)):
+        visitor.s += str(sexp.thing)
+    elif isinstance(sexp.thing, str):
+        visitor.s += sexp.thing
+    else:
+        raise TypeError("unknown thing")
+    if sexp.has_location():
+        visitor.current = sexp.end
+
+def to_string_with_template(sexp, s):
+    """Convert the given s-expression to a string.
+
+    This function takes an extra string argument which is the original string
+    from which the given s-expression was parsed, and to which its begin and
+    end indices refer. Whitespace and comments from the original string are
+    preserved in the resulting string whenever possible.
+    """
+    visitor = ToStringWithTemplateVisitor()
+    to_string_with_template_recursion(sexp, s, visitor, False)
+    return visitor.s
+
+def toplevel_to_string_with_template(sexp, s):
+    """Convert the given toplevel s-expression to a string.
+
+    This function is like to_string_with_template except that it specially
+    formats toplevel lists which are not enclosed by explicit parens.
+    """
+    visitor = ToStringWithTemplateVisitor()
+    to_string_with_template_recursion(sexp, s, visitor, True)
+    return visitor.s
diff --git a/ml-proto/test/.gitignore b/ml-proto/test/.gitignore
index 6caf68aff4..87627bb1bc 100644
--- a/ml-proto/test/.gitignore
+++ b/ml-proto/test/.gitignore
@@ -1 +1,2 @@
-output
\ No newline at end of file
+output
+*.generated.wast
diff --git a/ml-proto/test/fac.mast b/ml-proto/test/fac.mast
new file mode 100644
index 0000000000..6a616c5015
--- /dev/null
+++ b/ml-proto/test/fac.mast
@@ -0,0 +1,21 @@
+(module
+  ;; Factorial function computed using a while loop, implemented in the
+  ;; macro-assembler.
+  (func $fac-while (param $n i64) (result i64)
+    (local $i i64)
+    (local $res i64)
+    (set_local $i (get_local $n))
+    (set_local $res (i64.const 1))
+    (while (i64.ne (get_local $i) (i64.const 0))
+      (block
+        (set_local $res (i64.mul (get_local $i) (get_local $res)))
+        (set_local $i (i64.sub (get_local $i) (i64.const 1)))
+      )
+    )
+    (return (get_local $res))
+  )
+
+  (export "fac-while" $fac-while)
+)
+
+(assert_return (invoke "fac-while" (i64.const 25)) (i64.const 7034535277573963776))

From 7eea28e2e9c0f7472c15704b4432ca2a5bc30076 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Wed, 4 Nov 2015 15:37:59 -0800
Subject: [PATCH 2/2] Remove an unneeded special-case rule in literal parsing.

---
 ml-proto/sexp.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ml-proto/sexp.py b/ml-proto/sexp.py
index d0128c87a8..4a8b95fba3 100644
--- a/ml-proto/sexp.py
+++ b/ml-proto/sexp.py
@@ -77,8 +77,6 @@ def parse_number_literal(s, i):
     t = s[i:j]
     if t.find('.') == -1:
         x = int(t)
-    elif t.startswith('0x'):
-        x = float(t)
     else:
         x = float(t)
     return (SExp(x, i, j), j)