From d14bea8bf2acd2a5f558f59cb4b621349beacb86 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 3 Sep 2020 19:26:35 -0700 Subject: [PATCH] Poppy IR wast parsing and validation Adds and IR Profile to each function so the validator can determine which validation rules to apply and adds an flag to have the wast parser set the profile to Poppy for testing purposes. --- src/binaryen-c.cpp | 2 +- src/ir/stack-utils.cpp | 23 ++++ src/ir/stack-utils.h | 14 +- src/passes/RemoveNonJSOps.cpp | 3 +- src/tools/tool-options.h | 9 +- src/tools/wasm-as.cpp | 2 +- src/tools/wasm-opt.cpp | 1 + src/tools/wasm-shell.cpp | 4 +- src/tools/wasm2js.cpp | 5 +- src/wasm-io.h | 4 + src/wasm-s-parser.h | 2 + src/wasm.h | 3 + src/wasm/wasm-io.cpp | 8 +- src/wasm/wasm-s-parser.cpp | 4 +- src/wasm/wasm-validator.cpp | 102 +++++++++++++++ test/unit/test_poppy_validation.py | 203 +++++++++++++++++++++++++++++ 16 files changed, 374 insertions(+), 15 deletions(-) create mode 100644 test/unit/test_poppy_validation.py diff --git a/src/binaryen-c.cpp b/src/binaryen-c.cpp index 101af51721d..574950c9d7f 100644 --- a/src/binaryen-c.cpp +++ b/src/binaryen-c.cpp @@ -3421,7 +3421,7 @@ BinaryenModuleRef BinaryenModuleParse(const char* text) { try { SExpressionParser parser(const_cast(text)); Element& root = *parser.root; - SExpressionWasmBuilder builder(*wasm, *root[0]); + SExpressionWasmBuilder builder(*wasm, *root[0], IRProfile::Normal); } catch (ParseException& p) { p.dump(std::cerr); Fatal() << "error in parsing wasm text"; diff --git a/src/ir/stack-utils.cpp b/src/ir/stack-utils.cpp index ef871040b39..bc0fd2eb454 100644 --- a/src/ir/stack-utils.cpp +++ b/src/ir/stack-utils.cpp @@ -15,6 +15,7 @@ */ #include "stack-utils.h" +#include "ir/properties.h" namespace wasm { @@ -30,6 +31,28 @@ void removeNops(Block* block) { block->list.resize(newIndex); } +bool mayBeUnreachable(Expression* expr) { + if (Properties::isControlFlowStructure(expr)) { + return true; + } + switch (expr->_id) { + case Expression::Id::BreakId: + return expr->cast()->condition == nullptr; + case Expression::Id::CallId: + return expr->cast()->isReturn; + case Expression::Id::CallIndirectId: + return expr->cast()->isReturn; + case Expression::Id::ReturnId: + case Expression::Id::SwitchId: + case Expression::Id::UnreachableId: + case Expression::Id::ThrowId: + case Expression::Id::RethrowId: + return true; + default: + return false; + } +} + } // namespace StackUtils StackSignature::StackSignature(Expression* expr) { diff --git a/src/ir/stack-utils.h b/src/ir/stack-utils.h index 89ec7d47e18..fc23b6080ff 100644 --- a/src/ir/stack-utils.h +++ b/src/ir/stack-utils.h @@ -33,7 +33,10 @@ // stack type of each instruction. Pops may not have `unreachable` type. // // 4. Only control flow structures and instructions that have polymorphic -// unreachable behavior in WebAssembly may have unreachable type. +// unreachable behavior in WebAssembly may have unreachable type. Blocks may +// be unreachable when they are not branch targets and when they have an +// unreachable child. Note that this means a block may be unreachable even +// if it would otherwise have a concrete type, unlike in Binaryen IR. // // For example, the following Binaryen IR Function: // @@ -58,7 +61,10 @@ // ) // // Notice that the sequence of instructions in the block is now identical to the -// sequence of instructions in raw WebAssembly. +// sequence of instructions in raw WebAssembly. Also note that Poppy IR's +// validation rules are largely additional on top of the normal Binaryen IR +// validation rules, with the only exceptions being block body validation and +// block unreahchability rules. // #ifndef wasm_ir_stack_h @@ -75,6 +81,10 @@ namespace StackUtils { // Iterate through `block` and remove nops. void removeNops(Block* block); +// Whether `expr` may be unreachable in Poppy IR. True for control flow +// structures and polymorphic unreachable instructions. +bool mayBeUnreachable(Expression* expr); + } // namespace StackUtils // Stack signatures are like regular function signatures, but they are used to diff --git a/src/passes/RemoveNonJSOps.cpp b/src/passes/RemoveNonJSOps.cpp index e2054a5d218..abd3526c48a 100644 --- a/src/passes/RemoveNonJSOps.cpp +++ b/src/passes/RemoveNonJSOps.cpp @@ -79,7 +79,8 @@ struct RemoveNonJSOpsPass : public WalkerPass> { std::string input(IntrinsicsModuleWast); SExpressionParser parser(const_cast(input.c_str())); Element& root = *parser.root; - SExpressionWasmBuilder builder(intrinsicsModule, *root[0]); + SExpressionWasmBuilder builder( + intrinsicsModule, *root[0], IRProfile::Normal); std::set neededFunctions; diff --git a/src/tools/tool-options.h b/src/tools/tool-options.h index 6f0592a4531..f04b953044e 100644 --- a/src/tools/tool-options.h +++ b/src/tools/tool-options.h @@ -31,6 +31,7 @@ struct ToolOptions : public Options { PassOptions passOptions; bool quiet = false; + IRProfile profile = IRProfile::Normal; ToolOptions(const std::string& command, const std::string& description) : Options(command, description) { @@ -67,7 +68,13 @@ struct ToolOptions : public Options { "-q", "Emit less verbose output and hide trivial warnings.", Arguments::Zero, - [this](Options*, const std::string&) { quiet = true; }); + [this](Options*, const std::string&) { quiet = true; }) + .add( + "--experimental-poppy", + "", + "Parse wast files as Poppy IR for testing purposes.", + Arguments::Zero, + [this](Options*, const std::string&) { profile = IRProfile::Poppy; }); (*this) .addFeature(FeatureSet::SignExt, "sign extension operations") .addFeature(FeatureSet::Atomics, "atomic operations") diff --git a/src/tools/wasm-as.cpp b/src/tools/wasm-as.cpp index b4e1e18faa8..3a907c4c327 100644 --- a/src/tools/wasm-as.cpp +++ b/src/tools/wasm-as.cpp @@ -110,7 +110,7 @@ int main(int argc, const char* argv[]) { if (options.debug) { std::cerr << "w-parsing..." << std::endl; } - SExpressionWasmBuilder builder(wasm, *root[0]); + SExpressionWasmBuilder builder(wasm, *root[0], options.profile); } catch (ParseException& p) { p.dump(std::cerr); Fatal() << "error in parsing input"; diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp index 41302eac8e9..24ea9c31cb3 100644 --- a/src/tools/wasm-opt.cpp +++ b/src/tools/wasm-opt.cpp @@ -230,6 +230,7 @@ int main(int argc, const char* argv[]) { // asked to remove it. reader.setDWARF(options.passOptions.debugInfo && !willRemoveDebugInfo(options.passes)); + reader.setProfile(options.profile); try { reader.read(options.extra["infile"], wasm, inputSourceMapFilename); } catch (ParseException& p) { diff --git a/src/tools/wasm-shell.cpp b/src/tools/wasm-shell.cpp index 68881559895..80229554b1f 100644 --- a/src/tools/wasm-shell.cpp +++ b/src/tools/wasm-shell.cpp @@ -146,7 +146,7 @@ static void run_asserts(Name moduleName, std::unique_ptr builder; try { builder = std::unique_ptr( - new SExpressionWasmBuilder(wasm, *curr[1])); + new SExpressionWasmBuilder(wasm, *curr[1], IRProfile::Normal)); } catch (const ParseException&) { invalid = true; } @@ -306,7 +306,7 @@ int main(int argc, const char* argv[]) { auto module = wasm::make_unique(); Name moduleName; auto builder = wasm::make_unique( - *module, *root[i], &moduleName); + *module, *root[i], IRProfile::Normal, &moduleName); builders[moduleName].swap(builder); modules[moduleName].swap(module); i++; diff --git a/src/tools/wasm2js.cpp b/src/tools/wasm2js.cpp index fb78336d062..524be4c5cf5 100644 --- a/src/tools/wasm2js.cpp +++ b/src/tools/wasm2js.cpp @@ -825,7 +825,7 @@ void AssertionEmitter::emit() { asmModule = Name(moduleNameS.str().c_str()); Module wasm; options.applyFeatures(wasm); - SExpressionWasmBuilder builder(wasm, e); + SExpressionWasmBuilder builder(wasm, e, options.profile); emitWasm(wasm, out, flags, options.passOptions, funcName); continue; } @@ -972,7 +972,8 @@ int main(int argc, const char* argv[]) { if (options.debug) { std::cerr << "w-parsing..." << std::endl; } - sexprBuilder = make_unique(wasm, *(*root)[0]); + sexprBuilder = + make_unique(wasm, *(*root)[0], options.profile); } } catch (ParseException& p) { p.dump(std::cerr); diff --git a/src/wasm-io.h b/src/wasm-io.h index 77d2506c49c..fbc8947c690 100644 --- a/src/wasm-io.h +++ b/src/wasm-io.h @@ -33,6 +33,8 @@ class ModuleReader { // the binary, so that we can update DWARF sections later when writing. void setDWARF(bool DWARF_) { DWARF = DWARF_; } + void setProfile(IRProfile profile_) { profile = profile_; } + // read text void readText(std::string filename, Module& wasm); // read binary @@ -49,6 +51,8 @@ class ModuleReader { private: bool DWARF = false; + IRProfile profile = IRProfile::Normal; + void readStdin(Module& wasm, std::string sourceMapFilename); void readBinaryData(std::vector& input, diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h index 6cbef559986..75cd4103b04 100644 --- a/src/wasm-s-parser.h +++ b/src/wasm-s-parser.h @@ -111,6 +111,7 @@ class SExpressionParser { class SExpressionWasmBuilder { Module& wasm; MixedArena& allocator; + IRProfile profile; std::vector signatures; std::unordered_map signatureIndices; std::vector functionNames; @@ -127,6 +128,7 @@ class SExpressionWasmBuilder { // Assumes control of and modifies the input. SExpressionWasmBuilder(Module& wasm, Element& module, + IRProfile profile, Name* moduleName = nullptr); private: diff --git a/src/wasm.h b/src/wasm.h index 3d658d3670e..4d30528df2c 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -62,6 +62,8 @@ struct Address { } }; +enum class IRProfile { Normal, Poppy }; + // Operators enum UnaryOp { @@ -1260,6 +1262,7 @@ class Function : public Importable { public: Name name; Signature sig; // parameters and return value + IRProfile profile = IRProfile::Normal; std::vector vars; // non-param locals // The body of the function diff --git a/src/wasm/wasm-io.cpp b/src/wasm/wasm-io.cpp index 8fa714d9cac..79687d4696c 100644 --- a/src/wasm/wasm-io.cpp +++ b/src/wasm/wasm-io.cpp @@ -33,16 +33,16 @@ namespace wasm { #define DEBUG_TYPE "writer" -static void readTextData(std::string& input, Module& wasm) { +static void readTextData(std::string& input, Module& wasm, IRProfile profile) { SExpressionParser parser(const_cast(input.c_str())); Element& root = *parser.root; - SExpressionWasmBuilder builder(wasm, *root[0]); + SExpressionWasmBuilder builder(wasm, *root[0], profile); } void ModuleReader::readText(std::string filename, Module& wasm) { BYN_TRACE("reading text from " << filename << "\n"); auto input(read_file(filename, Flags::Text)); - readTextData(input, wasm); + readTextData(input, wasm, profile); } void ModuleReader::readBinaryData(std::vector& input, @@ -113,7 +113,7 @@ void ModuleReader::readStdin(Module& wasm, std::string sourceMapFilename) { s.write(input.data(), input.size()); s << '\0'; std::string input_str = s.str(); - readTextData(input_str, wasm); + readTextData(input_str, wasm, profile); } } diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp index 931cd1bf0d6..26c07e67d27 100644 --- a/src/wasm/wasm-s-parser.cpp +++ b/src/wasm/wasm-s-parser.cpp @@ -314,8 +314,9 @@ Element* SExpressionParser::parseString() { SExpressionWasmBuilder::SExpressionWasmBuilder(Module& wasm, Element& module, + IRProfile profile, Name* moduleName) - : wasm(wasm), allocator(wasm.allocator) { + : wasm(wasm), allocator(wasm.allocator), profile(profile) { if (module.size() == 0) { throw ParseException("empty toplevel, expected module"); } @@ -795,6 +796,7 @@ void SExpressionWasmBuilder::parseFunction(Element& s, bool preParseImport) { // make a new function currFunction = std::unique_ptr(Builder(wasm).makeFunction( name, std::move(params), sig.results, std::move(vars))); + currFunction->profile = profile; // parse body Block* autoBlock = nullptr; // may need to add a block for the very top level diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 3e1a057ce5f..f56c46bdcf3 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -22,6 +22,7 @@ #include "ir/features.h" #include "ir/global-utils.h" #include "ir/module-utils.h" +#include "ir/stack-utils.h" #include "ir/utils.h" #include "support/colors.h" #include "wasm-printing.h" @@ -246,6 +247,13 @@ struct FunctionValidator : public WalkerPass> { public: // visitors + void validatePoppyExpression(Expression* curr); + + static void visitPoppyExpression(FunctionValidator* self, + Expression** currp) { + self->validatePoppyExpression(*currp); + } + static void visitPreBlock(FunctionValidator* self, Expression** currp) { auto* curr = (*currp)->cast(); if (curr->name.is()) { @@ -254,6 +262,8 @@ struct FunctionValidator : public WalkerPass> { } void visitBlock(Block* curr); + void validateNormalBlockElements(Block* curr); + void validatePoppyBlockElements(Block* curr); static void visitPreLoop(FunctionValidator* self, Expression** currp) { auto* curr = (*currp)->cast(); @@ -276,6 +286,11 @@ struct FunctionValidator : public WalkerPass> { if (curr->is()) { self->pushTask(visitPreLoop, currp); } + if (auto* func = self->getFunction()) { + if (func->profile == IRProfile::Poppy) { + self->pushTask(visitPoppyExpression, currp); + } + } } void noteBreak(Name name, Expression* value, Expression* curr); @@ -386,6 +401,39 @@ void FunctionValidator::noteLabelName(Name name) { "names in Binaryen IR must be unique - IR generators must ensure that"); } +void FunctionValidator::validatePoppyExpression(Expression* curr) { + if (curr->type == Type::unreachable) { + shouldBeTrue(StackUtils::mayBeUnreachable(curr), + curr, + "Only control flow structures and unreachable polymorphic" + " instructions may be unreachable in Poppy IR"); + } + if (Properties::isControlFlowStructure(curr)) { + // Check that control flow children (except If conditions) are blocks + if (auto* if_ = curr->dynCast()) { + shouldBeTrue( + if_->condition->is(), curr, "Expected condition to be a Pop"); + shouldBeTrue(if_->ifTrue->is(), + curr, + "Expected control flow child to be a block"); + shouldBeTrue(!if_->ifFalse || if_->ifFalse->is(), + curr, + "Expected control flow child to be a block"); + } else if (!curr->is()) { + for (auto* child : ChildIterator(curr)) { + shouldBeTrue(child->is(), + curr, + "Expected control flow child to be a block"); + } + } + } else { + // Check that all children are Pops + for (auto* child : ChildIterator(curr)) { + shouldBeTrue(child->is(), curr, "Unexpected non-Pop child"); + } + } +} + void FunctionValidator::visitBlock(Block* curr) { if (!getModule()->features.hasMultivalue()) { shouldBeTrue(!curr->type.isTuple(), @@ -439,6 +487,17 @@ void FunctionValidator::visitBlock(Block* curr) { } breakInfos.erase(iter); } + switch (getFunction()->profile) { + case IRProfile::Normal: + validateNormalBlockElements(curr); + break; + case IRProfile::Poppy: + validatePoppyBlockElements(curr); + break; + } +} + +void FunctionValidator::validateNormalBlockElements(Block* curr) { if (curr->list.size() > 1) { for (Index i = 0; i < curr->list.size() - 1; i++) { if (!shouldBeTrue( @@ -482,6 +541,45 @@ void FunctionValidator::visitBlock(Block* curr) { } } +void FunctionValidator::validatePoppyBlockElements(Block* curr) { + StackSignature blockSig; + for (size_t i = 0; i < curr->list.size(); ++i) { + Expression* expr = curr->list[i]; + if (!shouldBeTrue( + !expr->is(), expr, "Unexpected top-level pop in block")) { + return; + } + StackSignature sig(expr); + if (!shouldBeTrue(blockSig.composes(sig), + curr, + "block element has incompatible type") && + !info.quiet) { + getStream() << "(on index " << i << ":\n" + << expr << "\n), required: " << sig.params << ", available: "; + if (blockSig.unreachable) { + getStream() << "unreachable, "; + } + getStream() << blockSig.results << "\n"; + return; + } + blockSig += sig; + } + if (curr->type == Type::unreachable) { + shouldBeTrue(blockSig.unreachable, + curr, + "unreachable block should have unreachable element"); + } else { + if (!shouldBeTrue(blockSig.satisfies(Signature(Type::none, curr->type)), + curr, + "block contents should satisfy block type") && + !info.quiet) { + getStream() << "contents: " << blockSig.results + << (blockSig.unreachable ? " [unreachable]" : "") << "\n" + << "expected: " << curr->type << "\n"; + } + } +} + void FunctionValidator::visitLoop(Loop* curr) { if (curr->name.is()) { noteLabelName(curr->name); @@ -1989,6 +2087,10 @@ void FunctionValidator::visitFunction(Function* curr) { shouldBeTrue(features <= getModule()->features, curr, "all used types should be allowed"); + if (curr->profile == IRProfile::Poppy) { + shouldBeTrue( + curr->body->is(), curr->body, "Function body must be a block"); + } // if function has no result, it is ignored // if body is unreachable, it might be e.g. a return shouldBeSubTypeOrFirstIsUnreachable( diff --git a/test/unit/test_poppy_validation.py b/test/unit/test_poppy_validation.py new file mode 100644 index 00000000000..3dc2907d864 --- /dev/null +++ b/test/unit/test_poppy_validation.py @@ -0,0 +1,203 @@ +import os + +from scripts.test import shared +from . import utils + + +class PoppyValidationTest(utils.BinaryenTestCase): + def check_invalid(self, module, error): + p = shared.run_process(shared.WASM_OPT + + ['--experimental-poppy', '--print', '-o', os.devnull], + input=module, check=False, capture_output=True) + self.assertIn(error, p.stderr) + self.assertIn('Fatal: error validating input', p.stderr) + self.assertNotEqual(p.returncode, 0) + + def check_valid(self, module): + p = shared.run_process(shared.WASM_OPT + + ['--experimental-poppy', '--print', '-o', os.devnull], + input=module, check=False, capture_output=True) + self.assertEqual(p.stderr, "") + self.assertEqual(p.returncode, 0) + + def test_top_level_pop(self): + module = ''' + (module + (func $foo (result i32) + (block (result i32) + (i32.const 0) + (i32.pop) + ) + ) + ) + ''' + self.check_invalid(module, "Unexpected top-level pop in block") + + def test_top_level_pop_fixed(self): + module = ''' + (module + (func $foo (result i32) + (block (result i32) + (i32.const 0) + ) + ) + ) + ''' + self.check_valid(module) + + def test_incompatible_type(self): + module = ''' + (module + (func $foo (result i32) + (f32.const 42) + (i32.const 42) + (i32.add + (i32.pop) + (i32.pop) + ) + ) + ) + ''' + self.check_invalid(module, "block element has incompatible type") + self.check_invalid(module, "required: (i32 i32), available: (f32 i32)") + + def test_incorrect_pop_type(self): + module = ''' + (module + (func $foo (result i32) + (i32.const 42) + (i32.const 42) + (i32.add + (i32.pop) + (f32.pop) + ) + ) + ) + ''' + self.check_invalid(module, "binary child types must be equal") + + def test_incompatible_type_fixed(self): + module = ''' + (module + (func $foo (result i32) + (i32.const 42) + (i32.const 42) + (i32.add + (i32.pop) + (i32.pop) + ) + ) + ) + ''' + self.check_valid(module) + + def test_incorrect_block_type(self): + module = ''' + (module + (func $foo (result i32) + (f32.const 42) + (nop) + ) + ) + ''' + self.check_invalid(module, "block contents should satisfy block type") + + def test_nonblock_body(self): + module = ''' + (module + (func $foo (result f32) + (f32.const 42) + ) + ) + ''' + self.check_invalid(module, "Function body must be a block") + + def test_nonpop_if_condition(self): + module = ''' + (module + (func $foo + (nop) + (i32.const 1) + (if + (i32.const 42) + (block) + ) + ) + ) + ''' + self.check_invalid(module, "Expected condition to be a Pop") + + def test_nonblock_if_true(self): + module = ''' + (module + (func $foo + (nop) + (i32.const 1) + (if + (i32.pop) + (nop) + ) + ) + ) + ''' + self.check_invalid(module, "Expected control flow child to be a block") + + def test_nonblock_if_false(self): + module = ''' + (module + (func $foo + (nop) + (i32.const 1) + (if + (i32.pop) + (block) + (nop) + ) + ) + ) + ''' + self.check_invalid(module, "Expected control flow child to be a block") + + def test_nonblock_if_fixed(self): + module = ''' + (module + (func $foo + (nop) + (i32.const 1) + (if + (i32.pop) + (block) + (block) + ) + ) + ) + ''' + self.check_valid(module) + + def test_nonblock_loop_body(self): + module = ''' + (module + (func $foo + (nop) + (loop + (nop) + ) + ) + ) + ''' + self.check_invalid(module, "Expected control flow child to be a block") + + def test_nonpop_child(self): + module = ''' + (module + (func $foo (result i32) + (i32.const 42) + (i32.const 5) + (i32.add + (i32.pop) + (i32.const -1) + ) + ) + ) + ''' + self.check_invalid(module, "Unexpected non-Pop child")