From f05060f078da4529dea3443f73cbd1ae19d0ff21 Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Sat, 20 Jun 2020 18:14:11 -0700
Subject: [PATCH 01/27] Add nested pass runner constructor

---
 src/pass.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/pass.h b/src/pass.h
index 8b8e73fa00a..dcb6cdcc132 100644
--- a/src/pass.h
+++ b/src/pass.h
@@ -153,6 +153,12 @@ struct PassRunner {
   PassRunner(const PassRunner&) = delete;
   PassRunner& operator=(const PassRunner&) = delete;
 
+  // But we can make it easy to create a nested runner
+  // TODO: Go through and use this in more places
+  explicit PassRunner(const PassRunner* runner)
+    : wasm(runner->wasm), allocator(runner->allocator),
+      options(runner->options), isNested(true) {}
+
   void setDebug(bool debug) {
     options.debug = debug;
     // validate everything by default if debugging

From 1b2606ccd410f3ad1698c08257c795344eb19f14 Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Fri, 12 Jun 2020 18:05:41 -0700
Subject: [PATCH 02/27] Move Stack IR out of default passes

This prevents Stack IR from being generated and then thrown away in the
middle of an optimization pipeline. This change is partly preparation
for a future in which tuple lowering is performed as a part of lowering
to Stack IR or something similar. In that case, the lowering would add
locals, and trying to throw away the Stack IR (or similar construct)
would become an error.
---
 src/binaryen-c.cpp | 2 ++
 src/pass.h | 5 +++++
 src/passes/pass.cpp | 3 +++
 src/tools/optimization-options.h | 19 ++++++++++++++++++-
 src/tools/wasm-opt.cpp | 1 +
 ...O2_precompute-propagate_print-stack-ir.txt | 11 +++++------
 test/passes/O3_inlining.txt | 2 +-
 test/passes/fannkuch3_manyopts_dwarf.bin.txt | 6 +++---
 test/unit/test_stack_ir.py | 4 ++--
 9 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/src/binaryen-c.cpp b/src/binaryen-c.cpp
index 2201f97bd3b..62ebdd45855 100644
--- a/src/binaryen-c.cpp
+++ b/src/binaryen-c.cpp
@@ -3454,6 +3454,7 @@ void BinaryenModuleOptimize(BinaryenModuleRef module) {
   PassRunner passRunner((Module*)module);
   passRunner.options = globalPassOptions;
   passRunner.addDefaultOptimizationPasses();
+  passRunner.addDefaultPreWritingPasses();
   passRunner.run();
 }
 
@@ -3706,6 +3707,7 @@ void BinaryenFunctionOptimize(BinaryenFunctionRef func,
   PassRunner passRunner((Module*)module);
   passRunner.options = globalPassOptions;
   passRunner.addDefaultOptimizationPasses();
+  passRunner.addDefaultPreWritingPasses();
   passRunner.runOnFunction((Function*)func);
 }
 void BinaryenFunctionRunPasses(BinaryenFunctionRef func,
diff --git a/src/pass.h b/src/pass.h
index dcb6cdcc132..de2ed1e4f22 100644
--- a/src/pass.h
+++ b/src/pass.h
@@ -204,6 +204,11 @@ struct PassRunner {
   // afterwards.
   void addDefaultGlobalOptimizationPostPasses();
 
+  // Adds optimizations that should only be run immediately prior to module
+  // writing. After these passes the module may be in Poppy IR. This is not
+  // called as part of `addDefaultOptimizationPasses`.
+ void addDefaultPreWritingPasses(); + // Run the passes on the module void run(); diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 73e5094e85c..cc61c5995df 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -511,6 +511,9 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() { add("remove-unused-module-elements"); // may allow more inlining/dae/etc., need --converge for that add("directize"); +} + +void PassRunner::addDefaultPreWritingPasses() { // perform Stack IR optimizations here, at the very end of the // optimization pipeline if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) { diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h index ef5a7d67a57..bf114af0945 100644 --- a/src/tools/optimization-options.h +++ b/src/tools/optimization-options.h @@ -208,12 +208,29 @@ struct OptimizationOptions : public ToolOptions { if (debug) { passRunner.setDebug(true); } - for (auto& pass : passes) { + // If the last thing to do is inspect the module, do it after the + // pre-writing passes + // TODO: Make this less hacky? + int preWritingIndex = -1; + if (runningDefaultOptimizationPasses()) { + preWritingIndex = passes.size() - 1; + while (preWritingIndex > 0 && + (passes[preWritingIndex] == "print" || + passes[preWritingIndex] == "print-stack-ir" || + passes[preWritingIndex] == "metrics")) { + --preWritingIndex; + } + } + for (int i = 0, size = passes.size(); i < size; ++i) { + auto& pass = passes[i]; if (pass == DEFAULT_OPT_PASSES) { passRunner.addDefaultOptimizationPasses(); } else { passRunner.add(pass); } + if (i == preWritingIndex) { + passRunner.addDefaultPreWritingPasses(); + } } passRunner.run(); } diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp index 41302eac8e9..1356863d900 100644 --- a/src/tools/wasm-opt.cpp +++ b/src/tools/wasm-opt.cpp @@ -329,6 +329,7 @@ int main(int argc, const char* argv[]) { } }; runPasses(); + // TODO: Fix this to read the binary at the beginning of every round. if (converge) { // Keep on running passes to convergence, defined as binary // size no longer decreasing. 
diff --git a/test/passes/O2_precompute-propagate_print-stack-ir.txt b/test/passes/O2_precompute-propagate_print-stack-ir.txt index 24c82559ead..fa6b04e1c14 100644 --- a/test/passes/O2_precompute-propagate_print-stack-ir.txt +++ b/test/passes/O2_precompute-propagate_print-stack-ir.txt @@ -3,17 +3,16 @@ (export "func" (func $0)) (func $0 (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i64) (result i64) (local $4 i32) - (local.set $3 - (i64.const 2147483647) - ) - (nop) - (i64.const 2147483647) + i64.const 2147483647 + local.set $3 + nop + i64.const 2147483647 ) ) (module (type $i32_i32_i32_i64_=>_i64 (func (param i32 i32 i32 i64) (result i64))) (export "func" (func $0)) - (func $0 (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i64) (result i64) + (func $0 (; has Stack IR ;) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i64) (result i64) (local $4 i32) (local.set $3 (i64.const 2147483647) diff --git a/test/passes/O3_inlining.txt b/test/passes/O3_inlining.txt index 79efb08a3a4..b7bc3d54448 100644 --- a/test/passes/O3_inlining.txt +++ b/test/passes/O3_inlining.txt @@ -3,7 +3,7 @@ (memory $0 1 1) (global $global$1 (mut i32) (i32.const 100)) (export "func_217" (func $1)) - (func $1 (param $0 i32) + (func $1 (; has Stack IR ;) (param $0 i32) (if (global.get $global$1) (unreachable) diff --git a/test/passes/fannkuch3_manyopts_dwarf.bin.txt b/test/passes/fannkuch3_manyopts_dwarf.bin.txt index e9029b6b831..4a0c4846d59 100644 --- a/test/passes/fannkuch3_manyopts_dwarf.bin.txt +++ b/test/passes/fannkuch3_manyopts_dwarf.bin.txt @@ -4729,11 +4729,11 @@ file_names[ 4]: (export "__wasm_call_ctors" (func $__wasm_call_ctors)) (export "main" (func $main)) (export "__data_end" (global $global$1)) - (func $__wasm_call_ctors + (func $__wasm_call_ctors (; has Stack IR ;) ;; code offset: 0x3 (nop) ) - (func $fannkuch_worker\28void*\29 (param $0 i32) (result i32) + (func $fannkuch_worker\28void*\29 (; has Stack IR ;) (param $0 i32) (result i32) (local $1 i32) (local $2 i32) (local $3 i32) @@ -5895,7 +5895,7 @@ file_names[ 4]: ;; code offset: 0x38b (local.get $5) ) - (func $main (param $0 i32) (param $1 i32) (result i32) + (func $main (; has Stack IR ;) (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) (local $4 i32) diff --git a/test/unit/test_stack_ir.py b/test/unit/test_stack_ir.py index 78086d0ccee..6fad67897f3 100644 --- a/test/unit/test_stack_ir.py +++ b/test/unit/test_stack_ir.py @@ -7,8 +7,8 @@ class StackIRTest(utils.BinaryenTestCase): # test that stack IR opts make a difference. 
     def test_stack_ir_opts(self):
         path = self.input_path('stack_ir.wat')
-        opt = shared.run_process(shared.WASM_OPT + [path, '-O', '--generate-stack-ir', '--optimize-stack-ir', '--print-stack-ir', '-o', 'a.wasm'], capture_output=True).stdout
-        nonopt = shared.run_process(shared.WASM_OPT + [path, '-O', '--generate-stack-ir', '--print-stack-ir', '-o', 'b.wasm'], capture_output=True).stdout
+        opt = shared.run_process(shared.WASM_OPT + [path, '--shrink-level=1', '--generate-stack-ir', '--optimize-stack-ir', '--print-stack-ir', '-o', 'a.wasm'], capture_output=True).stdout
+        nonopt = shared.run_process(shared.WASM_OPT + [path, '--shrink-level=1', '--generate-stack-ir', '--print-stack-ir', '-o', 'b.wasm'], capture_output=True).stdout
         # see a difference in the printed stack IR (the optimizations let us
         # remove a pair of local.set/get)
         self.assertNotEqual(opt, nonopt)

From c4888515d5d05aeccbebdd59a2b2d2438d843e20 Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Tue, 21 Apr 2020 17:15:10 -0700
Subject: [PATCH 03/27] Add stacky mode to BinaryenIR

As an alternative to StackIR, adds a "stacky" mode to the normal
BinaryenIR in which instructions are completely flat and have only pops
as their children. This means that all instructions in a block may have
arbitrary concrete types, not just the last one. The potential benefits
of introducing a new mode for Binaryen IR are that many passes can be
agnostic to whether they run on stacky or unstacky code, and that
stacky optimizations might be easier to write. This will require
further comparative exploration. The next steps in this direction are:

- Perform the actual tuple lowering in Stackify.cpp
- Add a new validation mode that checks invariants for stacky code
- Testing and fuzzing support
- Reimplement the StackIR optimizations on the stacky BinaryenIR
- Add checking to the pass runner to ensure passes get only code forms
  they can handle.

No more work will be done on this PR until tuples are made to work with
the current StackIR. Then maybe we can look more concretely at the
tradeoffs involved.
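For a rough illustration of the intended shape (a hypothetical example;
this patch does not settle on a printed syntax for stacky code, and the
local $x and the block wrapper are assumptions for the sake of the
sketch), a function body such as

  (i32.add
    (local.get $x)
    (i32.const 1)
  )

would be represented in stacky mode as a flat sequence of instructions
whose only children are pops:

  (block (result i32)
    (local.get $x)
    (i32.const 1)
    (i32.add
      (pop i32)
      (pop i32)
    )
  )

Note how the non-final children of the block (the local.get and the
i32.const) leave concrete values on the stack, which the validator
rejects for normal BinaryenIR blocks.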
---
 src/binaryen-c.cpp | 5 +
 src/ir/iteration.h | 1 +
 src/passes/CMakeLists.txt | 1 +
 src/passes/Precompute.cpp | 11 +++
 src/passes/Stackify.cpp | 192 ++++++++++++++++++++++++++++++++++++
 src/passes/pass.cpp | 3 +
 src/passes/passes.h | 1 +
 src/support/small_vector.h | 3 +
 src/wasm-interpreter.h | 62 +++++++++---
 src/wasm-traversal.h | 2 +-
 src/wasm.h | 1 +
 src/wasm/wasm-validator.cpp | 2 +-
 12 files changed, 267 insertions(+), 17 deletions(-)
 create mode 100644 src/passes/Stackify.cpp

diff --git a/src/binaryen-c.cpp b/src/binaryen-c.cpp
index 62ebdd45855..96fdd7fce66 100644
--- a/src/binaryen-c.cpp
+++ b/src/binaryen-c.cpp
@@ -3997,6 +3997,11 @@ class CExpressionRunner final
                       Index maxLoopIterations)
     : ConstantExpressionRunner<CExpressionRunner>(
         module, flags, maxDepth, maxLoopIterations) {}
+
+  bool isStacky() {
+    WASM_UNREACHABLE("TODO: support stackiness by tracking current function in "
+                     "traversing expression runner");
+  }
 };
 
 } // namespace wasm
diff --git a/src/ir/iteration.h b/src/ir/iteration.h
index fd275f7490b..a3cb42ce80d 100644
--- a/src/ir/iteration.h
+++ b/src/ir/iteration.h
@@ -80,6 +80,7 @@ class ChildIterator {
 
   Iterator begin() const { return Iterator(*this, 0); }
   Iterator end() const { return Iterator(*this, children.size()); }
+  Index size() const { return children.size(); }
 };
 
 // Returns true if the current expression contains a certain kind of expression,
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index c22e8589ae6..ed31c39f3ef 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -57,6 +57,7 @@ set(passes_SOURCES
   PrintFeatures.cpp
   PrintFunctionMap.cpp
   RoundTrip.cpp
+  Stackify.cpp
   StackIR.cpp
   Strip.cpp
   StripTargetFeatures.cpp
diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp
index 695b62ab200..14522786e30 100644
--- a/src/passes/Precompute.cpp
+++ b/src/passes/Precompute.cpp
@@ -87,6 +87,17 @@ class PrecomputingExpressionRunner
     return ConstantExpressionRunner<
       PrecomputingExpressionRunner>::visitLocalGet(curr);
   }
+
+  Flow visitPop(Pop* curr) {
+    if (this->valueStack.empty()) {
+      return Flow(NONCONSTANT_FLOW);
+    }
+    return ConstantExpressionRunner<PrecomputingExpressionRunner>::visitPop(
+      curr);
+  }
+
+  // TODO: Accept stacky code?
+  bool isStacky() { return false; }
 };
 
 struct Precompute
diff --git a/src/passes/Stackify.cpp b/src/passes/Stackify.cpp
new file mode 100644
index 00000000000..73fca552bc2
--- /dev/null
+++ b/src/passes/Stackify.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright 2020 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO: documentation
+
+#include "ir/properties.h"
+#include "pass.h"
+#include "wasm-stack.h"
+
+namespace wasm {
+
+namespace {
+
+struct Poppifier : PostWalker<Poppifier> {
+  bool scanned = false;
+  MixedArena& allocator;
+  Poppifier(MixedArena& allocator) : allocator(allocator) {}
+
+  static void scan(Poppifier* self, Expression** currp) {
+    if (!self->scanned) {
+      self->scanned = true;
+      PostWalker<Poppifier>::scan(self, currp);
+    } else {
+      *currp = Builder(self->allocator).makePop((*currp)->type);
+    }
+  }
+
+  // Replace the children of `expr` with pops.
+  void poppify(Expression* expr) {
+    scanned = false;
+    walk(expr);
+  }
+};
+
+struct Scope {
+  enum Kind { Func, Block, Loop, If, Else, Try, Catch } kind;
+  std::vector<Expression*> instrs;
+  Scope(Kind kind) : kind(kind) {}
+};
+
+struct Stackifier : BinaryenIRWriter<Stackifier> {
+  MixedArena& allocator;
+  std::vector<Scope> scopeStack;
+
+  Stackifier(Function* func, MixedArena& allocator)
+    : BinaryenIRWriter<Stackifier>(func), allocator(allocator) {
+    // Start with a scope to emit top-level instructions into
+    scopeStack.emplace_back(Scope::Func);
+  }
+
+  void patchInstrs(Expression*& expr, const std::vector<Expression*>& instrs) {
+    if (instrs.size() == 1) {
+      expr = instrs[0];
+    } else if (auto* block = expr->dynCast<Block>()) {
+      block->list.set(instrs);
+    } else {
+      expr = Builder(allocator).makeBlock(instrs);
+    }
+  }
+
+  void emit(Expression* curr) {
+    // Control flow structures introduce new scopes. The instructions collected
+    // for the new scope will be patched back into the original Expression when
+    // the scope ends.
+    if (Properties::isControlFlowStructure(curr)) {
+      Scope::Kind kind;
+      switch (curr->_id) {
+        case Expression::BlockId:
+          kind = Scope::Block;
+          break;
+        case Expression::LoopId:
+          kind = Scope::Loop;
+          break;
+        case Expression::IfId:
+          // The condition has already been emitted
+          curr->cast<If>()->condition = Builder(allocator).makePop(Type::i32);
+          kind = Scope::If;
+          break;
+        case Expression::TryId:
+          kind = Scope::Try;
+          break;
+        default:
+          WASM_UNREACHABLE("Unexpected control flow structure");
+      }
+      scopeStack.emplace_back(kind);
+    } else {
+      // TODO: Lower tuple instructions
+      // Replace all children (which have already been emitted) with pops and
+      // emit the current instruction into the current scope.
+      Poppifier(allocator).poppify(curr);
+      scopeStack.back().instrs.push_back(curr);
+    }
+  };
+
+  void emitHeader() {}
+
+  void emitIfElse(If* curr) {
+    auto& scope = scopeStack.back();
+    assert(scope.kind == Scope::If);
+    patchInstrs(curr->ifTrue, scope.instrs);
+    scope.instrs.clear();
+    scope.kind = Scope::Else;
+  }
+
+  void emitCatch(Try* curr) {
+    auto& scope = scopeStack.back();
+    assert(scope.kind == Scope::Try);
+    patchInstrs(curr->body, scope.instrs);
+    scope.instrs.clear();
+    scope.kind = Scope::Catch;
+  }
+
+  void emitScopeEnd(Expression* curr) {
+    auto& scope = scopeStack.back();
+    switch (scope.kind) {
+      case Scope::Block:
+        patchInstrs(curr, scope.instrs);
+        break;
+      case Scope::Loop:
+        patchInstrs(curr->cast<Loop>()->body, scope.instrs);
+        break;
+      case Scope::If:
+        patchInstrs(curr->cast<If>()->ifTrue, scope.instrs);
+        break;
+      case Scope::Else:
+        patchInstrs(curr->cast<If>()->ifFalse, scope.instrs);
+        break;
+      case Scope::Catch:
+        patchInstrs(curr->cast<Try>()->catchBody, scope.instrs);
+        break;
+      case Scope::Try:
+        WASM_UNREACHABLE("try without catch");
+      case Scope::Func:
+        WASM_UNREACHABLE("unexpected end of function");
+    }
+    scopeStack.pop_back();
+    scopeStack.back().instrs.push_back(curr);
+  }
+
+  void emitFunctionEnd() {
+    auto& scope = scopeStack.back();
+    assert(scope.kind == Scope::Func);
+    // If there is only one instruction, it must already be the body so patching
+    // it into itself could cause a cycle in the IR. But if there are multiple
+    // instructions because the top-level expression in the original IR was not
+    // a block, they need to be injected into a new block.
+    if (scope.instrs.size() > 1) {
+      patchInstrs(func->body, scope.instrs);
+    } else if (scope.instrs.size() == 1) {
+      assert(func->body == scope.instrs[0] ||
+             (func->body->cast<Block>()->list.size() == 1 &&
+              func->body->cast<Block>()->list[0] == scope.instrs[0]));
+    }
+  }
+
+  void emitUnreachable() {
+    scopeStack.back().instrs.push_back(Builder(allocator).makeUnreachable());
+  }
+
+  void emitDebugLocation(Expression* curr) {}
+};
+
+} // anonymous namespace
+
+class StackifyPass : public Pass {
+  bool isFunctionParallel() override { return true; }
+  void
+  runOnFunction(PassRunner* runner, Module* module, Function* func) override {
+    if (!func->isStacky) {
+      Stackifier(func, module->allocator).write();
+      func->isStacky = true;
+    }
+  }
+  Pass* create() override { return new StackifyPass(); }
+};
+
+Pass* createStackifyPass() { return new StackifyPass(); }
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index cc61c5995df..fedaf7276e8 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -350,6 +350,9 @@ void PassRegistry::registerPasses() {
     "ssa-nomerge",
     "ssa-ify variables so that they have a single assignment, ignoring merges",
     createSSAifyNoMergePass);
+  registerPass("stackify",
+               "Unfold all expressions into a flat stack machine format",
+               createStackifyPass);
   registerPass(
     "strip", "deprecated; same as strip-debug", createStripDebugPass);
   registerPass("stack-check",
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 5eb6cef3c23..e98dbc8a956 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -112,6 +112,7 @@ Pass* createSimplifyLocalsNoTeePass();
 Pass* createSimplifyLocalsNoStructurePass();
 Pass* createSimplifyLocalsNoTeeNoStructurePass();
 Pass* createStackCheckPass();
+Pass* createStackifyPass();
 Pass* createStripDebugPass();
 Pass* createStripDWARFPass();
 Pass* createStripProducersPass();
diff --git a/src/support/small_vector.h b/src/support/small_vector.h
index
5791a039804..a8816c9e4e6 100644
--- a/src/support/small_vector.h
+++ b/src/support/small_vector.h
@@ -133,6 +133,9 @@ template<typename T, size_t N> class SmallVector {
     typedef T value_type;
     typedef long difference_type;
     typedef T& reference;
+    typedef T* pointer;
+    // TODO: Make bidirectional
+    typedef std::forward_iterator_tag iterator_category;
 
     Parent* parent;
     size_t index;
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index 4f0d22e58aa..dc6e2324d40 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -162,6 +162,10 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
   // Maximum iterations before giving up on a loop.
   Index maxLoopIterations;
 
+  // Multivalue support.
+  // TODO: make this Literal instead of Literals once tuples are split up
+  std::vector<Literals> valueStack;
+
   Flow generateArguments(const ExpressionList& operands,
                          LiteralList& arguments) {
     NOTE_ENTER_("generateArguments");
@@ -177,6 +181,8 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
     return Flow();
   }
 
+  bool isStacky() { return static_cast<SubType*>(this)->isStacky(); }
+
 public:
   // Indicates no limit of maxDepth or maxLoopIterations.
   static const Index NO_LIMIT = 0;
@@ -190,6 +196,11 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
     if (maxDepth != NO_LIMIT && depth > maxDepth) {
       trap("interpreter recursion limit");
    }
+    if (isStacky() && !Properties::isControlFlowStructure(curr)) {
+      // Reverse the popped values so they come out in the right order
+      Index numChildren = ChildIterator(curr).size();
+      std::reverse(valueStack.end() - numChildren, valueStack.end());
+    }
     auto ret = OverriddenVisitor<SubType, Flow>::visit(curr);
     if (!ret.breaking()) {
       Type type = ret.getType();
@@ -202,8 +213,15 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
       }
 #endif
       assert(Type::isSubType(type, curr->type));
+      if (isStacky() && !curr->is<Pop>()) {
+        // TODO: once tuples are lowered away, push results individually
+        // valueStack.insert(valueStack.cend(), ret.values.begin(),
+        //                   ret.values.end());
+        valueStack.push_back(ret.values);
+      }
     }
   }
+  // TODO: handle removing valueStack entries when breaking
   depth--;
   return ret;
 }
@@ -1239,7 +1257,13 @@
   Flow visitSIMDLoadSplat(SIMDLoad* curr) { WASM_UNREACHABLE("unimp"); }
   Flow visitSIMDLoadExtend(SIMDLoad* curr) { WASM_UNREACHABLE("unimp"); }
   Flow visitSIMDLoadZero(SIMDLoad* curr) { WASM_UNREACHABLE("unimp"); }
-  Flow visitPop(Pop* curr) { WASM_UNREACHABLE("unimp"); }
+  Flow visitPop(Pop* curr) {
+    NOTE_ENTER("Pop");
+    assert(!valueStack.empty());
+    auto ret = this->valueStack.back();
+    this->valueStack.pop_back();
+    return ret;
+  }
   Flow visitRefNull(RefNull* curr) {
     NOTE_ENTER("RefNull");
     return Literal::makeNullref();
@@ -1537,9 +1561,24 @@ class ConstantExpressionRunner : public ExpressionRunner<SubType> {
     NOTE_ENTER("SIMDLoadExtend");
     return Flow(NONCONSTANT_FLOW);
   }
-  Flow visitPop(Pop* curr) {
-    NOTE_ENTER("Pop");
-    return Flow(NONCONSTANT_FLOW);
+  Flow visitRefNull(RefNull* curr) {
+    NOTE_ENTER("RefNull");
+    return Literal::makeNullref();
+  }
+  Flow visitRefIsNull(RefIsNull* curr) {
+    NOTE_ENTER("RefIsNull");
+    Flow flow = this->visit(curr->value);
+    if (flow.breaking()) {
+      return flow;
+    }
+    Literal value = flow.getSingleValue();
+    NOTE_EVAL1(value);
+    return Literal(value.type == Type::nullref);
+  }
+  Flow visitRefFunc(RefFunc* curr) {
+    NOTE_ENTER("RefFunc");
+    NOTE_NAME(curr->func);
+    return Literal::makeFuncref(curr->func);
   }
   Flow visitTry(Try* curr) {
     NOTE_ENTER("Try");
@@ -1565,6 +1604,7 @@ class InitializerExpressionRunner :
       ExpressionRunner<InitializerExpressionRunner<GlobalManager>>(maxDepth),
       globals(globals) {}
 
+  bool isStacky() { return false; }
   Flow visitGlobalGet(GlobalGet* curr) { return Flow(globals[curr->name]); }
 };
 
@@ -1749,9 +1789,6 @@ template<typename GlobalManager, typename SubType> class ModuleInstanceBase {
   // Values of globals
   GlobalManager globals;
 
-  // Multivalue ABI support (see push/pop).
-  std::vector<Literal> multiValues;
-
   ModuleInstanceBase(Module& wasm, ExternalInterface* externalInterface)
     : wasm(wasm), externalInterface(externalInterface) {
     // import globals from the outside
@@ -1923,6 +1960,8 @@ template<typename GlobalManager, typename SubType> class ModuleInstanceBase {
       : ExpressionRunner<RuntimeExpressionRunner>(maxDepth), instance(instance),
         scope(scope) {}
 
+    bool isStacky() { return scope.function->isStacky; }
+
     Flow visitCall(Call* curr) {
       NOTE_ENTER("Call");
       NOTE_NAME(curr->target);
@@ -2454,17 +2493,10 @@ template<typename GlobalManager, typename SubType> class ModuleInstanceBase {
       try {
        return this->visit(curr->body);
       } catch (const WasmException& e) {
-        instance.multiValues.push_back(e.exn);
+        this->valueStack.push_back({e.exn});
         return this->visit(curr->catchBody);
       }
     }
-    Flow visitPop(Pop* curr) {
-      NOTE_ENTER("Pop");
-      assert(!instance.multiValues.empty());
-      auto ret = instance.multiValues.back();
-      instance.multiValues.pop_back();
-      return ret;
-    }
 
     void trap(const char* why) override {
       instance.externalInterface->trap(why);
diff --git a/src/wasm-traversal.h b/src/wasm-traversal.h
index fc6b1cbb89a..8cf496ed697 100644
--- a/src/wasm-traversal.h
+++ b/src/wasm-traversal.h
@@ -547,7 +547,7 @@ struct UnifiedExpressionVisitor : public Visitor<SubType, ReturnType> {
 //
 
 template<typename SubType, typename VisitorType>
 struct Walker : public VisitorType {
-  // Useful methods for visitor implementions
+  // Useful methods for visitor implementations
 
   // Replace the current node. You can call this in your visit*() methods.
   // Note that the visit*() for the result node is not called for you (i.e.,
diff --git a/src/wasm.h b/src/wasm.h
index 3d658d3670e..913eb89fb2b 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -1274,6 +1274,7 @@ class Function : public Importable {
   // stack IR. The Pass system will throw away Stack IR if a pass is run
   // that declares it may modify Binaryen IR.
   std::unique_ptr<StackIR> stackIR;
+  bool isStacky = false;
 
   // local names. these are optional.
  std::map<Index, Name> localNames;
diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp
index 4921dafdd35..c20bc768e0e 100644
--- a/src/wasm/wasm-validator.cpp
+++ b/src/wasm/wasm-validator.cpp
@@ -440,7 +440,7 @@ void FunctionValidator::visitBlock(Block* curr) {
     }
     breakInfos.erase(iter);
   }
-  if (curr->list.size() > 1) {
+  if (curr->list.size() > 1 && !getFunction()->isStacky) {
     for (Index i = 0; i < curr->list.size() - 1; i++) {
       if (!shouldBeTrue(
             !curr->list[i]->type.isConcrete(),

From c3cf70ba8b3b69eff09a6cc5e89d9f7c432f9368 Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Fri, 22 May 2020 19:03:23 -0700
Subject: [PATCH 04/27] WIP: unstackify compiles but is untested

---
 src/passes/CMakeLists.txt | 1 +
 src/passes/Unstackify.cpp | 285 ++++++++++++++++++++++++++++++++++++++
 src/passes/pass.cpp | 3 +
 src/passes/passes.h | 1 +
 4 files changed, 290 insertions(+)
 create mode 100644 src/passes/Unstackify.cpp

diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index ed31c39f3ef..9bfee38ad03 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -81,6 +81,7 @@ set(passes_SOURCES
   SpillPointers.cpp
   StackCheck.cpp
   SSAify.cpp
+  Unstackify.cpp
   Untee.cpp
   Vacuum.cpp
   ${CMAKE_CURRENT_BINARY_DIR}/WasmIntrinsics.cpp
diff --git a/src/passes/Unstackify.cpp b/src/passes/Unstackify.cpp
new file mode 100644
index 00000000000..954322351fd
--- /dev/null
+++ b/src/passes/Unstackify.cpp
@@ -0,0 +1,285 @@
+/*
+ * Copyright 2020 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO: documentation
+
+#include "ir/iteration.h"
+#include "ir/properties.h"
+#include "pass.h"
+#include "wasm-builder.h"
+
+namespace wasm {
+
+namespace {
+
+struct Unstackifier
+  : PostWalker<Unstackifier, UnifiedExpressionVisitor<Unstackifier>> {
+  using Super =
+    PostWalker<Unstackifier, UnifiedExpressionVisitor<Unstackifier>>;
+
+  Unstackifier(Function* func, MixedArena& allocator)
+    : func(func), allocator(allocator) {}
+
+  // Unstackify `root` in place
+  void unstackify(Expression* root);
+
+  static void scan(Unstackifier* self, Expression** currp);
+  static void doPreVisitExpression(Unstackifier* self, Expression** currp);
+
+  void preVisitExpression(Expression* curr);
+  void visitExpression(Expression* curr);
+
+private:
+  Expression* root = nullptr;
+  Function* func;
+  MixedArena& allocator;
+
+  std::vector<Expression*> expressionStack;
+  void pushExpression(Expression* curr);
+  Expression* popNonVoidExpression();
+  Expression* popExpression();
+  Expression* popExpression(Type type);
+  // Expression* popBlockOrSingleton(Type type, unsigned numPops);
+
+  // Keep track of the number of expressions on the stack at the beginning of
+  // each block so we know how many should be implicitly dropped when we get to
+  // the end of a block.
+  std::vector<size_t> scopeStack;
+  void startScope();
+  size_t endScope();
+};
+
+void Unstackifier::pushExpression(Expression* curr) {
+  if (curr->type.isMulti()) {
+    // Store tuple to local and push individual extracted values
+    Builder builder(allocator);
+    Index tuple = builder.addVar(func, curr->type);
+    expressionStack.push_back(builder.makeLocalSet(tuple, curr));
+    const auto& types = curr->type.expand();
+    for (Index i = 0; i < types.size(); ++i) {
+      expressionStack.push_back(
+        builder.makeTupleExtract(builder.makeLocalGet(tuple, curr->type), i));
+    }
+  } else {
+    expressionStack.push_back(curr);
+  }
+}
+
+Expression* Unstackifier::popExpression() {
+  // In unreachable code, trying to pop past the polymorphic stack area
+  // results in receiving unreachables
+  if (expressionStack.empty()) {
+    assert(false && "Empty expression stack"); // TODO: determine if dead
+    return allocator.alloc<Unreachable>();
+  }
+  auto* popped = expressionStack.back();
+  expressionStack.pop_back();
+  assert(!popped->type.isMulti());
+  return popped;
+}
+
+Expression* Unstackifier::popNonVoidExpression() {
+  auto* ret = popExpression();
+  if (ret->type != Type::none) {
+    return ret;
+  }
+
+  // Pop expressions until we find a non-none typed expression
+  std::vector<Expression*> expressions;
+  while (true) {
+    auto* curr = popExpression();
+    if (curr->type == Type::unreachable) {
+      // The previously popped expressions won't be reached, so we can just
+      // return this one.
+      return curr;
+    }
+    expressions.push_back(curr);
+    if (curr->type != Type::none) {
+      break;
+    }
+  }
+
+  // Create a block to hold all the popped expressions
+  Builder builder(allocator);
+  auto* block = builder.makeBlock();
+  for (auto it = expressions.rbegin(); it != expressions.rend(); ++it) {
+    block->list.push_back(*it);
+  }
+
+  // Add a variable to retrieve the concrete value at the end of the block
+  auto type = block->list[0]->type;
+  assert(type.isConcrete());
+  auto local = builder.addVar(func, type);
+  block->list[0] = builder.makeLocalSet(local, block->list[0]);
+  block->list.push_back(builder.makeLocalGet(local, type));
+  block->type = type;
+  return block;
+}
+
+Expression* Unstackifier::popExpression(Type type) {
+  if (type == Type::none || type == Type::unreachable) {
+    auto* ret = popExpression();
+    assert(Type::isSubType(ret->type, type) ||
+           ret->type == Type::unreachable);
+    return ret;
+  }
+  if (type.isSingle()) {
+    auto* ret = popNonVoidExpression();
+    assert(Type::isSubType(ret->type, type) ||
+           ret->type == Type::unreachable);
+    return ret;
+  }
+
+  assert(type.isMulti() && "Unexpected popped type");
+
+  // Construct a tuple with the correct number of elements
+  auto numElems = type.size();
+  Builder builder(allocator);
+  std::vector<Expression*> elements;
+  elements.resize(numElems);
+  for (size_t i = 0; i < numElems; i++) {
+    auto* elem = popNonVoidExpression();
+    if (elem->type == Type::unreachable) {
+      // All the previously-popped items cannot be reached, so ignore them. We
+      // cannot continue popping because there might not be enough items on the
+      // expression stack after an unreachable expression. Any remaining
+      // elements can stay unperturbed on the stack and will be explicitly
+      // dropped when the current block is finished.
+      return elem;
+    }
+    elements[numElems - i - 1] = elem;
+  }
+  return Builder(allocator).makeTupleMake(std::move(elements));
+}
+
+// TODO: properly adapt WasmBinaryBuilder::getBlockOrSingleton
+// Expression* Unstackifier::popBlockOrSingleton(Type type, bool withExnRef) {
+// }
+
+void Unstackifier::scan(Unstackifier* self, Expression** currp) {
+  Super::scan(self, currp);
+  self->pushTask(Unstackifier::doPreVisitExpression, currp);
+}
+
+void Unstackifier::doPreVisitExpression(Unstackifier* self,
+                                        Expression** currp) {
+  self->preVisitExpression(*currp);
+}
+
+void Unstackifier::startScope() {
+  scopeStack.push_back(expressionStack.size());
+}
+
+
+size_t Unstackifier::endScope() {
+  assert(scopeStack.size() > 0);
+  auto ret = scopeStack.back();
+  scopeStack.pop_back();
+  return ret;
+}
+
+void Unstackifier::preVisitExpression(Expression* curr) {
+  if (curr->is<Block>()) {
+    // Record the number of expressions going into the current block
+    startScope();
+  } else if (!Properties::isControlFlowStructure(curr)) {
+    // Adjust the expression stack so that the child pops are filled in the
+    // correct order. Otherwise the order of children would be reversed.
+    // Control flow children are not pops, so do not need to be reversed
+    // (except `if` conditions, but reversing the single condition would not do
+    // anything).
+    auto numChildren = ChildIterator(curr).size();
+    std::reverse(expressionStack.end() - numChildren, expressionStack.end());
+  }
+}
+
+void Unstackifier::visitExpression(Expression* curr) {
+  // Replace pops with their corresponding expressions
+  if (auto* pop = curr->dynCast<Pop>()) {
+    replaceCurrent(popExpression(pop->type));
+    return;
+  }
+
+  // We have finished processing this instruction's children. Manually pull in
+  // any control flow children that would not have been pops.
+  if (auto* if_ = curr->dynCast<If>()) {
+    if (if_->ifFalse) {
+      if_->ifFalse = popExpression(if_->type);
+    }
+    if_->ifTrue = popExpression(if_->type);
+  } else if (auto* loop = curr->dynCast<Loop>()) {
+    loop->body = popExpression(loop->type);
+  } else if (auto* try_ = curr->dynCast<Try>()) {
+    try_->catchBody = popExpression(try_->type);
+    try_->body = popExpression(try_->type);
+  } else if (auto* block = curr->dynCast<Block>()) {
+    // Replace block contents with their unstackified forms
+    Expression* results = nullptr;
+    if (block->type.isConcrete()) {
+      results = popExpression(block->type);
+    }
+
+
+    auto numExpressions = endScope();
+    assert(expressionStack.size() >= numExpressions);
+    block->list.clear();
+    for (size_t i = numExpressions; i < expressionStack.size(); ++i) {
+      auto* item = expressionStack[i];
+      // Make implicit drops explicit
+      if (item->type.isConcrete()) {
+        item = Builder(allocator).makeDrop(item);
+      }
+      block->list.push_back(item);
+    }
+    if (results != nullptr) {
+      block->list.push_back(results);
+    }
+    expressionStack.resize(numExpressions);
+  }
+
+  // This expression has been fully unstackified. If we are not done, make it
+  // available to be the child of a future expression.
+ if (curr != root) { + pushExpression(curr); + } +} + +void Unstackifier::unstackify(Expression* root) { + // Don't set the function on the parent walker to avoid changing debuginfo + this->root = root; + walk(root); + this->root = nullptr; + + assert(scopeStack.size() == 0); + assert(expressionStack.size() == 0); +} + +} // anonymous namespace + +class UnstackifyPass : public Pass { + bool isFunctionParallel() override { return true; } + void + runOnFunction(PassRunner* runner, Module* module, Function* func) override { + if (func->isStacky) { + Unstackifier(func, module->allocator).unstackify(func->body); + func->isStacky = false; + } + } + Pass* create() override { return new UnstackifyPass(); } +}; + +Pass* createUnstackifyPass() { return new UnstackifyPass(); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index fedaf7276e8..e0bfd0dddcc 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -374,6 +374,9 @@ void PassRegistry::registerPasses() { registerPass("trap-mode-js", "replace trapping operations with js semantics", createTrapModeJS); + registerPass("unstackify", + "Fold stackified code into the default AST format", + createUnstackifyPass); registerPass("untee", "removes local.tees, replacing them with sets and gets", createUnteePass); diff --git a/src/passes/passes.h b/src/passes/passes.h index e98dbc8a956..598e479dc31 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -125,6 +125,7 @@ Pass* createSSAifyPass(); Pass* createSSAifyNoMergePass(); Pass* createTrapModeClamp(); Pass* createTrapModeJS(); +Pass* createUnstackifyPass(); Pass* createUnteePass(); Pass* createVacuumPass(); From e5e116576dce841930da92962de71e65c141a5b7 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 18 Jun 2020 23:03:57 -0700 Subject: [PATCH 05/27] Tuple lowering in Stackify --- src/passes/Stackify.cpp | 356 +++++++++++++++++++++++++++++----------- 1 file changed, 261 insertions(+), 95 deletions(-) diff --git a/src/passes/Stackify.cpp b/src/passes/Stackify.cpp index 73fca552bc2..b26f9155ac0 100644 --- a/src/passes/Stackify.cpp +++ b/src/passes/Stackify.cpp @@ -55,123 +55,289 @@ struct Stackifier : BinaryenIRWriter { MixedArena& allocator; std::vector scopeStack; - Stackifier(Function* func, MixedArena& allocator) - : BinaryenIRWriter(func), allocator(allocator) { - // Start with a scope to emit top-level instructions into - scopeStack.emplace_back(Scope::Func); - } + // Maps tuple locals to the new locals that will hold their elements + std::unordered_map> tupleVars; - void patchInstrs(Expression*& expr, const std::vector& instrs) { - if (instrs.size() == 1) { - expr = instrs[0]; - } else if (auto* block = expr->dynCast()) { - block->list.set(instrs); - } else { - expr = Builder(allocator).makeBlock(instrs); - } - } + // Records the scratch local to be used for tuple.extracts of each type + std::unordered_map scratchLocals; - void emit(Expression* curr) { - // Control flow structures introduce new scopes. The instructions collected - // for the new scope will be patched back into the original Expression when - // the scope ends. 
- if (Properties::isControlFlowStructure(curr)) { - Scope::Kind kind; - switch (curr->_id) { - case Expression::BlockId: - kind = Scope::Block; - break; - case Expression::LoopId: - kind = Scope::Loop; - break; - case Expression::IfId: - // The condition has already been emitted - curr->cast()->condition = Builder(allocator).makePop(Type::i32); - kind = Scope::If; - break; - case Expression::TryId: - kind = Scope::Try; - break; - default: - WASM_UNREACHABLE("Unexpected control flow structure"); - } - scopeStack.emplace_back(kind); - } else { - // TODO: Lower tuple instructions - // Replace all children (which have already been emitted) with pops and - // emit the current instruction into the current scope. - Poppifier(allocator).poppify(curr); - scopeStack.back().instrs.push_back(curr); - } - }; + Stackifier(Function* func, MixedArena& allocator); + + Index getScratchLocal(Type type); + void patchInstrs(Expression*& expr); + // BinaryIRWriter methods + void emit(Expression* curr); void emitHeader() {} + void emitIfElse(If* curr); + void emitCatch(Try* curr); + void emitScopeEnd(Expression* curr); + void emitFunctionEnd(); + void emitUnreachable(); + void emitDebugLocation(Expression* curr) {} - void emitIfElse(If* curr) { - auto& scope = scopeStack.back(); - assert(scope.kind == Scope::If); - patchInstrs(curr->ifTrue, scope.instrs); - scope.instrs.clear(); - scope.kind = Scope::Else; + // Tuple lowering methods + void emitTupleExtract(TupleExtract* curr); + void emitDrop(Drop* curr); + void emitLocalGet(LocalGet* curr); + void emitLocalSet(LocalSet* curr); + void emitGlobalGet(GlobalGet* curr); + void emitGlobalSet(GlobalSet* curr); +}; + +Stackifier::Stackifier(Function* func, MixedArena& allocator) + : BinaryenIRWriter(func), allocator(allocator) { + // Start with a scope to emit top-level instructions into + scopeStack.emplace_back(Scope::Func); + + // Map each tuple local to a set of expanded locals + for (size_t i = 0, end = func->vars.size(); i < end; ++i) { + if (func->vars[i].isMulti()) { + auto& vars = tupleVars[Index(i)]; + for (auto type : func->vars[i].expand()) { + vars.push_back(Builder(allocator).addVar(func, type)); + } + } } +} - void emitCatch(Try* curr) { - auto& scope = scopeStack.back(); - assert(scope.kind == Scope::Try); - patchInstrs(curr->body, scope.instrs); - scope.instrs.clear(); - scope.kind = Scope::Catch; +Index Stackifier::getScratchLocal(Type type) { + // If there is no scratch local for `type`, allocate a new one + auto insert = scratchLocals.insert({type, Index(-1)}); + if (insert.second) { + insert.first->second = Builder(allocator).addVar(func, type); } + return insert.first->second; +} - void emitScopeEnd(Expression* curr) { - auto& scope = scopeStack.back(); - switch (scope.kind) { - case Scope::Block: - patchInstrs(curr, scope.instrs); - break; - case Scope::Loop: - patchInstrs(curr->cast()->body, scope.instrs); +void Stackifier::patchInstrs(Expression*& expr) { + Builder builder(allocator); + auto& instrs = scopeStack.back().instrs; + if (auto* block = expr->dynCast()) { + // Conservatively keep blocks to avoid dropping important labels, but do + // not patch a block into itself, which would otherwise happen when + // emitting if/else or try/catch arms and function bodies. 
+ if (instrs.size() != 1 || instrs[0] != block) { + block->list.set(instrs); + } + } else if (instrs.size() == 1) { + // Otherwise do not insert blocks containing single instructions + assert(expr == instrs[0] && "not actually sure about this"); + expr = instrs[0]; + } else { + expr = builder.makeBlock(instrs, expr->type); + } + instrs.clear(); +} + +void Stackifier::emit(Expression* curr) { + Builder builder(allocator); + // Control flow structures introduce new scopes. The instructions collected + // for the new scope will be patched back into the original Expression when + // the scope ends. + if (Properties::isControlFlowStructure(curr)) { + Scope::Kind kind; + switch (curr->_id) { + case Expression::BlockId: + kind = Scope::Block; break; - case Scope::If: - patchInstrs(curr->cast()->ifTrue, scope.instrs); + case Expression::LoopId: + kind = Scope::Loop; break; - case Scope::Else: - patchInstrs(curr->cast()->ifFalse, scope.instrs); + case Expression::IfId: + // The condition has already been emitted + curr->cast()->condition = builder.makePop(Type::i32); + kind = Scope::If; break; - case Scope::Catch: - patchInstrs(curr->cast()->catchBody, scope.instrs); + case Expression::TryId: + kind = Scope::Try; break; - case Scope::Try: - WASM_UNREACHABLE("try without catch"); - case Scope::Func: - WASM_UNREACHABLE("unexpected end of function"); + default: + WASM_UNREACHABLE("Unexpected control flow structure"); } - scopeStack.pop_back(); + scopeStack.emplace_back(kind); + } else if (curr->is()) { + // Turns into nothing when stackified + return; + } else if (auto* extract = curr->dynCast()) { + emitTupleExtract(extract); + } else if (auto* drop = curr->dynCast()) { + emitDrop(drop); + } else if (auto* get = curr->dynCast()) { + emitLocalGet(get); + } else if (auto* set = curr->dynCast()) { + emitLocalSet(set); + } else if (auto* get = curr->dynCast()) { + emitGlobalGet(get); + } else if (auto* set = curr->dynCast()) { + emitGlobalSet(set); + } else { + // Replace all children (which have already been emitted) with pops and + // emit the current instruction into the current scope. + Poppifier(allocator).poppify(curr); scopeStack.back().instrs.push_back(curr); } +}; - void emitFunctionEnd() { - auto& scope = scopeStack.back(); - assert(scope.kind == Scope::Func); - // If there is only one instruction, it must already be the body so patching - // it into itself could cause a cycle in the IR. But if there are multiple - // instructions because the top-level expression in the original IR was not - // a block, they need to be injected into a new block. 
- if (scope.instrs.size() > 1) { - patchInstrs(func->body, scope.instrs); - } else if (scope.instrs.size() == 1) { - assert(func->body == scope.instrs[0] || - (func->body->cast()->list.size() == 1 && - func->body->cast()->list[0] == scope.instrs[0])); +void Stackifier::emitIfElse(If* curr) { + auto& scope = scopeStack.back(); + assert(scope.kind == Scope::If); + patchInstrs(curr->ifTrue); + scope.kind = Scope::Else; +} + +void Stackifier::emitCatch(Try* curr) { + auto& scope = scopeStack.back(); + assert(scope.kind == Scope::Try); + patchInstrs(curr->body); + scope.kind = Scope::Catch; +} + +void Stackifier::emitScopeEnd(Expression* curr) { + auto& scope = scopeStack.back(); + switch (scope.kind) { + case Scope::Block: + patchInstrs(curr); + break; + case Scope::Loop: + patchInstrs(curr->cast()->body); + break; + case Scope::If: + patchInstrs(curr->cast()->ifTrue); + break; + case Scope::Else: + patchInstrs(curr->cast()->ifFalse); + break; + case Scope::Catch: + patchInstrs(curr->cast()->catchBody); + break; + case Scope::Try: + WASM_UNREACHABLE("try without catch"); + case Scope::Func: + WASM_UNREACHABLE("unexpected end of function"); + } + scopeStack.pop_back(); + scopeStack.back().instrs.push_back(curr); +} + +void Stackifier::emitFunctionEnd() { + auto& scope = scopeStack.back(); + assert(scope.kind == Scope::Func); + // // If there is only one instruction, it must already be the body so + // patching + // // it into itself could cause a cycle in the IR. But if there are multiple + // // instructions because the top-level expression in the original IR was not + // // a block, they need to be injected into a new block. + // if (scope.instrs.size() > 1) { + // patchInstrs(func->body); + // } else if (scope.instrs.size() == 1) { + // assert(func->body == scope.instrs[0] || + // (func->body->cast()->list.size() == 1 && + // func->body->cast()->list[0] == scope.instrs[0])); + // } + patchInstrs(func->body); +} + +void Stackifier::emitUnreachable() { + Builder builder(allocator); + auto& instrs = scopeStack.back().instrs; + instrs.push_back(builder.makeUnreachable()); +} + +void Stackifier::emitTupleExtract(TupleExtract* curr) { + Builder builder(allocator); + auto& instrs = scopeStack.back().instrs; + const auto& types = curr->tuple->type.expand(); + // Drop all the values after the one we want + for (size_t i = types.size() - 1; i > curr->index; --i) { + instrs.push_back(builder.makeDrop(builder.makePop(types[i]))); + } + // If the extracted value is the only one left, we're done + if (curr->index == 0) { + return; + } + // Otherwise, save it to a scratch local and drop the others values + auto type = types[curr->index]; + Index scratch = getScratchLocal(type); + instrs.push_back(builder.makeLocalSet(scratch, builder.makePop(type))); + for (size_t i = curr->index - 1; i != size_t(-1); --i) { + instrs.push_back(builder.makeDrop(builder.makePop(types[i]))); + } + // Retrieve the saved value + instrs.push_back(builder.makeLocalGet(scratch, type)); +} + +void Stackifier::emitDrop(Drop* curr) { + Builder builder(allocator); + auto& instrs = scopeStack.back().instrs; + if (curr->value->type.isMulti()) { + // Drop each element individually + const auto& types = curr->value->type.expand(); + for (auto it = types.rbegin(), end = types.rend(); it != end; ++it) { + instrs.push_back(builder.makeDrop(builder.makePop(*it))); } + } else { + curr->value = builder.makePop(curr->value->type); + instrs.push_back(curr); } +} - void emitUnreachable() { - 
scopeStack.back().instrs.push_back(Builder(allocator).makeUnreachable()); +void Stackifier::emitLocalGet(LocalGet* curr) { + Builder builder(allocator); + auto& instrs = scopeStack.back().instrs; + if (curr->type.isMulti()) { + const auto& types = curr->type.expand(); + const auto& elems = tupleVars[curr->index]; + for (size_t i = 0; i < types.size(); ++i) { + instrs.push_back(builder.makeLocalGet(elems[i], types[i])); + } + } else { + instrs.push_back(curr); } +} - void emitDebugLocation(Expression* curr) {} -}; +void Stackifier::emitLocalSet(LocalSet* curr) { + Builder builder(allocator); + auto& instrs = scopeStack.back().instrs; + if (curr->value->type.isMulti()) { + const auto& types = curr->value->type.expand(); + const auto& elems = tupleVars[curr->index]; + // Add the unconditional sets + for (size_t i = types.size() - 1; i >= 1; --i) { + instrs.push_back( + builder.makeLocalSet(elems[i], builder.makePop(types[i]))); + } + if (curr->isTee()) { + // Use a tee followed by gets to retrieve the tuple + instrs.push_back( + builder.makeLocalTee(elems[0], builder.makePop(types[0]), types[0])); + for (size_t i = 1; i < types.size(); ++i) { + instrs.push_back(builder.makeLocalGet(elems[i], types[i])); + } + } else { + // Otherwise just add the last set + instrs.push_back( + builder.makeLocalSet(elems[0], builder.makePop(types[0]))); + } + } else { + curr->value = builder.makePop(curr->value->type); + instrs.push_back(curr); + } +} + +void Stackifier::emitGlobalGet(GlobalGet* curr) { + auto& instrs = scopeStack.back().instrs; + assert(!curr->type.isMulti() && "not implemented for tuples"); + instrs.push_back(curr); +} + +void Stackifier::emitGlobalSet(GlobalSet* curr) { + Builder builder(allocator); + auto& instrs = scopeStack.back().instrs; + assert(!curr->value->type.isMulti() && "not implemented for tuples"); + curr->value = builder.makePop(curr->value->type); + instrs.push_back(curr); +} } // anonymous namespace From 91ede54c9587c25115d55ee965714907f0e0d4a2 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 19 Jun 2020 00:13:00 -0700 Subject: [PATCH 06/27] Properly handle subtypes in Unstackify --- src/passes/Unstackify.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/passes/Unstackify.cpp b/src/passes/Unstackify.cpp index 954322351fd..84dbaa86cb9 100644 --- a/src/passes/Unstackify.cpp +++ b/src/passes/Unstackify.cpp @@ -132,14 +132,12 @@ Expression* Unstackifier::popNonVoidExpression() { Expression* Unstackifier::popExpression(Type type) { if (type == Type::none || type == Type::unreachable) { auto* ret = popExpression(); - assert(Type::isSubType(ret->type, type) || - ret->type == Type::unreachable); + assert(Type::isSubType(ret->type, type) || ret->type == Type::unreachable); return ret; } if (type.isSingle()) { auto* ret = popNonVoidExpression(); - assert(Type::isSubType(ret->type, type) || - ret->type == Type::unreachable); + assert(Type::isSubType(ret->type, type) || ret->type == Type::unreachable); return ret; } @@ -183,7 +181,6 @@ void Unstackifier::startScope() { scopeStack.push_back(expressionStack.size()); } - size_t Unstackifier::endScope() { assert(scopeStack.size() > 0); auto ret = scopeStack.back(); @@ -231,7 +228,6 @@ void Unstackifier::visitExpression(Expression* curr) { results = popExpression(block->type); } - auto numExpressions = endScope(); assert(expressionStack.size() >= numExpressions); block->list.clear(); From 767e8004ff0d6b9e15b2e889a73f0d7ce07bdba3 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 19 Jun 2020 
00:16:01 -0700 Subject: [PATCH 07/27] Add stack dce and unused block opts --- src/passes/CMakeLists.txt | 2 ++ src/passes/StackDCE.cpp | 38 ++++++++++++++++++++++++++ src/passes/StackRemoveBlocks.cpp | 46 ++++++++++++++++++++++++++++++++ src/passes/pass.cpp | 6 +++++ src/passes/passes.h | 2 ++ 5 files changed, 94 insertions(+) create mode 100644 src/passes/StackDCE.cpp create mode 100644 src/passes/StackRemoveBlocks.cpp diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 9bfee38ad03..6cb9ee64bec 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -58,7 +58,9 @@ set(passes_SOURCES PrintFunctionMap.cpp RoundTrip.cpp Stackify.cpp + StackDCE.cpp StackIR.cpp + StackRemoveBlocks.cpp Strip.cpp StripTargetFeatures.cpp RedundantSetElimination.cpp diff --git a/src/passes/StackDCE.cpp b/src/passes/StackDCE.cpp new file mode 100644 index 00000000000..6c562846a7e --- /dev/null +++ b/src/passes/StackDCE.cpp @@ -0,0 +1,38 @@ +/* + * Copyright 2020 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TODO: documentation + +#include "pass.h" + +namespace wasm { + +struct StackDCEPass : public WalkerPass> { + bool isFunctionParallel() override { return true; } + Pass* create() override { return new StackDCEPass; } + void visitBlock(Block* curr) { + for (size_t i = 0, size = curr->list.size(); i < size; ++i) { + if (curr->list[i]->type == Type::unreachable) { + curr->list.resize(i + 1); + return; + } + } + } +}; + +Pass* createStackDCEPass() { return new StackDCEPass(); } + +} // namespace wasm diff --git a/src/passes/StackRemoveBlocks.cpp b/src/passes/StackRemoveBlocks.cpp new file mode 100644 index 00000000000..717d1f28932 --- /dev/null +++ b/src/passes/StackRemoveBlocks.cpp @@ -0,0 +1,46 @@ +/* + * Copyright 2020 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// TODO: documentation + +#include "ir/branch-utils.h" +#include "pass.h" + +namespace wasm { + +struct StackRemoveBlocksPass + : public WalkerPass> { + bool isFunctionParallel() override { return true; } + Pass* create() override { return new StackRemoveBlocksPass; } + void visitBlock(Block* curr) { + for (size_t i = 0; i < curr->list.size(); ++i) { + if (auto* block = curr->list[i]->dynCast()) { + if (!BranchUtils::BranchSeeker::has(block, block->name)) { + // TODO: implement `insert` on arena vectors directly + std::vector insts(curr->list.begin(), curr->list.end()); + insts.erase(insts.begin() + i); + insts.insert( + insts.begin() + i, block->list.begin(), block->list.end()); + curr->list.set(insts); + } + } + } + } +}; + +Pass* createStackRemoveBlocksPass() { return new StackRemoveBlocksPass(); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index e0bfd0dddcc..252766edd2d 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -353,6 +353,12 @@ void PassRegistry::registerPasses() { registerPass("stackify", "Unfold all expressions into a flat stack machine format", createStackifyPass); + registerPass("stack-dce", + "Remove unreachable code in a stackified module", + createStackDCEPass); + registerPass("stack-remove-blocks", + "Remove unneeded blocks in a stackified module", + createStackRemoveBlocksPass); registerPass( "strip", "deprecated; same as strip-debug", createStripDebugPass); registerPass("stack-check", diff --git a/src/passes/passes.h b/src/passes/passes.h index 598e479dc31..bc150df3b54 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -113,6 +113,8 @@ Pass* createSimplifyLocalsNoStructurePass(); Pass* createSimplifyLocalsNoTeeNoStructurePass(); Pass* createStackCheckPass(); Pass* createStackifyPass(); +Pass* createStackDCEPass(); +Pass* createStackRemoveBlocksPass(); Pass* createStripDebugPass(); Pass* createStripDWARFPass(); Pass* createStripProducersPass(); From b4ca906980917b529d0d96fe66dc4e19743298d7 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 19 Jun 2020 10:20:14 -0700 Subject: [PATCH 08/27] minimal validator change --- src/wasm/wasm-validator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index c20bc768e0e..63b78e5c34d 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -454,7 +454,7 @@ void FunctionValidator::visitBlock(Block* curr) { } } } - if (curr->list.size() > 0) { + if (curr->list.size() > 0 && !getFunction()->isStacky) { auto backType = curr->list.back()->type; if (!curr->type.isConcrete()) { shouldBeFalse(backType.isConcrete(), From 30b53c0c12b55b8b2d711ef728c4e608d643b2f6 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 19 Jun 2020 10:20:53 -0700 Subject: [PATCH 09/27] Experimental fuzzer integration --- scripts/fuzz_opt.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index bf1f7f40933..fce580c481c 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -828,6 +828,7 @@ def write_commands(commands, filename): ["--simplify-locals-notee"], ["--simplify-locals-notee-nostructure"], ["--ssa"], + ["--stackify", "--unstackify"], ["--vacuum"], ] @@ -851,6 +852,10 @@ def randomize_opt_flags(): if random.random() < 0.5: pos = random.randint(0, len(flag_groups)) flag_groups = flag_groups[:pos] + [['--roundtrip']] + flag_groups[pos:] + # maybe add an extra stackification round trip + if True: + pos = random.randint(0, 
len(flag_groups)) + flag_groups = flag_groups[:pos] + [['--stackify', '--unstackify']] + flag_groups[pos:] ret = [flag for group in flag_groups for flag in group] # modifiers (if not already implied by a -O? option) if '-O' not in str(ret): From 7e9e07d548e10e8a458eeb7156428ab73a4eff62 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 19 Jun 2020 22:05:17 -0700 Subject: [PATCH 10/27] Passes for stackifying locals and drops --- src/ir/CMakeLists.txt | 1 + src/ir/StackUtils.cpp | 88 +++++++++++++++++++++++++++++++++ src/ir/stack-utils.h | 91 +++++++++++++++++++++++++++++++++++ src/passes/CMakeLists.txt | 2 + src/passes/StackifyDrops.cpp | 57 ++++++++++++++++++++++ src/passes/StackifyLocals.cpp | 81 +++++++++++++++++++++++++++++++ src/passes/pass.cpp | 6 +++ src/passes/passes.h | 2 + test/multivalue.wast | 20 ++++---- 9 files changed, 338 insertions(+), 10 deletions(-) create mode 100644 src/ir/StackUtils.cpp create mode 100644 src/ir/stack-utils.h create mode 100644 src/passes/StackifyDrops.cpp create mode 100644 src/passes/StackifyLocals.cpp diff --git a/src/ir/CMakeLists.txt b/src/ir/CMakeLists.txt index 70a57a4ab31..d9a36588f40 100644 --- a/src/ir/CMakeLists.txt +++ b/src/ir/CMakeLists.txt @@ -4,6 +4,7 @@ set(ir_SOURCES ExpressionManipulator.cpp LocalGraph.cpp ReFinalize.cpp + StackUtils.cpp ${ir_HEADERS} ) add_library(ir OBJECT ${ir_SOURCES}) diff --git a/src/ir/StackUtils.cpp b/src/ir/StackUtils.cpp new file mode 100644 index 00000000000..ae84c9ec347 --- /dev/null +++ b/src/ir/StackUtils.cpp @@ -0,0 +1,88 @@ +/* + * Copyright 2020 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "stack-utils.h" + +namespace wasm { + +namespace StackUtils { + +void compact(Block* block) { + size_t newIndex = 0; + for (size_t i = 0, size = block->list.size(); i < size; ++i) { + if (!block->list[i]->is()) { + block->list[newIndex++] = block->list[i]; + } + } + block->list.resize(newIndex); +} + +Signature getStackSignature(Expression* curr) { + Type input = Type::none; + if (Properties::isControlFlowStructure(curr)) { + if (curr->is()) { + input = Type::i32; + } + } else { + std::vector inputs; + for (auto* child : ChildIterator(curr)) { + const auto& types = child->type.expand(); + inputs.insert(inputs.end(), types.begin(), types.end()); + } + input = Type(inputs); + } + return Signature(input, curr->type); +} + +StackFlow::StackFlow(Block* curr) { + std::vector values; + bool unreachable = false; + for (auto* expr : curr->list) { + size_t consumed = getStackSignature(expr).params.size(); + size_t produced = expr->type == Type::unreachable ? 
0 : expr->type.size();
+    srcs[expr] = std::vector<Location>(consumed);
+    dests[expr] = std::vector<Location>(produced);
+    if (unreachable) {
+      continue;
+    }
+    assert(consumed <= values.size() && "block parameters not implemented");
+    for (Index i = 0; i < consumed; ++i) {
+      auto& src = values[values.size() - consumed + i];
+      srcs[expr][i] = src;
+      dests[src.expr][src.index] = {expr, i};
+    }
+    values.resize(values.size() - consumed);
+    for (Index i = 0; i < produced; ++i) {
+      values.push_back({expr, i});
+    }
+    if (expr->type == Type::unreachable) {
+      unreachable = true;
+    }
+  }
+  // If the end of the block can be reached, set the block as the destination
+  // for its return values
+  if (!unreachable) {
+    assert(values.size() == curr->type.size());
+    for (Index i = 0, size = values.size(); i < size; ++i) {
+      auto& loc = values[i];
+      dests[loc.expr][loc.index] = {curr, i};
+    }
+  }
+}
+
+} // namespace StackUtils
+
+} // namespace wasm
diff --git a/src/ir/stack-utils.h b/src/ir/stack-utils.h
new file mode 100644
index 00000000000..47d7a3a6856
--- /dev/null
+++ b/src/ir/stack-utils.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2020 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_ir_stack_h
+#define wasm_ir_stack_h
+
+#include "ir/properties.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace StackUtils {
+
+// Iterate through `block` and remove nops.
+void compact(Block* block);
+
+// Compute the input and output types of an instruction. If the instruction does
+// not return, the output type will be `unreachable`.
+Signature getStackSignature(Expression* curr);
+
+// Compute the input and output types of a sequence of instructions. If an
+// instruction in the sequence does not return, the input type will represent
+// all values consumed up to that point and the output type will be
+// `unreachable`.
+template<typename InputIt>
+Signature getStackSignature(InputIt first, InputIt last) {
+  // The input types for this region, constructed in reverse
+  std::vector<Type> inputs;
+  std::vector<Type> stack;
+  for (InputIt expr = first; expr != last; ++expr) {
+    auto sig = getStackSignature(*expr);
+    const auto& params = sig.params.expand();
+    for (auto type = params.rbegin(); type != params.rend(); ++type) {
+      if (stack.size() == 0) {
+        inputs.push_back(*type);
+      } else {
+        assert(stack.back() == *type);
+        stack.pop_back();
+      }
+    }
+    if (sig.results == Type::unreachable) {
+      std::reverse(inputs.begin(), inputs.end());
+      return Signature(Type(inputs), Type::unreachable);
+    }
+    for (auto type : sig.results.expand()) {
+      stack.push_back(type);
+    }
+  }
+  std::reverse(inputs.begin(), inputs.end());
+  return Signature(Type(inputs), Type(stack));
+}
+
+// Calculates stack machine data flow, associating the sources and destinations
+// of all values in a block.
+struct StackFlow {
+  // Either an input location or an output location. An input location
+  // represents the `index`th input into instruction `expr` and an output
+  // location represents the `index`th output from instruction `expr`.
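+  // For example, in a block containing (i32.const 1) (i32.const 2) (i32.add),
+  // srcs[add] holds the output locations of the two constants, and the
+  // constants' dests entries hold the corresponding input locations on the
+  // add.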
+ struct Location { + Expression* expr; + Index index; + }; + + // Maps each instruction to the set of output locations supplying its inputs. + std::unordered_map> srcs; + + // Maps each instruction to the set of input locations consuming its results. + std::unordered_map> dests; + + // Calculates `srcs` and `dests`. + StackFlow(Block* curr); +}; + +} // namespace StackUtils + +} // namespace wasm + +#endif // wasm_ir_stack_h diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 6cb9ee64bec..6c7c0d92dea 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -58,6 +58,8 @@ set(passes_SOURCES PrintFunctionMap.cpp RoundTrip.cpp Stackify.cpp + StackifyDrops.cpp + StackifyLocals.cpp StackDCE.cpp StackIR.cpp StackRemoveBlocks.cpp diff --git a/src/passes/StackifyDrops.cpp b/src/passes/StackifyDrops.cpp new file mode 100644 index 00000000000..84b791ca194 --- /dev/null +++ b/src/passes/StackifyDrops.cpp @@ -0,0 +1,57 @@ +/* + * Copyright 2020 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TODO: documentation + +#include "ir/effects.h" +#include "ir/stack-utils.h" +#include "pass.h" + +namespace wasm { + +struct StackifyDropsPass : public WalkerPass> { + bool isFunctionParallel() override { return true; } + Pass* create() override { return new StackifyDropsPass; } + + void visitBlock(Block* curr) { + StackUtils::StackFlow flow(curr); + for (auto* expr : curr->list) { + auto& dests = flow.dests[expr]; + bool allDrops = std::all_of( + dests.begin(), dests.end(), [](StackUtils::StackFlow::Location& loc) { + return loc.expr == nullptr || loc.expr->is(); + }); + if (!allDrops) { + continue; + } + if (EffectAnalyzer(getPassOptions(), getModule()->features, expr) + .hasSideEffects()) { + continue; + } + ExpressionManipulator::nop(expr); + for (auto& loc : dests) { + if (loc.expr != nullptr) { + ExpressionManipulator::nop(loc.expr); + } + } + } + StackUtils::compact(curr); + } +}; + +Pass* createStackifyDropsPass() { return new StackifyDropsPass(); } + +} // namespace wasm diff --git a/src/passes/StackifyLocals.cpp b/src/passes/StackifyLocals.cpp new file mode 100644 index 00000000000..daf78c7dc5c --- /dev/null +++ b/src/passes/StackifyLocals.cpp @@ -0,0 +1,81 @@ +/* + * Copyright 2020 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// TODO: documentation + +#include "ir/local-graph.h" +#include "ir/stack-utils.h" +#include "pass.h" + +namespace wasm { + +struct StackifyLocalsPass : public WalkerPass> { + bool isFunctionParallel() override { return true; } + Pass* create() override { return new StackifyLocalsPass; } + + std::unique_ptr localGraph; + + void doWalkFunction(Function* curr) { + localGraph = std::make_unique(curr); + localGraph->computeInfluences(); + super::doWalkFunction(curr); + } + + void visitBlock(Block* curr) { + // Maps sets in this block to their indexes in the block list + std::unordered_map localSets; + for (Index i = 0; i < curr->list.size(); ++i) { + if (auto* set = curr->list[i]->dynCast()) { + localSets.emplace(set, i); + continue; + } + if (auto* get = curr->list[i]->dynCast()) { + // Check that there is a single set providing this value + auto& sets = localGraph->getSetses[get]; + if (sets.size() != 1) { + continue; + } + auto* set = *sets.begin(); + // Check that the set is in this block. + auto it = localSets.find(set); + if (it == localSets.end()) { + continue; + } + auto setIndex = it->second; + // Check that the intervening instructions are stack neutral + auto sig = StackUtils::getStackSignature( + curr->list.begin() + setIndex + 1, curr->list.begin() + i); + if (sig != Signature()) { + continue; + } + // Check how many uses there are to determine how to optimize + auto& gets = localGraph->setInfluences[set]; + if (gets.size() == 1) { + assert(*gets.begin() == get); + ExpressionManipulator::nop(set); + ExpressionManipulator::nop(get); + } else { + // TODO: still remove the get, but make the `set` a `tee` + } + } + } + StackUtils::compact(curr); + } +}; + +Pass* createStackifyLocalsPass() { return new StackifyLocalsPass(); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 252766edd2d..f97790ff0b2 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -353,6 +353,12 @@ void PassRegistry::registerPasses() { registerPass("stackify", "Unfold all expressions into a flat stack machine format", createStackifyPass); + registerPass("stackify-locals", + "Remove unnecessary local accesses in a stackified module", + createStackifyLocalsPass); + registerPass("stackify-drops", + "Remove dropped values from a stackified module", + createStackifyDropsPass); registerPass("stack-dce", "Remove unreachable code in a stackified module", createStackDCEPass); diff --git a/src/passes/passes.h b/src/passes/passes.h index bc150df3b54..1fd6ae28dc6 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -113,6 +113,8 @@ Pass* createSimplifyLocalsNoStructurePass(); Pass* createSimplifyLocalsNoTeeNoStructurePass(); Pass* createStackCheckPass(); Pass* createStackifyPass(); +Pass* createStackifyLocalsPass(); +Pass* createStackifyDropsPass(); Pass* createStackDCEPass(); Pass* createStackRemoveBlocksPass(); Pass* createStripDebugPass(); diff --git a/test/multivalue.wast b/test/multivalue.wast index 01e1237f37f..5055e3ba814 100644 --- a/test/multivalue.wast +++ b/test/multivalue.wast @@ -52,16 +52,16 @@ ) ) - ;; Test multivalue globals - (func $global (result i32 i64) - (global.set $g1 - (tuple.make - (i32.const 42) - (i64.const 7) - ) - ) - (global.get $g2) - ) + ;; ;; Test multivalue globals + ;; (func $global (result i32 i64) + ;; (global.set $g1 + ;; (tuple.make + ;; (i32.const 42) + ;; (i64.const 7) + ;; ) + ;; ) + ;; (global.get $g2) + ;; ) ;; Test lowering of multivalue drops (func $drop-call From 88b9826146d19916b77aae48496c2054e972309e Mon Sep 17 
00:00:00 2001 From: Thomas Lively Date: Sat, 20 Jun 2020 17:52:51 -0700 Subject: [PATCH 11/27] Stackify tuples --- src/passes/Stackify.cpp | 82 ++++++++++++++++++++++++++++++----------- test/multivalue.wast | 20 +++++----- 2 files changed, 70 insertions(+), 32 deletions(-) diff --git a/src/passes/Stackify.cpp b/src/passes/Stackify.cpp index b26f9155ac0..0785af195c9 100644 --- a/src/passes/Stackify.cpp +++ b/src/passes/Stackify.cpp @@ -24,6 +24,10 @@ namespace wasm { namespace { +Name getGlobalElem(Name global, Index i) { + return Name(std::string(global.c_str()) + '$' + std::to_string(i)); +} + struct Poppifier : PostWalker { bool scanned = false; MixedArena& allocator; @@ -223,18 +227,6 @@ void Stackifier::emitScopeEnd(Expression* curr) { void Stackifier::emitFunctionEnd() { auto& scope = scopeStack.back(); assert(scope.kind == Scope::Func); - // // If there is only one instruction, it must already be the body so - // patching - // // it into itself could cause a cycle in the IR. But if there are multiple - // // instructions because the top-level expression in the original IR was not - // // a block, they need to be injected into a new block. - // if (scope.instrs.size() > 1) { - // patchInstrs(func->body); - // } else if (scope.instrs.size() == 1) { - // assert(func->body == scope.instrs[0] || - // (func->body->cast()->list.size() == 1 && - // func->body->cast()->list[0] == scope.instrs[0])); - // } patchInstrs(func->body); } @@ -326,22 +318,35 @@ void Stackifier::emitLocalSet(LocalSet* curr) { } void Stackifier::emitGlobalGet(GlobalGet* curr) { + Builder builder(allocator); auto& instrs = scopeStack.back().instrs; - assert(!curr->type.isMulti() && "not implemented for tuples"); - instrs.push_back(curr); + if (curr->type.isMulti()) { + const auto& types = curr->type.expand(); + for (Index i = 0; i < types.size(); ++i) { + instrs.push_back( + builder.makeGlobalGet(getGlobalElem(curr->name, i), types[i])); + } + } else { + instrs.push_back(curr); + } } void Stackifier::emitGlobalSet(GlobalSet* curr) { Builder builder(allocator); auto& instrs = scopeStack.back().instrs; - assert(!curr->value->type.isMulti() && "not implemented for tuples"); - curr->value = builder.makePop(curr->value->type); - instrs.push_back(curr); + if (curr->value->type.isMulti()) { + const auto& types = curr->value->type.expand(); + for (Index i = types.size(); i > 0; --i) { + instrs.push_back(builder.makeGlobalSet(getGlobalElem(curr->name, i - 1), + builder.makePop(types[i - 1]))); + } + } else { + curr->value = builder.makePop(curr->value->type); + instrs.push_back(curr); + } } -} // anonymous namespace - -class StackifyPass : public Pass { +class StackifyFunctionsPass : public Pass { bool isFunctionParallel() override { return true; } void runOnFunction(PassRunner* runner, Module* module, Function* func) override { @@ -350,9 +355,42 @@ class StackifyPass : public Pass { func->isStacky = true; } } - Pass* create() override { return new StackifyPass(); } + Pass* create() override { return new StackifyFunctionsPass(); } +}; + +} // anonymous namespace + +class StackifyPass : public Pass { + void run(PassRunner* runner, Module* module) { + lowerTupleGlobals(module); + PassRunner subRunner(runner); + subRunner.add(std::make_unique()); + subRunner.run(); + } + + void lowerTupleGlobals(Module* module) { + Builder builder(*module); + for (int g = module->globals.size() - 1; g >= 0; --g) { + auto& global = *module->globals[g]; + if (global.type.isMulti()) { + assert(!global.imported()); + const auto& types = 
global.type.expand(); + for (Index i = 0; i < types.size(); ++i) { + Expression* init = global.init == nullptr + ? nullptr + : global.init->cast()->operands[i]; + auto mutability = + global.mutable_ ? Builder::Mutable : Builder::Immutable; + auto* elem = builder.makeGlobal( + getGlobalElem(global.name, i), types[i], init, mutability); + module->addGlobal(elem); + } + module->removeGlobal(global.name); + } + } + } }; -Pass* createStackifyPass() { return new StackifyPass(); } +Pass* createStackifyPass() { return new StackifyPass; } } // namespace wasm diff --git a/test/multivalue.wast b/test/multivalue.wast index 5055e3ba814..01e1237f37f 100644 --- a/test/multivalue.wast +++ b/test/multivalue.wast @@ -52,16 +52,16 @@ ) ) - ;; ;; Test multivalue globals - ;; (func $global (result i32 i64) - ;; (global.set $g1 - ;; (tuple.make - ;; (i32.const 42) - ;; (i64.const 7) - ;; ) - ;; ) - ;; (global.get $g2) - ;; ) + ;; Test multivalue globals + (func $global (result i32 i64) + (global.set $g1 + (tuple.make + (i32.const 42) + (i64.const 7) + ) + ) + (global.get $g2) + ) ;; Test lowering of multivalue drops (func $drop-call From 5f6715f9c27f5253644945ccee568096324fcbd6 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Sun, 21 Jun 2020 19:36:01 -0700 Subject: [PATCH 12/27] Misc fixes and BINARYEN_USE_STACKIFY --- src/passes/Print.cpp | 4 ++-- src/passes/StackDCE.cpp | 12 +++++++++++- src/passes/StackifyDrops.cpp | 1 + src/passes/StackifyLocals.cpp | 9 +++++---- src/passes/pass.cpp | 20 ++++++++++++++++++-- src/wasm-stack.h | 7 ++++++- src/wasm/wasm-validator.cpp | 2 +- 7 files changed, 44 insertions(+), 11 deletions(-) diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index d90e7f95835..b20b20b7a45 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2621,12 +2621,12 @@ WasmPrinter::printStackIR(StackIR* ir, std::ostream& o, Function* func) { } switch (inst->op) { case StackInst::Basic: { - doIndent(); // Pop is a pseudo instruction and should not be printed in the stack IR // format to make it valid wat form. 
if (inst->origin->is()) { - break; + continue; } + doIndent(); PrintExpressionContents(func, o).visit(inst->origin); break; } diff --git a/src/passes/StackDCE.cpp b/src/passes/StackDCE.cpp index 6c562846a7e..64bc8d39706 100644 --- a/src/passes/StackDCE.cpp +++ b/src/passes/StackDCE.cpp @@ -16,6 +16,7 @@ // TODO: documentation +#include "ir/properties.h" #include "pass.h" namespace wasm { @@ -26,7 +27,16 @@ struct StackDCEPass : public WalkerPass> { void visitBlock(Block* curr) { for (size_t i = 0, size = curr->list.size(); i < size; ++i) { if (curr->list[i]->type == Type::unreachable) { - curr->list.resize(i + 1); + if (Properties::isControlFlowStructure(curr->list[i])) { + // Conservatively keep the following `unreachable` for proper typing + // TODO: Add a pass to re-type blocks and remove these unreachables + assert(i + 1 < curr->list.size() && + curr->list[i + 1]->is() && + "Expected an unreachable after unreachable control flow"); + curr->list.resize(i + 2); + } else { + curr->list.resize(i + 1); + } return; } } diff --git a/src/passes/StackifyDrops.cpp b/src/passes/StackifyDrops.cpp index 84b791ca194..d6afdb80077 100644 --- a/src/passes/StackifyDrops.cpp +++ b/src/passes/StackifyDrops.cpp @@ -37,6 +37,7 @@ struct StackifyDropsPass : public WalkerPass> { if (!allDrops) { continue; } + // TODO: downgrade tees to gets if their values aren't used if (EffectAnalyzer(getPassOptions(), getModule()->features, expr) .hasSideEffects()) { continue; diff --git a/src/passes/StackifyLocals.cpp b/src/passes/StackifyLocals.cpp index daf78c7dc5c..1f61e30193d 100644 --- a/src/passes/StackifyLocals.cpp +++ b/src/passes/StackifyLocals.cpp @@ -39,10 +39,11 @@ struct StackifyLocalsPass : public WalkerPass> { std::unordered_map localSets; for (Index i = 0; i < curr->list.size(); ++i) { if (auto* set = curr->list[i]->dynCast()) { - localSets.emplace(set, i); - continue; - } - if (auto* get = curr->list[i]->dynCast()) { + // Tees can't be stackified, so ignore them + if (!set->isTee()) { + localSets.emplace(set, i); + } + } else if (auto* get = curr->list[i]->dynCast()) { // Check that there is a single set providing this value auto& sets = localGraph->getSetses[get]; if (sets.size() != 1) { diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index f97790ff0b2..6ec87c34dab 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -535,8 +535,24 @@ void PassRunner::addDefaultPreWritingPasses() { // perform Stack IR optimizations here, at the very end of the // optimization pipeline if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) { - add("generate-stack-ir"); - add("optimize-stack-ir"); + char* useStackifyVar = getenv("BINARYEN_USE_STACKIFY"); + int useStackify = useStackifyVar ? 
atoi(useStackifyVar) : 0; + if (useStackify) { + std::cerr << "Using new stackify pipeline\n"; + add("stackify"); + add("stack-dce"); + if (options.optimizeLevel >= 3 || options.shrinkLevel >= 1) { + add("stackify-locals"); + } + add("stack-remove-blocks"); + add("stack-dce"); + // Generate stack IR so print-stack-ir tests can't tell the difference + // add("generate-stack-ir"); + } else { + std::cerr << "Using old StackIR pipeline\n"; + add("generate-stack-ir"); + add("optimize-stack-ir"); + } } } diff --git a/src/wasm-stack.h b/src/wasm-stack.h index d8061822513..03fff2df1ca 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -851,7 +851,12 @@ class BinaryenIRToBinaryWriter } writer.emitFunctionEnd(); } - void emitUnreachable() { writer.emitUnreachable(); } + void emitUnreachable() { + // Stacky code already explicitly contains these guard unreachables + if (!func->isStacky) { + writer.emitUnreachable(); + } + } void emitDebugLocation(Expression* curr) { if (sourceMap) { parent.writeDebugLocation(curr, func); diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 63b78e5c34d..20081657b52 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -428,7 +428,7 @@ void FunctionValidator::visitBlock(Block* curr) { } shouldBeTrue( info.arity != BreakInfo::PoisonArity, curr, "break arities must match"); - if (curr->list.size() > 0) { + if (curr->list.size() > 0 && !getFunction()->isStacky) { auto last = curr->list.back()->type; if (last == Type::none) { shouldBeTrue(info.arity == Index(0), From a1fdfa01260b9497fd2299a22d5566197b801537 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 22 Jun 2020 00:12:16 -0700 Subject: [PATCH 13/27] Add future pipeline and downgrade tees --- src/passes/StackifyDrops.cpp | 9 +++++++++ src/passes/pass.cpp | 14 ++++++++++++-- test/multivalue.wast | 4 +++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/passes/StackifyDrops.cpp b/src/passes/StackifyDrops.cpp index d6afdb80077..3362a1c31a2 100644 --- a/src/passes/StackifyDrops.cpp +++ b/src/passes/StackifyDrops.cpp @@ -37,6 +37,15 @@ struct StackifyDropsPass : public WalkerPass> { if (!allDrops) { continue; } + if (auto* set = expr->dynCast()) { + if (set->isTee()) { + set->makeSet(); + if (auto* drop = dests.begin()->expr) { + ExpressionManipulator::nop(drop); + } + } + continue; + } // TODO: downgrade tees to gets if their values aren't used if (EffectAnalyzer(getPassOptions(), getModule()->features, expr) .hasSideEffects()) { diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 6ec87c34dab..a2d62372594 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -537,8 +537,8 @@ void PassRunner::addDefaultPreWritingPasses() { if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) { char* useStackifyVar = getenv("BINARYEN_USE_STACKIFY"); int useStackify = useStackifyVar ? 
atoi(useStackifyVar) : 0; - if (useStackify) { - std::cerr << "Using new stackify pipeline\n"; + if (useStackify == 1) { + std::cerr << "Using new stackify pipeline to match Stack IR\n"; add("stackify"); add("stack-dce"); if (options.optimizeLevel >= 3 || options.shrinkLevel >= 1) { @@ -548,6 +548,16 @@ void PassRunner::addDefaultPreWritingPasses() { add("stack-dce"); // Generate stack IR so print-stack-ir tests can't tell the difference // add("generate-stack-ir"); + } else if (useStackify >= 2) { + std::cerr << "Using new stackify pipeline\n"; + add("stackify"); + add("stack-dce"); + add("stack-remove-blocks"); + add("stack-dce"); + add("stackify-drops"); + add("stackify-locals"); + // TODO: Coalesce-locals doesn't remove unused locals from the function + add("coalesce-locals"); } else { std::cerr << "Using old StackIR pipeline\n"; add("generate-stack-ir"); diff --git a/test/multivalue.wast b/test/multivalue.wast index 01e1237f37f..c94cb638e26 100644 --- a/test/multivalue.wast +++ b/test/multivalue.wast @@ -1,7 +1,9 @@ (module (import "env" "pair" (func $pair (result i32 i64))) + (import "env" "triple" (func $triple-imported (result i32 i64 f32))) (global $g1 (mut (i32 i64)) (tuple.make (i32.const 0) (i64.const 0))) (global $g2 (i32 i64) (tuple.make (i32.const 0) (i64.const 0))) + (export "reverse" (func $reverse (result f32 i64 i32))) ;; Test basic lowering of tuple.make, tuple.extract, and tuple variables (func $triple (result i32 i64 f32) (tuple.make @@ -28,7 +30,7 @@ (func $reverse (result f32 i64 i32) (local $x (i32 i64 f32)) (local.set $x - (call $triple) + (call $triple-imported) ) (tuple.make (tuple.extract 2 From 7c1912f7dcdffc75504f7ac84355799f2adf9bdd Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 25 Jun 2020 01:18:55 -0700 Subject: [PATCH 14/27] Add StackifyCodeSizeChecker to fuzzer --- scripts/fuzz_opt.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index fce580c481c..5ec54b97215 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -68,9 +68,11 @@ def random_size(): return random.randint(INPUT_SIZE_MIN, 2 * INPUT_SIZE_MEAN - INPUT_SIZE_MIN) -def run(cmd): +def run(cmd, env_extension={}): + env = dict(os.environ) + env.update(env_extension) print(' '.join(cmd)) - return subprocess.check_output(cmd, text=True) + return subprocess.check_output(cmd, text=True, env=env) def run_unchecked(cmd): @@ -689,6 +691,30 @@ def do_asyncify(wasm): def can_run_on_feature_opts(self, feature_opts): return all([x in feature_opts for x in ['--disable-exception-handling', '--disable-simd', '--disable-tail-call', '--disable-reference-types', '--disable-multivalue']]) +class StackifyCodeSizeChecker(TestCaseHandler): + frequency = 1.0 + + def handle(self, wasm): + stack_ir = 'stackir.wasm' + stackify = 'stackify.wasm' + stackify_future = 'stackify-future.wasm' + + stack_ir_command = [in_bin('wasm-opt'), wasm, '--generate-stack-ir', '--optimize-stack-ir', '--optimize-level=3', '-o', stack_ir] + stackify_command = [in_bin('wasm-opt'), wasm, '--stackify', '--stack-dce', '--stackify-locals', '--stack-remove-blocks', '--stack-dce', '--coalesce-locals', '--optimize-level=3', '-o', stackify] + # stackify_future_command = [in_bin('wasm-opt'), wasm, '--stackify', '--stack-dce', '--stackify-locals', '--stack-remove-blocks', '--stack-dce', '--optimize-level=3'] + + run(stack_ir_command, {'BINARYEN_USE_STACKIFY': '0'}) + run(stackify_command, {'BINARYEN_USE_STACKIFY': '1'}) + # 
run(stackify_future_command, {'BINARYEN_USE_STACKIFY': '2'}) + + stack_ir_size = os.stat(stack_ir).st_size + stackify_size = os.stat(stackify).st_size + # stackify_future_size = os.stat(stackify_future).st_size + + print("sizes:", stack_ir_size, stackify_size) + assert stackify_size <= stack_ir_size, "Stackify not as good as Stack IR" + # assert stackify_future_size <= stackify_size, "New pipeline is not as good" + # The global list of all test case handlers testcase_handlers = [ @@ -697,6 +723,7 @@ def can_run_on_feature_opts(self, feature_opts): CheckDeterminism(), Wasm2JS(), Asyncify(), + StackifyCodeSizeChecker(), ] From 05bb8d70da2c5d870425fba37d62f6a61885fdab Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 25 Jun 2020 01:22:31 -0700 Subject: [PATCH 15/27] StackSignature --- src/ir/StackUtils.cpp | 68 ++++++++++++++++++++++++++++++++++------ src/ir/stack-utils.h | 72 +++++++++++++++++++++++-------------------- 2 files changed, 98 insertions(+), 42 deletions(-) diff --git a/src/ir/StackUtils.cpp b/src/ir/StackUtils.cpp index ae84c9ec347..92e28ddfb40 100644 --- a/src/ir/StackUtils.cpp +++ b/src/ir/StackUtils.cpp @@ -30,28 +30,78 @@ void compact(Block* block) { block->list.resize(newIndex); } -Signature getStackSignature(Expression* curr) { - Type input = Type::none; - if (Properties::isControlFlowStructure(curr)) { - if (curr->is()) { - input = Type::i32; +StackSignature::StackSignature(Expression* expr) { + params = Type::none; + if (Properties::isControlFlowStructure(expr)) { + if (expr->is()) { + params = Type::i32; } } else { std::vector inputs; - for (auto* child : ChildIterator(curr)) { + for (auto* child : ChildIterator(expr)) { + // Children might be tuple pops, so expand their types const auto& types = child->type.expand(); inputs.insert(inputs.end(), types.begin(), types.end()); } - input = Type(inputs); + params = Type(inputs); } - return Signature(input, curr->type); + if (expr->type == Type::unreachable) { + unreachable = true; + results = Type::none; + } else { + unreachable = false; + results = expr->type; + } +} + +bool StackSignature::composes(const StackSignature& next) const { + // TODO + return true; +} + +StackSignature& StackSignature::operator+=(const StackSignature& next) { + assert(composes(next)); + std::vector stack = results.expand(); + size_t required = next.params.size(); + // Consume stack values according to next's parameters + if (stack.size() >= required) { + stack.resize(stack.size() - required); + } else { + if (!unreachable) { + const std::vector& currParams = params.expand(); + const std::vector& nextParams = next.params.expand(); + // Prepend the unsatisfied params of `next` to the current params + size_t unsatisfied = required - stack.size(); + std::vector newParams(nextParams.begin(), + nextParams.begin() + unsatisfied); + newParams.insert(newParams.end(), currParams.begin(), currParams.end()); + params = Type(newParams); + } + stack.clear(); + } + // Add stack values according to next's results + if (next.unreachable) { + results = next.results; + unreachable = true; + } else { + const auto& nextResults = next.results.expand(); + stack.insert(stack.end(), nextResults.begin(), nextResults.end()); + results = Type(stack); + } + return *this; +} + +StackSignature StackSignature::operator+(const StackSignature& next) const { + StackSignature sig = *this; + sig += next; + return sig; } StackFlow::StackFlow(Block* curr) { std::vector values; bool unreachable = false; for (auto* expr : curr->list) { - size_t consumed = 
getStackSignature(expr).params.size(); + size_t consumed = StackSignature(expr).params.size(); size_t produced = expr->type == Type::unreachable ? 0 : expr->type.size(); srcs[expr] = std::vector(consumed); dests[expr] = std::vector(produced); diff --git a/src/ir/stack-utils.h b/src/ir/stack-utils.h index 47d7a3a6856..6d865e9e748 100644 --- a/src/ir/stack-utils.h +++ b/src/ir/stack-utils.h @@ -18,6 +18,7 @@ #define wasm_ir_stack_h #include "ir/properties.h" +#include "wasm-type.h" #include "wasm.h" namespace wasm { @@ -27,41 +28,46 @@ namespace StackUtils { // Iterate through `block` and remove nops. void compact(Block* block); -// Compute the input and output types of an instruction. If the instruction does -// not return, the output type will be `unreachable`. -Signature getStackSignature(Expression* curr); - -// Compute the input and output types of a sequence of instructions. If an -// instruction in the sequence does not return, the input type will represent -// all values consumed up to that point and the output type will be -// `unreachable`. -template -Signature getStackSignature(InputIt first, InputIt last) { - // The input types for this region, constructed in reverse - std::vector inputs; - std::vector stack; - for (InputIt expr = first; expr != last; ++expr) { - auto sig = getStackSignature(*expr); - const auto& params = sig.params.expand(); - for (auto type = params.rbegin(); type != params.rend(); ++type) { - if (stack.size() == 0) { - inputs.push_back(*type); - } else { - assert(stack.back() == *type); - stack.pop_back(); - } - } - if (sig.results == Type::unreachable) { - std::reverse(inputs.begin(), inputs.end()); - return Signature(Type(inputs), Type::unreachable); - } - for (auto type : sig.results.expand()) { - stack.push_back(type); +// Stack signatures are like regular function signatures, but they are used to +// represent the stack parameters and results of arbitrary sequences of stacky +// instructions. They have to record whether they cover an unreachable +// instruction because their composition takes into account the polymorphic +// results of unreachable instructions. +struct StackSignature { + Type params; + Type results; + bool unreachable; + + StackSignature() + : params(Type::none), results(Type::none), unreachable(false) {} + StackSignature(Type params, Type results, bool unreachable = false) + : params(params), results(results), unreachable(unreachable) {} + + StackSignature(const StackSignature&) = default; + StackSignature& operator=(const StackSignature&) = default; + + // Get the stack signature of `expr` + explicit StackSignature(Expression* expr); + + // Get the stack signature of the range of instructions [first, last). The + // sequence of instructions is assumed to be valid, i.e. their signatures + // compose. + template + explicit StackSignature(InputIt first, InputIt last) + : params(Type::none), results(Type::none), unreachable(false) { + // TODO: It would be more efficient to build the params in reverse order + while (first != last) { + *this += StackSignature(*first++); } } - std::reverse(inputs.begin(), inputs.end()); - return Signature(Type(inputs), Type(stack)); -} + + // Return `true` iff `next` composes after this stack signature. + bool composes(const StackSignature& next) const; + + // Compose stack signatures. Assumes they actually compose. 
+ StackSignature& operator+=(const StackSignature& next); + StackSignature operator+(const StackSignature& next) const; +}; // Calculates stack machine data flow, associating the sources and destinations // of all values in a block. From 8621e37575c2ae7e2396c21b5342478899463d05 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 25 Jun 2020 01:26:56 -0700 Subject: [PATCH 16/27] Stacky IRProfile finalization and validation --- src/ir/ReFinalize.cpp | 12 ++++- src/ir/utils.h | 10 +++- src/wasm-builder.h | 17 ++++--- src/wasm-interpreter.h | 2 +- src/wasm-stack.h | 2 +- src/wasm.h | 6 ++- src/wasm/wasm-validator.cpp | 11 +++-- src/wasm/wasm.cpp | 98 ++++++++++++++++++++++--------------- 8 files changed, 101 insertions(+), 57 deletions(-) diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp index 822fc5808ce..ee2fe0a7ca6 100644 --- a/src/ir/ReFinalize.cpp +++ b/src/ir/ReFinalize.cpp @@ -16,6 +16,7 @@ #include "ir/branch-utils.h" #include "ir/find_all.h" +#include "ir/stack-utils.h" #include "ir/utils.h" namespace wasm { @@ -46,7 +47,16 @@ void ReFinalize::visitBlock(Block* curr) { } // Get the least upper bound type of the last element and all branch return // values - curr->type = curr->list.back()->type; + if (profile == IRProfile::Normal) { + curr->type = curr->list.back()->type; + } else { + assert(profile == IRProfile::Stacky); + StackUtils::StackSignature sig(curr->list.begin(), curr->list.end()); + curr->type = sig.results; + if (curr->type == Type::none) { + curr->type = sig.unreachable ? Type::unreachable : Type::none; + } + } if (curr->name.is()) { auto iter = breakValues.find(curr->name); if (iter != breakValues.end()) { diff --git a/src/ir/utils.h b/src/ir/utils.h index 17669959110..d0cfd660487 100644 --- a/src/ir/utils.h +++ b/src/ir/utils.h @@ -104,7 +104,11 @@ struct ReFinalize Pass* create() override { return new ReFinalize; } - ReFinalize() { name = "refinalize"; } + IRProfile profile; + + ReFinalize(IRProfile profile = IRProfile::Normal) : profile(profile) { + name = "refinalize"; + } // block finalization is O(bad) if we do each block by itself, so do it in // bulk, tracking break value types so we just do a linear pass @@ -179,7 +183,9 @@ struct ReFinalize // Re-finalize a single node. 
This is slow, if you want to refinalize // an entire ast, use ReFinalize struct ReFinalizeNode : public OverriddenVisitor { - void visitBlock(Block* curr) { curr->finalize(); } + IRProfile profile; + ReFinalizeNode(IRProfile profile = IRProfile::Normal) : profile(profile) {} + void visitBlock(Block* curr) { curr->finalize(profile); } void visitIf(If* curr) { curr->finalize(); } void visitLoop(Loop* curr) { curr->finalize(); } void visitBreak(Break* curr) { curr->finalize(); } diff --git a/src/wasm-builder.h b/src/wasm-builder.h index 9ee98949a85..a193248bc05 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -35,10 +35,13 @@ struct NameType { class Builder { MixedArena& allocator; + IRProfile profile; public: - Builder(MixedArena& allocator) : allocator(allocator) {} - Builder(Module& wasm) : allocator(wasm.allocator) {} + Builder(MixedArena& allocator, IRProfile profile = IRProfile::Normal) + : allocator(allocator), profile(profile) {} + Builder(Module& wasm, IRProfile profile = IRProfile::Normal) + : allocator(wasm.allocator), profile(profile) {} // make* functions, other globals @@ -94,20 +97,20 @@ class Builder { auto* ret = allocator.alloc(); if (first) { ret->list.push_back(first); - ret->finalize(); + ret->finalize(profile); } return ret; } Block* makeBlock(Name name, Expression* first = nullptr) { auto* ret = makeBlock(first); ret->name = name; - ret->finalize(); + ret->finalize(profile); return ret; } Block* makeBlock(const std::vector& items) { auto* ret = allocator.alloc(); ret->list.set(items); - ret->finalize(); + ret->finalize(profile); return ret; } Block* makeBlock(const std::vector& items, Type type) { @@ -119,7 +122,7 @@ class Builder { Block* makeBlock(const ExpressionList& items) { auto* ret = allocator.alloc(); ret->list.set(items); - ret->finalize(); + ret->finalize(profile); return ret; } Block* makeBlock(const ExpressionList& items, Type type) { @@ -132,7 +135,7 @@ class Builder { auto* ret = allocator.alloc(); ret->name = name; ret->list.set(items); - ret->finalize(); + ret->finalize(profile); return ret; } Block* makeBlock(Name name, const ExpressionList& items, Type type) { diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index dc6e2324d40..2c9a46b802d 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1960,7 +1960,7 @@ template class ModuleInstanceBase { : ExpressionRunner(maxDepth), instance(instance), scope(scope) {} - bool isStacky() { return scope.function->isStacky; } + bool isStacky() { return scope.function->profile == IRProfile::Stacky; } Flow visitCall(Call* curr) { NOTE_ENTER("Call"); diff --git a/src/wasm-stack.h b/src/wasm-stack.h index 03fff2df1ca..2edb99249c7 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -853,7 +853,7 @@ class BinaryenIRToBinaryWriter } void emitUnreachable() { // Stacky code already explicitly contains these guard unreachables - if (!func->isStacky) { + if (func->profile == IRProfile::Normal) { writer.emitUnreachable(); } } diff --git a/src/wasm.h b/src/wasm.h index 913eb89fb2b..b8b8e79b0b9 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -62,6 +62,8 @@ struct Address { } }; +enum class IRProfile { Normal, Stacky }; + // Operators enum UnaryOp { @@ -626,7 +628,7 @@ class Block : public SpecificExpression { // set the type purely based on its contents. this scans the block, so it is // not fast. 
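+  // With IRProfile::Stacky, the type is instead computed from the stack
+  // signature of the block's entire contents (see finalizeStackyBlock in
+  // wasm.cpp) rather than from the final child alone.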
- void finalize(); + void finalize(IRProfile profile = IRProfile::Normal); // set the type given you know its type, which is the case when parsing // s-expression or binary, as explicit types are given. the only additional @@ -1274,7 +1276,7 @@ class Function : public Importable { // stack IR. The Pass system will throw away Stack IR if a pass is run // that declares it may modify Binaryen IR. std::unique_ptr stackIR; - bool isStacky = false; + IRProfile profile = IRProfile::Normal; // local names. these are optional. std::map localNames; diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 20081657b52..9eb5f84d23d 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -388,6 +388,7 @@ void FunctionValidator::noteLabelName(Name name) { } void FunctionValidator::visitBlock(Block* curr) { + // TODO: factor out and implement stacky validation if (!getModule()->features.hasMultivalue()) { shouldBeTrue(!curr->type.isMulti(), curr, @@ -428,7 +429,8 @@ void FunctionValidator::visitBlock(Block* curr) { } shouldBeTrue( info.arity != BreakInfo::PoisonArity, curr, "break arities must match"); - if (curr->list.size() > 0 && !getFunction()->isStacky) { + if (curr->list.size() > 0 && + getFunction()->profile != IRProfile::Stacky) { auto last = curr->list.back()->type; if (last == Type::none) { shouldBeTrue(info.arity == Index(0), @@ -440,7 +442,7 @@ void FunctionValidator::visitBlock(Block* curr) { } breakInfos.erase(iter); } - if (curr->list.size() > 1 && !getFunction()->isStacky) { + if (curr->list.size() > 1 && getFunction()->profile != IRProfile::Stacky) { for (Index i = 0; i < curr->list.size() - 1; i++) { if (!shouldBeTrue( !curr->list[i]->type.isConcrete(), @@ -454,7 +456,7 @@ void FunctionValidator::visitBlock(Block* curr) { } } } - if (curr->list.size() > 0 && !getFunction()->isStacky) { + if (curr->list.size() > 0 && getFunction()->profile != IRProfile::Stacky) { auto backType = curr->list.back()->type; if (!curr->type.isConcrete()) { shouldBeFalse(backType.isConcrete(), @@ -2094,7 +2096,8 @@ static void validateBinaryenIR(Module& wasm, ValidationInfo& info) { // check if a node type is 'stale', i.e., we forgot to finalize() the // node. auto oldType = curr->type; - ReFinalizeNode().visit(curr); + auto profile = getFunction() ? getFunction()->profile : IRProfile::Normal; + ReFinalizeNode(profile).visit(curr); auto newType = curr->type; if (newType != oldType) { // We accept concrete => undefined, diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index be3ab6ccca9..9911e03c810 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -16,6 +16,7 @@ #include "wasm.h" #include "ir/branch-utils.h" +#include "ir/stack-utils.h" #include "wasm-printing.h" #include "wasm-traversal.h" @@ -232,13 +233,48 @@ Literals getLiteralsFromConstExpression(Expression* curr) { // core AST type checking +static void finalizeNormalBlock(Block* block) { + if (block->list.size() == 0) { + block->type = Type::none; + return; + } + // normally the type is the type of the final child and even if we have an + // unreachable child somewhere, we still mark ourselves as having that type. + // (block (result i32) + // (return) + // (i32.const 10) + // ) + block->type = block->list.back()->type; + // The exception is for blocks that produce no values, which are + // unreachable if any child is unreachable. 
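+  // For example, (block (return) (nop)) is typed unreachable, not none.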
+ if (block->type == Type::none) { + for (auto* child : block->list) { + if (child->type == Type::unreachable) { + block->type = Type::unreachable; + break; + } + } + } +} + +static void finalizeStackyBlock(Block* block) { + // Similar rules apply to stacky blocks, although the type they produce + // is not just the type of their last instruction. + StackUtils::StackSignature sig(block->list.begin(), block->list.end()); + block->type = sig.results; + if (block->type == Type::none) { + block->type = sig.unreachable ? Type::unreachable : Type::none; + } +} + struct TypeSeeker : public PostWalker { Expression* target; // look for this one Name targetName; + IRProfile profile; std::vector types; - TypeSeeker(Expression* target, Name targetName) - : target(target), targetName(targetName) { + TypeSeeker(Expression* target, Name targetName, IRProfile profile) + : target(target), targetName(targetName), profile(profile) { Expression* temp = target; walk(temp); } @@ -268,11 +304,13 @@ struct TypeSeeker : public PostWalker { void visitBlock(Block* curr) { if (curr == target) { - if (curr->list.size() > 0) { - types.push_back(curr->list.back()->type); + if (profile == IRProfile::Normal) { + finalizeNormalBlock(curr); } else { - types.push_back(Type::none); + assert(profile == IRProfile::Stacky); + finalizeStackyBlock(curr); } + types.push_back(curr->type); } else if (curr->name == targetName) { // ignore all breaks til now, they were captured by someone with the same // name @@ -324,42 +362,24 @@ static void handleUnreachable(Block* block, } } -void Block::finalize() { - if (!name.is()) { - if (list.size() > 0) { - // nothing branches here, so this is easy - // normally the type is the type of the final child - type = list.back()->type; - // and even if we have an unreachable child somewhere, - // we still mark ourselves as having that type, - // (block (result i32) - // (return) - // (i32.const 10) - // ) - if (type.isConcrete()) { - return; - } - // if we are unreachable, we are done - if (type == Type::unreachable) { - return; - } - // we may still be unreachable if we have an unreachable - // child - for (auto* child : list) { - if (child->type == Type::unreachable) { - type = Type::unreachable; - return; - } - } - } else { - type = Type::none; +void Block::finalize(IRProfile profile) { + if (name.is()) { + TypeSeeker seeker(this, this->name, profile); + type = Type::mergeTypes(seeker.types); + handleUnreachable(this); + } else { + // nothing branches here, so this is easy + switch (profile) { + case IRProfile::Normal: + finalizeNormalBlock(this); + break; + case IRProfile::Stacky: + finalizeStackyBlock(this); + break; + default: + WASM_UNREACHABLE("Unknown profile"); } - return; } - - TypeSeeker seeker(this, this->name); - type = Type::mergeTypes(seeker.types); - handleUnreachable(this); } void Block::finalize(Type type_) { From 0a253ad9c1fb8298924a7bf275a5a23a870b42c4 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 25 Jun 2020 01:24:57 -0700 Subject: [PATCH 17/27] Handle stacky opts in CoalesceLocals --- src/cfg/liveness-traversal.h | 12 ++++++++++-- src/ir/manipulation.h | 8 ++++++++ src/passes/CoalesceLocals.cpp | 31 +++++++++++++++++++------------ 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/src/cfg/liveness-traversal.h b/src/cfg/liveness-traversal.h index 6deab2fd65a..125b2aa1f10 100644 --- a/src/cfg/liveness-traversal.h +++ b/src/cfg/liveness-traversal.h @@ -104,6 +104,7 @@ struct LivenessWalker : public CFGWalker { typename CFGWalker::BasicBlock BasicBlock; 
Index numLocals; + IRProfile profile; std::unordered_set liveBlocks; // canonicalized - accesses should check (low, high) // TODO: use a map for high N, as this tends to be sparse? or don't look at @@ -118,6 +119,7 @@ struct LivenessWalker : public CFGWalker { auto* curr = (*currp)->cast(); // if in unreachable code, ignore if (!self->currBasicBlock) { + // TODO: This probably needs to be adjusted for Stacky code as well *currp = Builder(*self->getModule()).replaceWithIdenticalType(curr); return; } @@ -131,9 +133,14 @@ struct LivenessWalker : public CFGWalker { // if it has side effects) if (!self->currBasicBlock) { if (curr->isTee()) { - *currp = curr->value; + if (self->profile == IRProfile::Normal) { + *currp = curr->value; + } else { + assert(self->profile == IRProfile::Stacky); + ExpressionManipulator::nop(curr); + } } else { - *currp = Builder(*self->getModule()).makeDrop(curr->value); + ExpressionManipulator::drop(curr, curr->value); } return; } @@ -190,6 +197,7 @@ struct LivenessWalker : public CFGWalker { void doWalkFunction(Function* func) { numLocals = func->getNumLocals(); + profile = func->profile; assert(canRun(func)); copies.resize(numLocals * numLocals); std::fill(copies.begin(), copies.end(), 0); diff --git a/src/ir/manipulation.h b/src/ir/manipulation.h index 49ed7e11eb4..daac60322f9 100644 --- a/src/ir/manipulation.h +++ b/src/ir/manipulation.h @@ -40,6 +40,14 @@ template inline Nop* nop(InputType* target) { return ret; } +template +inline Drop* drop(InputType* target, Expression* value) { + auto* ret = convert(target); + ret->value = value; + ret->finalize(); + return ret; +} + template inline RefNull* refNull(InputType* target) { auto* ret = convert(target); ret->finalize(); diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp index e5416971b47..5accc8f9369 100644 --- a/src/passes/CoalesceLocals.cpp +++ b/src/passes/CoalesceLocals.cpp @@ -67,7 +67,9 @@ struct CoalesceLocals // returns a vector of oldIndex => newIndex virtual void pickIndices(std::vector& indices); - void applyIndices(std::vector& indices, Expression* root); + void applyIndices(std::vector& indices, + Expression* root, + IRProfile profile); // interference state @@ -109,7 +111,7 @@ void CoalesceLocals::doWalkFunction(Function* func) { std::vector indices; pickIndices(indices); // apply indices - applyIndices(indices, func->body); + applyIndices(indices, func->body, func->profile); } // A copy on a backedge can be especially costly, forcing us to branch just to @@ -360,7 +362,8 @@ void CoalesceLocals::pickIndices(std::vector& indices) { } void CoalesceLocals::applyIndices(std::vector& indices, - Expression* root) { + Expression* root, + IRProfile profile) { assert(indices.size() == numLocals); for (auto& curr : basicBlocks) { auto& actions = curr->contents.actions; @@ -381,16 +384,20 @@ void CoalesceLocals::applyIndices(std::vector& indices, } // remove ineffective actions if (!action.effective) { - // value may have no side effects, further optimizations can eliminate - // it - *action.origin = set->value; - if (!set->isTee()) { - // we need to drop it - Drop* drop = ExpressionManipulator::convert(set); - drop->value = *action.origin; - *action.origin = drop; + if (set->isTee()) { + if (profile == IRProfile::Normal) { + // value may have no side effects, further optimizations can + // eliminate it + *action.origin = set->value; + } else { + // pass the value through unchanged + assert(profile == IRProfile::Stacky); + ExpressionManipulator::nop(set); + } + } else { + // we need to 
drop the value to continue consuming it + ExpressionManipulator::drop(set, set->value); } - continue; } } } From 3b43f60edd560e516f5e25ce71d69bcc37cf10e4 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 25 Jun 2020 01:28:03 -0700 Subject: [PATCH 18/27] Do not count the body signature as separate --- src/ir/module-utils.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/ir/module-utils.h b/src/ir/module-utils.h index 8f89c780fdd..72d9175df01 100644 --- a/src/ir/module-utils.h +++ b/src/ir/module-utils.h @@ -402,20 +402,22 @@ collectSignatures(Module& wasm, struct TypeCounter : PostWalker> { Counts& counts; + Function* func; - TypeCounter(Counts& counts) : counts(counts) {} + TypeCounter(Counts& counts, Function* func) + : counts(counts), func(func) {} void visitExpression(Expression* curr) { if (auto* call = curr->dynCast()) { counts[call->sig]++; } else if (Properties::isControlFlowStructure(curr)) { // TODO: Allow control flow to have input types as well - if (curr->type.isMulti()) { + if (curr->type.isMulti() && curr != func->body) { counts[Signature(Type::none, curr->type)]++; } } } }; - TypeCounter(counts).walk(func->body); + TypeCounter(counts, func).walk(func->body); }; ModuleUtils::ParallelFunctionAnalysis analysis(wasm, updateCounts); From 6a3dc53676b7ed35d63fed0b290c8ecea19041fe Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 25 Jun 2020 01:30:19 -0700 Subject: [PATCH 19/27] Misc improvements --- src/passes/StackDCE.cpp | 22 ++++++++++---- src/passes/StackIR.cpp | 6 +++- src/passes/StackRemoveBlocks.cpp | 4 ++- src/passes/Stackify.cpp | 52 ++++++++++++++------------------ src/passes/StackifyLocals.cpp | 6 ++-- src/passes/Unstackify.cpp | 4 +-- 6 files changed, 51 insertions(+), 43 deletions(-) diff --git a/src/passes/StackDCE.cpp b/src/passes/StackDCE.cpp index 64bc8d39706..e99212b2ec5 100644 --- a/src/passes/StackDCE.cpp +++ b/src/passes/StackDCE.cpp @@ -17,6 +17,7 @@ // TODO: documentation #include "ir/properties.h" +#include "ir/utils.h" #include "pass.h" namespace wasm { @@ -24,23 +25,32 @@ namespace wasm { struct StackDCEPass : public WalkerPass> { bool isFunctionParallel() override { return true; } Pass* create() override { return new StackDCEPass; } + bool changed = false; + void visitBlock(Block* curr) { for (size_t i = 0, size = curr->list.size(); i < size; ++i) { if (curr->list[i]->type == Type::unreachable) { - if (Properties::isControlFlowStructure(curr->list[i])) { - // Conservatively keep the following `unreachable` for proper typing - // TODO: Add a pass to re-type blocks and remove these unreachables - assert(i + 1 < curr->list.size() && - curr->list[i + 1]->is() && - "Expected an unreachable after unreachable control flow"); + // Conservatively keep any `unreachable` following block structures to + // guarantee proper typing. 
TODO: Add a pass to re-type blocks and + // remove these unreachables + if (Properties::isControlFlowStructure(curr->list[i]) && i < size - 1 && + curr->list[i + 1]->is()) { curr->list.resize(i + 2); } else { curr->list.resize(i + 1); } + changed = true; return; } } } + + void doWalkFunction(Function* func) { + super::doWalkFunction(func); + if (changed) { + ReFinalize(IRProfile::Stacky).walkFunctionInModule(func, getModule()); + } + } }; Pass* createStackDCEPass() { return new StackDCEPass(); } diff --git a/src/passes/StackIR.cpp b/src/passes/StackIR.cpp index a628468fb8b..2f61f11807e 100644 --- a/src/passes/StackIR.cpp +++ b/src/passes/StackIR.cpp @@ -80,7 +80,9 @@ class StackIROptimizer { if (!inst) { continue; } - if (inUnreachableCode) { + if (insts[i]->origin->is()) { + removeAt(i); + } else if (inUnreachableCode) { // Does the unreachable code end here? if (isControlFlowBarrier(inst)) { inUnreachableCode = false; @@ -238,6 +240,8 @@ class StackIROptimizer { continue; } if (auto* block = inst->origin->dynCast()) { + // This can be suboptimal because the branch might have been DCEd out of + // the Stack IR but not the original IR. if (!BranchUtils::BranchSeeker::has(block, block->name)) { // TODO optimize, maybe run remove-unused-names inst = nullptr; diff --git a/src/passes/StackRemoveBlocks.cpp b/src/passes/StackRemoveBlocks.cpp index 717d1f28932..a8a6b185c3f 100644 --- a/src/passes/StackRemoveBlocks.cpp +++ b/src/passes/StackRemoveBlocks.cpp @@ -26,7 +26,7 @@ struct StackRemoveBlocksPass bool isFunctionParallel() override { return true; } Pass* create() override { return new StackRemoveBlocksPass; } void visitBlock(Block* curr) { - for (size_t i = 0; i < curr->list.size(); ++i) { + for (size_t i = 0; i < curr->list.size();) { if (auto* block = curr->list[i]->dynCast()) { if (!BranchUtils::BranchSeeker::has(block, block->name)) { // TODO: implement `insert` on arena vectors directly @@ -35,8 +35,10 @@ struct StackRemoveBlocksPass insts.insert( insts.begin() + i, block->list.begin(), block->list.end()); curr->list.set(insts); + continue; } } + ++i; } } }; diff --git a/src/passes/Stackify.cpp b/src/passes/Stackify.cpp index 0785af195c9..cfa93a71d03 100644 --- a/src/passes/Stackify.cpp +++ b/src/passes/Stackify.cpp @@ -30,15 +30,15 @@ Name getGlobalElem(Name global, Index i) { struct Poppifier : PostWalker { bool scanned = false; - MixedArena& allocator; - Poppifier(MixedArena& allocator) : allocator(allocator) {} + Builder builder; + Poppifier(Builder& builder) : builder(builder) {} static void scan(Poppifier* self, Expression** currp) { if (!self->scanned) { self->scanned = true; PostWalker::scan(self, currp); } else { - *currp = Builder(self->allocator).makePop((*currp)->type); + *currp = self->builder.makePop((*currp)->type); } } @@ -56,7 +56,7 @@ struct Scope { }; struct Stackifier : BinaryenIRWriter { - MixedArena& allocator; + Builder builder; std::vector scopeStack; // Maps tuple locals to the new locals that will hold their elements @@ -90,16 +90,18 @@ struct Stackifier : BinaryenIRWriter { }; Stackifier::Stackifier(Function* func, MixedArena& allocator) - : BinaryenIRWriter(func), allocator(allocator) { + : BinaryenIRWriter(func), builder(allocator, IRProfile::Stacky) { // Start with a scope to emit top-level instructions into scopeStack.emplace_back(Scope::Func); // Map each tuple local to a set of expanded locals - for (size_t i = 0, end = func->vars.size(); i < end; ++i) { - if (func->vars[i].isMulti()) { - auto& vars = tupleVars[Index(i)]; - for (auto type : 
func->vars[i].expand()) { - vars.push_back(Builder(allocator).addVar(func, type)); + for (Index i = func->getNumParams(), end = func->getNumLocals(); i < end; + ++i) { + Type localType = func->getLocalType(i); + if (localType.isMulti()) { + auto& vars = tupleVars[i]; + for (auto type : localType.expand()) { + vars.push_back(builder.addVar(func, type)); } } } @@ -109,33 +111,30 @@ Index Stackifier::getScratchLocal(Type type) { // If there is no scratch local for `type`, allocate a new one auto insert = scratchLocals.insert({type, Index(-1)}); if (insert.second) { - insert.first->second = Builder(allocator).addVar(func, type); + insert.first->second = builder.addVar(func, type); } return insert.first->second; } void Stackifier::patchInstrs(Expression*& expr) { - Builder builder(allocator); auto& instrs = scopeStack.back().instrs; if (auto* block = expr->dynCast()) { - // Conservatively keep blocks to avoid dropping important labels, but do - // not patch a block into itself, which would otherwise happen when - // emitting if/else or try/catch arms and function bodies. + // Reuse blocks, but do not patch a block into itself, which would otherwise + // happen when emitting if/else or try/catch arms and function bodies. if (instrs.size() != 1 || instrs[0] != block) { block->list.set(instrs); } - } else if (instrs.size() == 1) { - // Otherwise do not insert blocks containing single instructions - assert(expr == instrs[0] && "not actually sure about this"); - expr = instrs[0]; } else { + // Otherwise create a new block, even if we have just a single + // expression. We want blocks in every new scope rather than other + // instructions because stacky optimizations only look at the children of + // blocks. expr = builder.makeBlock(instrs, expr->type); } instrs.clear(); } void Stackifier::emit(Expression* curr) { - Builder builder(allocator); // Control flow structures introduce new scopes. The instructions collected // for the new scope will be patched back into the original Expression when // the scope ends. @@ -178,7 +177,7 @@ void Stackifier::emit(Expression* curr) { } else { // Replace all children (which have already been emitted) with pops and // emit the current instruction into the current scope. 
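   // For example, for (i32.add (local.get $x) (local.get $y)), the local.gets
   // have already been emitted into the current scope, so only the i32.add is
   // emitted here, with its two children replaced by pops of type i32.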
- Poppifier(allocator).poppify(curr); + Poppifier(builder).poppify(curr); scopeStack.back().instrs.push_back(curr); } }; @@ -231,13 +230,11 @@ void Stackifier::emitFunctionEnd() { } void Stackifier::emitUnreachable() { - Builder builder(allocator); auto& instrs = scopeStack.back().instrs; instrs.push_back(builder.makeUnreachable()); } void Stackifier::emitTupleExtract(TupleExtract* curr) { - Builder builder(allocator); auto& instrs = scopeStack.back().instrs; const auto& types = curr->tuple->type.expand(); // Drop all the values after the one we want @@ -260,7 +257,6 @@ void Stackifier::emitTupleExtract(TupleExtract* curr) { } void Stackifier::emitDrop(Drop* curr) { - Builder builder(allocator); auto& instrs = scopeStack.back().instrs; if (curr->value->type.isMulti()) { // Drop each element individually @@ -275,7 +271,6 @@ void Stackifier::emitDrop(Drop* curr) { } void Stackifier::emitLocalGet(LocalGet* curr) { - Builder builder(allocator); auto& instrs = scopeStack.back().instrs; if (curr->type.isMulti()) { const auto& types = curr->type.expand(); @@ -289,7 +284,6 @@ void Stackifier::emitLocalGet(LocalGet* curr) { } void Stackifier::emitLocalSet(LocalSet* curr) { - Builder builder(allocator); auto& instrs = scopeStack.back().instrs; if (curr->value->type.isMulti()) { const auto& types = curr->value->type.expand(); @@ -318,7 +312,6 @@ void Stackifier::emitLocalSet(LocalSet* curr) { } void Stackifier::emitGlobalGet(GlobalGet* curr) { - Builder builder(allocator); auto& instrs = scopeStack.back().instrs; if (curr->type.isMulti()) { const auto& types = curr->type.expand(); @@ -332,7 +325,6 @@ void Stackifier::emitGlobalGet(GlobalGet* curr) { } void Stackifier::emitGlobalSet(GlobalSet* curr) { - Builder builder(allocator); auto& instrs = scopeStack.back().instrs; if (curr->value->type.isMulti()) { const auto& types = curr->value->type.expand(); @@ -350,9 +342,9 @@ class StackifyFunctionsPass : public Pass { bool isFunctionParallel() override { return true; } void runOnFunction(PassRunner* runner, Module* module, Function* func) override { - if (!func->isStacky) { + if (func->profile != IRProfile::Stacky) { Stackifier(func, module->allocator).write(); - func->isStacky = true; + func->profile = IRProfile::Stacky; } } Pass* create() override { return new StackifyFunctionsPass(); } diff --git a/src/passes/StackifyLocals.cpp b/src/passes/StackifyLocals.cpp index 1f61e30193d..260ce9397fe 100644 --- a/src/passes/StackifyLocals.cpp +++ b/src/passes/StackifyLocals.cpp @@ -57,9 +57,9 @@ struct StackifyLocalsPass : public WalkerPass> { } auto setIndex = it->second; // Check that the intervening instructions are stack neutral - auto sig = StackUtils::getStackSignature( - curr->list.begin() + setIndex + 1, curr->list.begin() + i); - if (sig != Signature()) { + StackUtils::StackSignature sig(curr->list.begin() + setIndex + 1, + curr->list.begin() + i); + if (sig.params != Type::none || sig.results != Type::none) { continue; } // Check how many uses there are to determine how to optimize diff --git a/src/passes/Unstackify.cpp b/src/passes/Unstackify.cpp index 84dbaa86cb9..04d7b353b26 100644 --- a/src/passes/Unstackify.cpp +++ b/src/passes/Unstackify.cpp @@ -268,9 +268,9 @@ class UnstackifyPass : public Pass { bool isFunctionParallel() override { return true; } void runOnFunction(PassRunner* runner, Module* module, Function* func) override { - if (func->isStacky) { + if (func->profile != IRProfile::Normal) { Unstackifier(func, module->allocator).unstackify(func->body); - func->isStacky = false; + 
func->profile = IRProfile::Normal; } } Pass* create() override { return new UnstackifyPass(); } From 44a1a9d3d712bc5f5dd6fcdcbf4e2e6ea391a784 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 25 Jun 2020 15:20:40 -0700 Subject: [PATCH 20/27] ReFinalize in StackRemoveBlocks --- src/passes/StackDCE.cpp | 2 +- src/passes/StackRemoveBlocks.cpp | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/passes/StackDCE.cpp b/src/passes/StackDCE.cpp index e99212b2ec5..cf627ae0b94 100644 --- a/src/passes/StackDCE.cpp +++ b/src/passes/StackDCE.cpp @@ -53,6 +53,6 @@ struct StackDCEPass : public WalkerPass> { } }; -Pass* createStackDCEPass() { return new StackDCEPass(); } +Pass* createStackDCEPass() { return new StackDCEPass; } } // namespace wasm diff --git a/src/passes/StackRemoveBlocks.cpp b/src/passes/StackRemoveBlocks.cpp index a8a6b185c3f..c1b9bf33eb8 100644 --- a/src/passes/StackRemoveBlocks.cpp +++ b/src/passes/StackRemoveBlocks.cpp @@ -17,6 +17,7 @@ // TODO: documentation #include "ir/branch-utils.h" +#include "ir/utils.h" #include "pass.h" namespace wasm { @@ -25,6 +26,8 @@ struct StackRemoveBlocksPass : public WalkerPass> { bool isFunctionParallel() override { return true; } Pass* create() override { return new StackRemoveBlocksPass; } + bool changed = false; + void visitBlock(Block* curr) { for (size_t i = 0; i < curr->list.size();) { if (auto* block = curr->list[i]->dynCast()) { @@ -35,14 +38,22 @@ struct StackRemoveBlocksPass insts.insert( insts.begin() + i, block->list.begin(), block->list.end()); curr->list.set(insts); + changed = true; continue; } } ++i; } } + + void doWalkFunction(Function* func) { + super::doWalkFunction(func); + if (changed) { + ReFinalize(IRProfile::Stacky).walkFunctionInModule(func, getModule()); + } + } }; -Pass* createStackRemoveBlocksPass() { return new StackRemoveBlocksPass(); } +Pass* createStackRemoveBlocksPass() { return new StackRemoveBlocksPass; } } // namespace wasm From 398eeeaddbb2db306e33b2466dccc8cc76c8256e Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 30 Jun 2020 22:26:30 -0700 Subject: [PATCH 21/27] Attempt to update finalization for stacky code --- src/ir/ReFinalize.cpp | 2 +- src/wasm-builder.h | 16 ++++++++-------- src/wasm.h | 5 +++-- src/wasm/wasm.cpp | 12 +++++++----- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp index ee2fe0a7ca6..df417185048 100644 --- a/src/ir/ReFinalize.cpp +++ b/src/ir/ReFinalize.cpp @@ -68,7 +68,7 @@ void ReFinalize::visitBlock(Block* curr) { return; } // type is none, but we might be unreachable - if (curr->type == Type::none) { + if (curr->type == Type::none && profile == IRProfile::Normal) { for (auto* child : curr->list) { if (child->type == Type::unreachable) { curr->type = Type::unreachable; diff --git a/src/wasm-builder.h b/src/wasm-builder.h index a193248bc05..92b2205701b 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -116,7 +116,7 @@ class Builder { Block* makeBlock(const std::vector& items, Type type) { auto* ret = allocator.alloc(); ret->list.set(items); - ret->finalize(type); + ret->finalize(type, profile); return ret; } Block* makeBlock(const ExpressionList& items) { @@ -128,7 +128,7 @@ class Builder { Block* makeBlock(const ExpressionList& items, Type type) { auto* ret = allocator.alloc(); ret->list.set(items); - ret->finalize(type); + ret->finalize(type, profile); return ret; } Block* makeBlock(Name name, const ExpressionList& items) { @@ -142,7 +142,7 @@ class Builder { auto* ret = 
allocator.alloc(); ret->name = name; ret->list.set(items); - ret->finalize(type); + ret->finalize(type, profile); return ret; } If* makeIf(Expression* condition, @@ -708,7 +708,7 @@ class Builder { } if (append) { block->list.push_back(append); - block->finalize(); + block->finalize(profile); } return block; } @@ -733,7 +733,7 @@ class Builder { block->name = name; if (append) { block->list.push_back(append); - block->finalize(); + block->finalize(profile); } return block; } @@ -743,14 +743,14 @@ class Builder { Block* makeSequence(Expression* left, Expression* right) { auto* block = makeBlock(left); block->list.push_back(right); - block->finalize(); + block->finalize(profile); return block; } Block* makeSequence(Expression* left, Expression* right, Type type) { auto* block = makeBlock(left); block->list.push_back(right); - block->finalize(type); + block->finalize(type, profile); return block; } @@ -767,7 +767,7 @@ class Builder { for (Index i = from; i < to; i++) { block->list.push_back(input->list[i]); } - block->finalize(); + block->finalize(profile); ret = block; } if (to == input->list.size()) { diff --git a/src/wasm.h b/src/wasm.h index b8b8e79b0b9..336f179efbb 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -634,13 +634,14 @@ class Block : public SpecificExpression { // s-expression or binary, as explicit types are given. the only additional // work this does is to set the type to unreachable in the cases that is // needed (which may require scanning the block) - void finalize(Type type_); + void finalize(Type type_, IRProfile profile = IRProfile::Normal); // set the type given you know its type, and you know if there is a break to // this block. this avoids the need to scan the contents of the block in the // case that it might be unreachable, so it is recommended if you already know // the type and breakability anyhow. - void finalize(Type type_, bool hasBreak); + void + finalize(Type type_, bool hasBreak, IRProfile profile = IRProfile::Normal); }; class If : public SpecificExpression { diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index 9911e03c810..da73254e161 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -366,7 +366,9 @@ void Block::finalize(IRProfile profile) { if (name.is()) { TypeSeeker seeker(this, this->name, profile); type = Type::mergeTypes(seeker.types); - handleUnreachable(this); + if (profile == IRProfile::Normal) { + handleUnreachable(this); + } } else { // nothing branches here, so this is easy switch (profile) { @@ -382,16 +384,16 @@ void Block::finalize(IRProfile profile) { } } -void Block::finalize(Type type_) { +void Block::finalize(Type type_, IRProfile profile) { type = type_; - if (type == Type::none && list.size() > 0) { + if (type == Type::none && list.size() > 0 && profile == IRProfile::Normal) { handleUnreachable(this); } } -void Block::finalize(Type type_, bool hasBreak) { +void Block::finalize(Type type_, bool hasBreak, IRProfile profile) { type = type_; - if (type == Type::none && list.size() > 0) { + if (type == Type::none && list.size() > 0 && profile == IRProfile::Normal) { handleUnreachable(this, true, hasBreak); } } From e54e8403a5534863791c40f04187d393c7e0f4c6 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 30 Jun 2020 22:29:16 -0700 Subject: [PATCH 22/27] Lower unreachables during stackification. 
Relatively stable --- scripts/fuzz_opt.py | 33 +++++-- src/ir/StackUtils.cpp | 131 +++++++++++++++++++++------ src/ir/module-utils.h | 2 +- src/ir/stack-utils.h | 23 ++++- src/literal.h | 10 ++- src/passes/StackDCE.cpp | 20 +---- src/passes/StackIR.cpp | 8 ++ src/passes/StackRemoveBlocks.cpp | 8 -- src/passes/Stackify.cpp | 147 +++++++++++++++++++++++++++++-- src/passes/StackifyDrops.cpp | 17 ++-- src/support/small_vector.h | 6 ++ src/wasm-interpreter.h | 87 +++++++++++++++--- src/wasm-stack.h | 9 +- src/wasm/wasm-validator.cpp | 48 ++++++---- 14 files changed, 435 insertions(+), 114 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 5ec54b97215..8a6d38e7fd7 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -720,9 +720,9 @@ def handle(self, wasm): testcase_handlers = [ FuzzExec(), CompareVMs(), - CheckDeterminism(), - Wasm2JS(), - Asyncify(), + # CheckDeterminism(), + # Wasm2JS(), + # Asyncify(), StackifyCodeSizeChecker(), ] @@ -755,7 +755,7 @@ def test_one(random_input, opts, given_wasm): print('pre wasm size:', wasm_size) # create a second wasm for handlers that want to look at pairs. - generate_command = [in_bin('wasm-opt'), 'a.wasm', '-o', 'b.wasm'] + opts + FUZZ_OPTS + FEATURE_OPTS + generate_command = [in_bin('wasm-opt'), 'a.wasm', '-o', 'b.wasm'] + FUZZ_OPTS + opts + FEATURE_OPTS if PRINT_WATS: printed = run(generate_command + ['--print']) with open('b.printed.wast', 'w') as f: @@ -855,11 +855,20 @@ def write_commands(commands, filename): ["--simplify-locals-notee"], ["--simplify-locals-notee-nostructure"], ["--ssa"], - ["--stackify", "--unstackify"], + # ["--stackify", "--unstackify"], ["--vacuum"], ] +stack_opt_choices = [ + ["--stack-dce"], + ["--stack-remove-blocks"], + ["--stackify-locals"], + ["--stackify-drops"], + ["--coalesce-locals"], +] + + def randomize_opt_flags(): flag_groups = [] has_flatten = False @@ -875,14 +884,22 @@ def randomize_opt_flags(): flag_groups.append(choice) if len(flag_groups) > 20 or random.random() < 0.3: break + + # stacky opts + if random.random() < 0.5: + stack_opt_group = ['--stackify'] + while len(flag_groups) <= 20 and random.random() < 0.3: + stack_opt_group.extend(random.choice(stack_opt_choices)) + flag_groups.append(stack_opt_group) + # maybe add an extra round trip if random.random() < 0.5: pos = random.randint(0, len(flag_groups)) flag_groups = flag_groups[:pos] + [['--roundtrip']] + flag_groups[pos:] # maybe add an extra stackification round trip - if True: - pos = random.randint(0, len(flag_groups)) - flag_groups = flag_groups[:pos] + [['--stackify', '--unstackify']] + flag_groups[pos:] + # if True: + # pos = random.randint(0, len(flag_groups)) + # flag_groups = flag_groups[:pos] + [['--stackify', '--unstackify']] + flag_groups[pos:] ret = [flag for group in flag_groups for flag in group] # modifiers (if not already implied by a -O? 
option) if '-O' not in str(ret): diff --git a/src/ir/StackUtils.cpp b/src/ir/StackUtils.cpp index 92e28ddfb40..2b91132fd1c 100644 --- a/src/ir/StackUtils.cpp +++ b/src/ir/StackUtils.cpp @@ -20,6 +20,25 @@ namespace wasm { namespace StackUtils { +bool mayBeUnreachable(Expression* curr) { + switch (curr->_id) { + case Expression::Id::BreakId: + return curr->cast()->condition == nullptr; + case Expression::Id::CallId: + return curr->cast()->isReturn; + case Expression::Id::CallIndirectId: + return curr->cast()->isReturn; + case Expression::Id::ReturnId: + case Expression::Id::SwitchId: + case Expression::Id::UnreachableId: + case Expression::Id::ThrowId: + case Expression::Id::RethrowId: + return true; + default: + return false; + } +} + void compact(Block* block) { size_t newIndex = 0; for (size_t i = 0, size = block->list.size(); i < size; ++i) { @@ -39,6 +58,11 @@ StackSignature::StackSignature(Expression* expr) { } else { std::vector inputs; for (auto* child : ChildIterator(expr)) { + if (child->type == Type::unreachable) { + // This instruction won't consume values from before the unreachable + inputs.clear(); + continue; + } // Children might be tuple pops, so expand their types const auto& types = child->type.expand(); inputs.insert(inputs.end(), types.begin(), types.end()); @@ -59,6 +83,28 @@ bool StackSignature::composes(const StackSignature& next) const { return true; } +bool StackSignature::satisfies(Signature sig) const { + if (unreachable) { + // The unreachable can consume and produce any additional types needed by + // `sig`, so truncate `sig`'s params and results to compare only the parts + // we need to match. + if (sig.params.size() > params.size()) { + size_t diff = sig.params.size() - params.size(); + const auto& types = sig.params.expand(); + std::vector truncatedParams(types.begin(), types.end() - diff); + sig.params = Type(truncatedParams); + } + if (sig.results.size() > results.size()) { + size_t diff = sig.results.size() - results.size(); + const auto& types = sig.results.expand(); + std::vector truncatedResults(types.begin() + diff, types.end()); + sig.results = Type(truncatedResults); + } + } + return Type::isSubType(sig.params, params) && + Type::isSubType(results, sig.results); +} + StackSignature& StackSignature::operator+=(const StackSignature& next) { assert(composes(next)); std::vector stack = results.expand(); @@ -99,38 +145,71 @@ StackSignature StackSignature::operator+(const StackSignature& next) const { StackFlow::StackFlow(Block* curr) { std::vector values; - bool unreachable = false; - for (auto* expr : curr->list) { - size_t consumed = StackSignature(expr).params.size(); - size_t produced = expr->type == Type::unreachable ? 0 : expr->type.size(); - srcs[expr] = std::vector(consumed); - dests[expr] = std::vector(produced); - if (unreachable) { - continue; - } - assert(consumed <= values.size() && "block parameters not implemented"); + Expression* lastUnreachable = nullptr; + + auto process = [&](Expression* expr, StackSignature sig) { + const auto& params = sig.params.expand(); + const auto& results = sig.results.expand(); + // Unreachable instructions consume all available values + size_t consumed = + sig.unreachable ? 
std::max(values.size(), params.size()) : params.size(); + // Consume values + assert(params.size() <= consumed); for (Index i = 0; i < consumed; ++i) { - auto& src = values[values.size() - consumed + i]; - srcs[expr][i] = src; - dests[src.expr][src.index] = {expr, i}; + bool unreachable = i < consumed - params.size(); + if (values.size() + i < consumed) { + // This value comes from the polymorphic stack of the last unreachable + assert(consumed == params.size()); + auto& currDests = dests[lastUnreachable]; + Index destIndex(currDests.size()); + Type type = params[i]; + srcs[expr].push_back({lastUnreachable, destIndex, type, unreachable}); + currDests.push_back({expr, i, type, unreachable}); + } else { + // A normal value from the value stack + auto& src = values[values.size() + i - consumed]; + srcs[expr].push_back({src.expr, src.index, src.type, unreachable}); + dests[src.expr][src.index] = {expr, i, src.type, unreachable}; + } } - values.resize(values.size() - consumed); - for (Index i = 0; i < produced; ++i) { - values.push_back({expr, i}); + values.resize(values.size() >= consumed ? values.size() - consumed : 0); + // Produce values + for (Index i = 0; i < results.size(); ++i) { + values.push_back({expr, i, results[i], false}); } - if (expr->type == Type::unreachable) { - unreachable = true; + // Update the last unreachable instruction + if (sig.unreachable) { + lastUnreachable = expr; } + }; + + for (auto* expr : curr->list) { + StackSignature sig(expr); + assert((sig.params.size() <= values.size() || lastUnreachable) && + "Block inputs not yet supported"); + srcs[expr] = std::vector(); + dests[expr] = std::vector(sig.results.size()); + process(expr, sig); } - // If the end of the block can be reached, set the block as the destination - // for its return values - if (!unreachable) { - assert(values.size() == curr->type.size()); - for (Index i = 0, size = values.size(); i < size; ++i) { - auto& loc = values[i]; - dests[loc.expr][loc.index] = {curr, i}; - } + + // Set the block as the destination for its return values + assert(curr->type != Type::unreachable); + process(curr, StackSignature(curr->type, Type::none)); +} + +StackSignature StackFlow::getSignature(Expression* expr) { + auto exprSrcs = srcs.find(expr); + auto exprDests = dests.find(expr); + assert(exprSrcs != srcs.end() && exprDests != dests.end()); + std::vector params, results; + for (auto& src : exprSrcs->second) { + params.push_back(src.type); + } + for (auto& dest : exprDests->second) { + results.push_back(dest.type); } + bool unreachable = expr->type == Type::unreachable; + return StackSignature(Type(params), Type(results), unreachable); } } // namespace StackUtils diff --git a/src/ir/module-utils.h b/src/ir/module-utils.h index 72d9175df01..263ab3b9c6c 100644 --- a/src/ir/module-utils.h +++ b/src/ir/module-utils.h @@ -411,7 +411,7 @@ collectSignatures(Module& wasm, counts[call->sig]++; } else if (Properties::isControlFlowStructure(curr)) { // TODO: Allow control flow to have input types as well - if (curr->type.isMulti() && curr != func->body) { + if (curr->type.isMulti()) { counts[Signature(Type::none, curr->type)]++; } } diff --git a/src/ir/stack-utils.h b/src/ir/stack-utils.h index 6d865e9e748..beabdc7ca8a 100644 --- a/src/ir/stack-utils.h +++ b/src/ir/stack-utils.h @@ -25,6 +25,9 @@ namespace wasm { namespace StackUtils { +// Whether it is valid for `curr` to have unreachable type in Stacky IR. +bool mayBeUnreachable(Expression* curr); + // Iterate through `block` and remove nops. 
void compact(Block* block); @@ -64,6 +67,9 @@ struct StackSignature { // Return `true` iff `next` composes after this stack signature. bool composes(const StackSignature& next) const; + // Whether a block with this stack signature could be typed with `sig`. + bool satisfies(Signature sig) const; + // Compose stack signatures. Assumes they actually compose. StackSignature& operator+=(const StackSignature& next); StackSignature operator+(const StackSignature& next) const; @@ -72,20 +78,29 @@ struct StackSignature { // Calculates stack machine data flow, associating the sources and destinations // of all values in a block. struct StackFlow { - // Either an input location or an output location. An input location - // represents the `index`th input into instruction `expr` and an ouput - // location represents the `index`th output from instruction `expr`. + // The input (output) location at which a value of type `type` is consumed + // (produced), representing the `index`th input into (output from) instruction + // `expr`. `unreachable` is true iff the corresponding value is consumed + // (produced) by the polymorphic behavior of an unreachable instruction + // without being used by that instruction. struct Location { Expression* expr; Index index; + Type type; + bool unreachable; }; - // Maps each instruction to the set of output locations supplying its inputs. + // Maps each instruction to the set of output locations producing its inputs. std::unordered_map> srcs; // Maps each instruction to the set of input locations consuming its results. std::unordered_map> dests; + // Gets the effective stack signature of `expr`, which must be a child of the + // block. If `expr` is unreachable, the returned signature will reflect the + // values consumed and produced by its polymorphic unreachable behavior. + StackSignature getSignature(Expression* expr); + // Calculates `srcs` and `dests`. StackFlow(Block* curr); }; diff --git a/src/literal.h b/src/literal.h index 5c72bf96ccc..f1eaa5bec56 100644 --- a/src/literal.h +++ b/src/literal.h @@ -482,7 +482,15 @@ class Literals : public SmallVector { Literals(std::initializer_list init) : SmallVector(init) { #ifndef NDEBUG - for (auto& lit : init) { + for (auto& lit : *this) { + assert(lit.isConcrete()); + } +#endif + }; + template + Literals(InputIt begin, InputIt end) : SmallVector(begin, end) { +#ifndef NDEBUG + for (auto& lit : *this) { assert(lit.isConcrete()); } #endif diff --git a/src/passes/StackDCE.cpp b/src/passes/StackDCE.cpp index cf627ae0b94..a484d3464eb 100644 --- a/src/passes/StackDCE.cpp +++ b/src/passes/StackDCE.cpp @@ -25,32 +25,16 @@ namespace wasm { struct StackDCEPass : public WalkerPass> { bool isFunctionParallel() override { return true; } Pass* create() override { return new StackDCEPass; } - bool changed = false; void visitBlock(Block* curr) { for (size_t i = 0, size = curr->list.size(); i < size; ++i) { if (curr->list[i]->type == Type::unreachable) { - // Conservatively keep any `unreachable` following block structures to - // guarantee proper typing. 
TODO: Add a pass to re-type blocks and - // remove these unreachables - if (Properties::isControlFlowStructure(curr->list[i]) && i < size - 1 && - curr->list[i + 1]->is()) { - curr->list.resize(i + 2); - } else { - curr->list.resize(i + 1); - } - changed = true; + assert(!Properties::isControlFlowStructure(curr->list[i])); + curr->list.resize(i + 1); return; } } } - - void doWalkFunction(Function* func) { - super::doWalkFunction(func); - if (changed) { - ReFinalize(IRProfile::Stacky).walkFunctionInModule(func, getModule()); - } - } }; Pass* createStackDCEPass() { return new StackDCEPass; } diff --git a/src/passes/StackIR.cpp b/src/passes/StackIR.cpp index 2f61f11807e..c90efd46365 100644 --- a/src/passes/StackIR.cpp +++ b/src/passes/StackIR.cpp @@ -36,6 +36,14 @@ struct GenerateStackIR : public WalkerPass> { bool modifiesBinaryenIR() override { return false; } void doWalkFunction(Function* func) { + // Skip stacky functions due to a mismatch in the interpretation of + // pops. Specifically, normal IR treats pops as an instruction that + // generates a value, but stacky IR treats pops as markers that do not do + // anything. Skipping this situation isn't a problem because these two + // systems are not meant to work together. + if (func->profile == IRProfile::Stacky) { + return; + } StackIRGenerator stackIRGen(getModule()->allocator, func); stackIRGen.write(); func->stackIR = make_unique(); diff --git a/src/passes/StackRemoveBlocks.cpp b/src/passes/StackRemoveBlocks.cpp index c1b9bf33eb8..8940a1c1935 100644 --- a/src/passes/StackRemoveBlocks.cpp +++ b/src/passes/StackRemoveBlocks.cpp @@ -26,7 +26,6 @@ struct StackRemoveBlocksPass : public WalkerPass> { bool isFunctionParallel() override { return true; } Pass* create() override { return new StackRemoveBlocksPass; } - bool changed = false; void visitBlock(Block* curr) { for (size_t i = 0; i < curr->list.size();) { @@ -45,13 +44,6 @@ struct StackRemoveBlocksPass ++i; } } - - void doWalkFunction(Function* func) { - super::doWalkFunction(func); - if (changed) { - ReFinalize(IRProfile::Stacky).walkFunctionInModule(func, getModule()); - } - } }; Pass* createStackRemoveBlocksPass() { return new StackRemoveBlocksPass; } diff --git a/src/passes/Stackify.cpp b/src/passes/Stackify.cpp index cfa93a71d03..97870e70281 100644 --- a/src/passes/Stackify.cpp +++ b/src/passes/Stackify.cpp @@ -17,6 +17,8 @@ // TODO: documentation #include "ir/properties.h" +#include "ir/stack-utils.h" +#include "ir/utils.h" #include "pass.h" #include "wasm-stack.h" @@ -24,10 +26,12 @@ namespace wasm { namespace { +// Generate names for the elements of tuple globals Name getGlobalElem(Name global, Index i) { return Name(std::string(global.c_str()) + '$' + std::to_string(i)); } +// Utility class for replacing an expression's stack value children with Pops struct Poppifier : PostWalker { bool scanned = false; Builder builder; @@ -49,6 +53,124 @@ struct Poppifier : PostWalker { } }; +// Utility class for inferring the proper WebAssembly types for unreachable +// control flow expressions and eliminating the redundant unreachable +// instructions generated by BinaryenIRWriter. After unreachable lowering, the +// only expressions that still have Unreachable type are Unreachables. +struct UnreachableLowerer + : Walker> { + // Visit only control flow children, since we know all other children will + // be pops and we are not interested in those. Also pre-visit nodes instead + // of post-visiting them because type inference depends on parents being + // typed first. 
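+  // (A note on ordering, assuming the usual Walker task-stack semantics:
+  // tasks pop off LIFO, so pushing doVisitBlock/doVisitIf/etc. after the
+  // scan tasks for the children makes the parent's visit run before any
+  // child is scanned, which is what gives the pre-order traversal described
+  // above.)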
+  static void scan(UnreachableLowerer* self, Expression** currp) {
+    Expression* curr = *currp;
+    switch (curr->_id) {
+      case Expression::Id::BlockId: {
+        // Visit children only through the first unreachable, since any further
+        // children will be discarded before we could visit them.
+        for (auto*& child : curr->cast<Block>()->list) {
+          self->pushTask(scan, &child);
+          if (child->type == Type::unreachable) {
+            break;
+          }
+        }
+        self->pushTask(doVisitBlock, currp);
+        break;
+      }
+      case Expression::Id::IfId:
+        self->maybePushTask(scan, &curr->cast<If>()->ifFalse);
+        self->pushTask(scan, &curr->cast<If>()->ifTrue);
+        self->pushTask(doVisitIf, currp);
+        break;
+      case Expression::Id::LoopId:
+        self->pushTask(scan, &curr->cast<Loop>()->body);
+        self->pushTask(doVisitLoop, currp);
+        break;
+      case Expression::Id::TryId:
+        self->pushTask(scan, &curr->cast<Try>()->catchBody);
+        self->pushTask(scan, &curr->cast<Try>()->body);
+        self->pushTask(doVisitTry, currp);
+        break;
+      default:
+        break;
+    }
+  }
+  void visitBlock(Block* curr) {
+    size_t numReachable = 0;
+    for (; numReachable < curr->list.size(); ++numReachable) {
+      if (curr->list[numReachable]->type == Type::unreachable) {
+        break;
+      }
+    }
+    if (numReachable == curr->list.size()) {
+      return;
+    }
+    // Keep the first unreachable child and nothing after it
+    curr->list.resize(numReachable + 1);
+    auto* unreachable = curr->list[numReachable];
+    // If it is control flow, figure out what type it should have
+    if (Properties::isControlFlowStructure(unreachable)) {
+      // If `unreachable` consumes any values (besides an i32 if it is an If) or
+      // if it produces multiple values and multivalue is not enabled, then
+      // append an `unreachable` to the list after all to make the typing work
+      // out. Otherwise, update the type accordingly.
+      // TODO: Implement control flow input types for use here.
+      StackUtils::StackFlow flow(curr);
+      auto sig = flow.getSignature(unreachable);
+      bool needsExtraUnreachable;
+      if (unreachable->is<If>()) {
+        needsExtraUnreachable = sig.params != Type::i32;
+      } else {
+        needsExtraUnreachable = sig.params != Type::none;
+      }
+      if (!getModule()->features.hasMultivalue() && sig.results.isMulti()) {
+        needsExtraUnreachable = true;
+      }
+      if (needsExtraUnreachable) {
+        curr->list.push_back(Builder(*getModule()).makeUnreachable());
+        unreachable->type = Type::none;
+      } else {
+        unreachable->type = sig.results;
+      }
+    }
+  }
+  void visitIf(If* curr) {
+    // The If type must be a supertype of the individual arm types, so just
+    // reuse it for both arms. This can save space in the type section.
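+    // For example (sketch): in
+    //   (if (result i32) (then (br $exit)) (else (i32.const 0)))
+    // the unreachable then-arm is retyped to i32 here so that it agrees
+    // with the If as a whole.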
+ curr->ifTrue->type = curr->type; + if (curr->ifFalse) { + curr->ifFalse->type = curr->type; + } + } + void visitLoop(Loop* curr) { curr->body->type = curr->type; } + void visitTry(Try* curr) { + curr->body->type = curr->type; + curr->catchBody->type = curr->type; + } + // The public entry point of this class + void lower(Function* func, Module* module) { + func->body->type = func->sig.results; + walkFunctionInModule(func, module); +#ifndef NDEBUG + // Assert that only instructions that never return have unreachable type + struct AssertValidUnreachableTypes + : PostWalker> { + void visitExpression(Expression* curr) { + if (curr->type == Type::unreachable) { + assert(StackUtils::mayBeUnreachable(curr) && + "Unexpected unreachable instruction"); + } + } + } asserter; + asserter.walkFunction(func); +#endif // NDEBUG + } +}; + struct Scope { enum Kind { Func, Block, Loop, If, Else, Try, Catch } kind; std::vector instrs; @@ -56,6 +178,7 @@ struct Scope { }; struct Stackifier : BinaryenIRWriter { + Module* module; Builder builder; std::vector scopeStack; @@ -65,7 +188,7 @@ struct Stackifier : BinaryenIRWriter { // Records the scratch local to be used for tuple.extracts of each type std::unordered_map scratchLocals; - Stackifier(Function* func, MixedArena& allocator); + Stackifier(Function* func, Module* module); Index getScratchLocal(Type type); void patchInstrs(Expression*& expr); @@ -89,8 +212,9 @@ struct Stackifier : BinaryenIRWriter { void emitGlobalSet(GlobalSet* curr); }; -Stackifier::Stackifier(Function* func, MixedArena& allocator) - : BinaryenIRWriter(func), builder(allocator, IRProfile::Stacky) { +Stackifier::Stackifier(Function* func, Module* module) + : BinaryenIRWriter(func), module(module), + builder(*module, IRProfile::Stacky) { // Start with a scope to emit top-level instructions into scopeStack.emplace_back(Scope::Func); @@ -227,9 +351,14 @@ void Stackifier::emitFunctionEnd() { auto& scope = scopeStack.back(); assert(scope.kind == Scope::Func); patchInstrs(func->body); + + // We are done stackifying the structure. Now eliminate redundant unreachable + // instructions and lower unreachable types away. 
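+  // For example (sketch): BinaryenIRWriter guards an unconditional branch
+  // with a trailing unreachable, as in
+  //   (block (result i32) ... (br $l) (unreachable))
+  // and the lowering truncates the block after the br, dropping the guard
+  // and retyping the surrounding control flow as needed.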
+ UnreachableLowerer().lower(func, module); } void Stackifier::emitUnreachable() { + // TODO: Try making this a nop auto& instrs = scopeStack.back().instrs; instrs.push_back(builder.makeUnreachable()); } @@ -273,7 +402,7 @@ void Stackifier::emitDrop(Drop* curr) { void Stackifier::emitLocalGet(LocalGet* curr) { auto& instrs = scopeStack.back().instrs; if (curr->type.isMulti()) { - const auto& types = curr->type.expand(); + const auto& types = func->getLocalType(curr->index).expand(); const auto& elems = tupleVars[curr->index]; for (size_t i = 0; i < types.size(); ++i) { instrs.push_back(builder.makeLocalGet(elems[i], types[i])); @@ -286,7 +415,7 @@ void Stackifier::emitLocalGet(LocalGet* curr) { void Stackifier::emitLocalSet(LocalSet* curr) { auto& instrs = scopeStack.back().instrs; if (curr->value->type.isMulti()) { - const auto& types = curr->value->type.expand(); + const auto& types = func->getLocalType(curr->index).expand(); const auto& elems = tupleVars[curr->index]; // Add the unconditional sets for (size_t i = types.size() - 1; i >= 1; --i) { @@ -314,7 +443,7 @@ void Stackifier::emitLocalSet(LocalSet* curr) { void Stackifier::emitGlobalGet(GlobalGet* curr) { auto& instrs = scopeStack.back().instrs; if (curr->type.isMulti()) { - const auto& types = curr->type.expand(); + const auto& types = module->getGlobal(curr->name)->type.expand(); for (Index i = 0; i < types.size(); ++i) { instrs.push_back( builder.makeGlobalGet(getGlobalElem(curr->name, i), types[i])); @@ -327,7 +456,7 @@ void Stackifier::emitGlobalGet(GlobalGet* curr) { void Stackifier::emitGlobalSet(GlobalSet* curr) { auto& instrs = scopeStack.back().instrs; if (curr->value->type.isMulti()) { - const auto& types = curr->value->type.expand(); + const auto& types = module->getGlobal(curr->name)->type.expand(); for (Index i = types.size(); i > 0; --i) { instrs.push_back(builder.makeGlobalSet(getGlobalElem(curr->name, i - 1), builder.makePop(types[i - 1]))); @@ -343,11 +472,11 @@ class StackifyFunctionsPass : public Pass { void runOnFunction(PassRunner* runner, Module* module, Function* func) override { if (func->profile != IRProfile::Stacky) { - Stackifier(func, module->allocator).write(); + Stackifier(func, module).write(); func->profile = IRProfile::Stacky; } } - Pass* create() override { return new StackifyFunctionsPass(); } + Pass* create() override { return new StackifyFunctionsPass; } }; } // anonymous namespace diff --git a/src/passes/StackifyDrops.cpp b/src/passes/StackifyDrops.cpp index 3362a1c31a2..269480fe4c3 100644 --- a/src/passes/StackifyDrops.cpp +++ b/src/passes/StackifyDrops.cpp @@ -30,31 +30,34 @@ struct StackifyDropsPass : public WalkerPass> { StackUtils::StackFlow flow(curr); for (auto* expr : curr->list) { auto& dests = flow.dests[expr]; - bool allDrops = std::all_of( + bool unused = std::all_of( dests.begin(), dests.end(), [](StackUtils::StackFlow::Location& loc) { - return loc.expr == nullptr || loc.expr->is(); + assert(loc.expr != nullptr); + return loc.unreachable || loc.expr->is(); }); - if (!allDrops) { + if (!unused) { continue; } + // Downgrade tees to sets when their stack value isn't used + // TODO: leave this to coalesce-locals? 
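+      // For example (sketch): a pair like
+      //   local.tee $x
+      //   drop
+      // becomes a plain local.set $x, with the now-redundant drop nopped
+      // out below.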
if (auto* set = expr->dynCast()) { if (set->isTee()) { set->makeSet(); - if (auto* drop = dests.begin()->expr) { + if (auto* drop = dests.begin()->expr->dynCast()) { ExpressionManipulator::nop(drop); } } continue; } - // TODO: downgrade tees to gets if their values aren't used if (EffectAnalyzer(getPassOptions(), getModule()->features, expr) .hasSideEffects()) { continue; } + // Remove the expression and its drops ExpressionManipulator::nop(expr); for (auto& loc : dests) { - if (loc.expr != nullptr) { - ExpressionManipulator::nop(loc.expr); + if (auto* drop = loc.expr->dynCast()) { + ExpressionManipulator::nop(drop); } } } diff --git a/src/support/small_vector.h b/src/support/small_vector.h index a8816c9e4e6..5339bc6d790 100644 --- a/src/support/small_vector.h +++ b/src/support/small_vector.h @@ -47,6 +47,12 @@ template class SmallVector { } } + template SmallVector(InputIt begin, InputIt end) { + while (begin != end) { + push_back(*begin++); + } + } + T& operator[](size_t i) { if (i < N) { return fixed[i]; diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 2c9a46b802d..76a23484fcf 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -28,6 +28,7 @@ #include #include "ir/module-utils.h" +#include "ir/stack-utils.h" #include "support/bits.h" #include "support/safe_integer.h" #include "wasm-builder.h" @@ -38,6 +39,8 @@ #include "wasm-printing.h" #endif +// #define STACKY_DEBUG + namespace wasm { struct WasmException { @@ -163,8 +166,7 @@ class ExpressionRunner : public OverriddenVisitor { Index maxLoopIterations; // Multivalue support. - // TODO: make this Literal instead of Literals once tuples are split up - std::vector valueStack; + std::vector valueStack; Flow generateArguments(const ExpressionList& operands, LiteralList& arguments) { @@ -196,12 +198,31 @@ class ExpressionRunner : public OverriddenVisitor { if (maxDepth != NO_LIMIT && depth > maxDepth) { trap("interpreter recursion limit"); } - if (isStacky() && !Properties::isControlFlowStructure(curr)) { + // The stack size to restore after a control flow structure + size_t restoreSize; + if (isStacky() && Properties::isControlFlowStructure(curr)) { + StackUtils::StackSignature sig(curr); + assert(valueStack.size() >= sig.params.size()); + restoreSize = valueStack.size() - sig.params.size(); + } + if (isStacky() && !curr->is()) { +#ifdef STACKY_DEBUG + std::cerr << "Visiting:\n"; + curr->dump(); + std::cerr << "\nValue stack before: "; + for (auto v : valueStack) { + std::cerr << " " << v.type.toString(); + } + std::cerr << "\n"; +#endif // STACKY_DEBUG // Reverse the popped values so they come out in the right order - Index numChildren = ChildIterator(curr).size(); + size_t numChildren = StackUtils::StackSignature(curr).params.size(); std::reverse(valueStack.end() - numChildren, valueStack.end()); } auto ret = OverriddenVisitor::visit(curr); + if (isStacky() && Properties::isControlFlowStructure(curr)) { + valueStack.resize(restoreSize); + } if (!ret.breaking()) { Type type = ret.getType(); if (type.isConcrete() || curr->type.isConcrete()) { @@ -214,14 +235,20 @@ class ExpressionRunner : public OverriddenVisitor { #endif assert(Type::isSubType(type, curr->type)); if (isStacky() && !curr->is()) { - // TODO: once tuples are lowered away, push results individually - // valueStack.insert(valueStack.cend(), ret.values.begin(), - // ret.values.end()); - valueStack.push_back(ret.values); + valueStack.insert( + valueStack.end(), ret.values.begin(), ret.values.end()); } } } - // TODO: handle removing valueStack 
entries when breaking +#ifdef STACKY_DEBUG + if (isStacky() && !curr->is()) { + std::cerr << "Value stack after:"; + for (auto v : valueStack) { + std::cerr << " " << v.type.toString(); + } + std::cerr << "\n\n"; + } +#endif // STACKY_DEBUG depth--; return ret; } @@ -243,8 +270,16 @@ class ExpressionRunner : public OverriddenVisitor { stack.pop_back(); if (flow.breaking()) { flow.clearIf(curr->name); + if (!flow.breaking()) { + valueStack.insert( + valueStack.end(), flow.values.begin(), flow.values.end()); + } continue; } + if (isStacky()) { + valueStack.insert( + valueStack.end(), flow.values.begin(), flow.values.end()); + } auto& list = curr->list; for (size_t i = 0; i < list.size(); i++) { if (curr != top && i == 0) { @@ -254,9 +289,34 @@ class ExpressionRunner : public OverriddenVisitor { flow = visit(list[i]); if (flow.breaking()) { flow.clearIf(curr->name); + if (!flow.breaking()) { + valueStack.insert( + valueStack.end(), flow.values.begin(), flow.values.end()); + } break; } } + if (isStacky() && !flow.breaking()) { + Literals vals; + assert(curr->type != Type::unreachable); + size_t newStackSize = valueStack.size() - curr->type.size(); + for (auto it = valueStack.begin() + newStackSize; + it != valueStack.end(); + ++it) { + vals.push_back(*it); + } + valueStack.resize(newStackSize); +#ifdef STACKY_DEBUG + std::cerr << "Exiting block " << curr->name << " " + << curr->type.toString() << "\n"; + std::cerr << "Value stack:"; + for (auto v : valueStack) { + std::cerr << " " << v.type.toString(); + } + std::cerr << "\n"; +#endif // STACKY_DEBUG + flow = Flow(std::move(vals)); + } } return flow; } @@ -1259,9 +1319,10 @@ class ExpressionRunner : public OverriddenVisitor { Flow visitSIMDLoadZero(SIMDLoad* curr) { WASM_UNREACHABLE("unimp"); } Flow visitPop(Pop* curr) { NOTE_ENTER("Pop"); - assert(!valueStack.empty()); - auto ret = this->valueStack.back(); - this->valueStack.pop_back(); + size_t numPopped = curr->type.size(); + assert(valueStack.size() >= numPopped); + Literals ret(valueStack.rbegin(), valueStack.rbegin() + numPopped); + valueStack.resize(valueStack.size() - numPopped); return ret; } Flow visitRefNull(RefNull* curr) { @@ -2248,7 +2309,7 @@ template class ModuleInstanceBase { WASM_UNREACHABLE("invalid op"); } load.finalize(); - Flow flow = this->visit(&load); + Flow flow = this->visitLoad(&load); if (flow.breaking()) { return flow; } diff --git a/src/wasm-stack.h b/src/wasm-stack.h index 2edb99249c7..ca95efebaea 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -852,10 +852,11 @@ class BinaryenIRToBinaryWriter writer.emitFunctionEnd(); } void emitUnreachable() { - // Stacky code already explicitly contains these guard unreachables - if (func->profile == IRProfile::Normal) { - writer.emitUnreachable(); - } + // // Stacky code already explicitly contains these guard unreachables + // if (func->profile == IRProfile::Normal) { + // writer.emitUnreachable(); + // } + writer.emitUnreachable(); } void emitDebugLocation(Expression* curr) { if (sourceMap) { diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 9eb5f84d23d..0ddb70ace10 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -23,6 +23,7 @@ #include "ir/features.h" #include "ir/global-utils.h" #include "ir/module-utils.h" +#include "ir/stack-utils.h" #include "ir/utils.h" #include "support/colors.h" #include "wasm-printing.h" @@ -2095,28 +2096,45 @@ static void validateBinaryenIR(Module& wasm, ValidationInfo& info) { auto scope = getFunction() ? 
getFunction()->name : Name("(global scope)"); // check if a node type is 'stale', i.e., we forgot to finalize() the // node. - auto oldType = curr->type; auto profile = getFunction() ? getFunction()->profile : IRProfile::Normal; - ReFinalizeNode(profile).visit(curr); - auto newType = curr->type; - if (newType != oldType) { - // We accept concrete => undefined, - // e.g. - // - // (drop (block (result i32) (unreachable))) - // - // The block has an added type, not derived from the ast itself, so it - // is ok for it to be either i32 or unreachable. - if (!Type::isSubType(newType, oldType) && - !(oldType.isConcrete() && newType == Type::unreachable)) { + if (profile == IRProfile::Normal || !curr->is()) { + auto oldType = curr->type; + ReFinalizeNode(profile).visit(curr); + auto newType = curr->type; + if (newType != oldType) { + // We accept concrete => undefined, + // e.g. + // + // (drop (block (result i32) (unreachable))) + // + // The block has an added type, not derived from the ast itself, so it + // is ok for it to be either i32 or unreachable. + if (!Type::isSubType(newType, oldType) && + !(oldType.isConcrete() && newType == Type::unreachable)) { + std::ostringstream ss; + ss << "stale type found in " << scope << " on " << curr + << "\n(marked as " << oldType << ", should be " << newType + << ")\n"; + info.fail(ss.str(), curr, getFunction()); + } + curr->type = oldType; + } + } else { + // Stacky blocks cannot be typed in general without looking at their + // context, but we can at least ensure that their types agree with their + // contents. + assert(profile == IRProfile::Stacky); + auto* block = curr->cast(); + StackUtils::StackSignature sig(block->list.begin(), block->list.end()); + if (!sig.satisfies(Signature(Type::none, block->type))) { std::ostringstream ss; ss << "stale type found in " << scope << " on " << curr - << "\n(marked as " << oldType << ", should be " << newType + << "\n(marked as " << block->type << ", should be " << sig.results << ")\n"; info.fail(ss.str(), curr, getFunction()); } - curr->type = oldType; } + // check if a node is a duplicate - expressions must not be seen more than // once bool inserted; From a74da492b57175491f26409c857b912d4115469d Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 1 Jul 2020 13:45:37 -0700 Subject: [PATCH 23/27] Cleanup code organization --- src/ir/CMakeLists.txt | 2 +- src/ir/ReFinalize.cpp | 2 +- src/ir/{StackUtils.cpp => stack-utils.cpp} | 123 ++++++++++++++++++++- src/ir/stack-utils.h | 11 +- src/passes/StackRemoveBlocks.cpp | 1 - src/passes/Stackify.cpp | 120 +------------------- src/passes/StackifyDrops.cpp | 6 +- src/passes/StackifyLocals.cpp | 4 +- src/wasm-interpreter.h | 4 +- src/wasm/wasm-validator.cpp | 2 +- src/wasm/wasm.cpp | 2 +- 11 files changed, 142 insertions(+), 135 deletions(-) rename src/ir/{StackUtils.cpp => stack-utils.cpp} (61%) diff --git a/src/ir/CMakeLists.txt b/src/ir/CMakeLists.txt index d9a36588f40..5555ef98bbc 100644 --- a/src/ir/CMakeLists.txt +++ b/src/ir/CMakeLists.txt @@ -4,7 +4,7 @@ set(ir_SOURCES ExpressionManipulator.cpp LocalGraph.cpp ReFinalize.cpp - StackUtils.cpp + stack-utils.cpp ${ir_HEADERS} ) add_library(ir OBJECT ${ir_SOURCES}) diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp index df417185048..c6b7ee76eb1 100644 --- a/src/ir/ReFinalize.cpp +++ b/src/ir/ReFinalize.cpp @@ -51,7 +51,7 @@ void ReFinalize::visitBlock(Block* curr) { curr->type = curr->list.back()->type; } else { assert(profile == IRProfile::Stacky); - StackUtils::StackSignature sig(curr->list.begin(), 
curr->list.end());
+    StackSignature sig(curr->list.begin(), curr->list.end());
     curr->type = sig.results;
     if (curr->type == Type::none) {
       curr->type = sig.unreachable ? Type::unreachable : Type::none;
     }
diff --git a/src/ir/StackUtils.cpp b/src/ir/stack-utils.cpp
similarity index 61%
rename from src/ir/StackUtils.cpp
rename to src/ir/stack-utils.cpp
index 2b91132fd1c..5db1b49f21d 100644
--- a/src/ir/StackUtils.cpp
+++ b/src/ir/stack-utils.cpp
@@ -49,6 +49,127 @@ void compact(Block* block) {
   block->list.resize(newIndex);
 }
 
+void lowerUnreachables(Function* func, Module* module) {
+  struct UnreachableLowerer
+      : Walker> {
+    // Implementation details
+    static void scan(UnreachableLowerer* self, Expression** currp) {
+      // Visit only control flow children, since we know all other children will
+      // be pops and we are not interested in those. Also pre-visit nodes
+      // instead of post-visiting them because type inference depends on parents
+      // being typed first.
+      Expression* curr = *currp;
+      switch (curr->_id) {
+        case Expression::Id::BlockId: {
+          // Visit children only through the first unreachable, since any
+          // further children will be discarded before we could visit them.
+          for (auto*& child : curr->cast<Block>()->list) {
+            self->pushTask(scan, &child);
+            if (child->type == Type::unreachable) {
+              break;
+            }
+          }
+          self->pushTask(doVisitBlock, currp);
+          break;
+        }
+        case Expression::Id::IfId:
+          self->maybePushTask(scan, &curr->cast<If>()->ifFalse);
+          self->pushTask(scan, &curr->cast<If>()->ifTrue);
+          self->pushTask(doVisitIf, currp);
+          break;
+        case Expression::Id::LoopId:
+          self->pushTask(scan, &curr->cast<Loop>()->body);
+          self->pushTask(doVisitLoop, currp);
+          break;
+        case Expression::Id::TryId:
+          self->pushTask(scan, &curr->cast<Try>()->catchBody);
+          self->pushTask(scan, &curr->cast<Try>()->body);
+          self->pushTask(doVisitTry, currp);
+          break;
+        default:
+          break;
+      }
+    }
+    void visitBlock(Block* curr) {
+      size_t numReachable = 0;
+      for (; numReachable < curr->list.size(); ++numReachable) {
+        if (curr->list[numReachable]->type == Type::unreachable) {
+          break;
+        }
+      }
+      if (numReachable == curr->list.size()) {
+        return;
+      }
+      // Keep the first unreachable child and nothing after it
+      curr->list.resize(numReachable + 1);
+      auto* unreachable = curr->list[numReachable];
+      // If it is control flow, figure out what type it should have
+      if (Properties::isControlFlowStructure(unreachable)) {
+        // If `unreachable` consumes any values (besides an i32 if it is an If)
+        // or if it produces multiple values and multivalue is not enabled, then
+        // append an `unreachable` to the list after all to make the typing work
+        // out. Otherwise, update the type accordingly.
+        // TODO: Implement control flow input types for use here.
+        StackFlow flow(curr);
+        auto sig = flow.getSignature(unreachable);
+        bool needsExtraUnreachable;
+        if (unreachable->is<If>()) {
+          needsExtraUnreachable = sig.params != Type::i32;
+        } else {
+          needsExtraUnreachable = sig.params != Type::none;
+        }
+        if (!getModule()->features.hasMultivalue() && sig.results.isMulti()) {
+          needsExtraUnreachable = true;
+        }
+        if (needsExtraUnreachable) {
+          curr->list.push_back(Builder(*getModule()).makeUnreachable());
+          unreachable->type = Type::none;
+        } else {
+          unreachable->type = sig.results;
+        }
+      }
+    }
+
+    void visitIf(If* curr) {
+      // The If type must be a supertype of the individual arm types, so just
+      // reuse it for both arms. This can save space in the type section.
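+      // (Why this saves space, as an assumption from the signature
+      // collection in module-utils.h: a control flow structure with a
+      // multivalue type needs its own Signature entry in the type section,
+      // so giving the arms the If's existing type avoids minting new
+      // entries.)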
+ curr->ifTrue->type = curr->type; + if (curr->ifFalse) { + curr->ifFalse->type = curr->type; + } + } + + void visitLoop(Loop* curr) { curr->body->type = curr->type; } + + void visitTry(Try* curr) { + curr->body->type = curr->type; + curr->catchBody->type = curr->type; + } + }; + + func->body->type = func->sig.results; + UnreachableLowerer().walkFunctionInModule(func, module); + +#ifndef NDEBUG + // Assert that only instructions that never return have unreachable type + struct AssertValidUnreachableTypes + : PostWalker> { + void visitExpression(Expression* curr) { + if (curr->type == Type::unreachable) { + assert(StackUtils::mayBeUnreachable(curr) && + "Unexpected unreachable instruction"); + } + } + } asserter; + asserter.walkFunction(func); +#endif // NDEBUG +} + +} // namespace StackUtils + StackSignature::StackSignature(Expression* expr) { params = Type::none; if (Properties::isControlFlowStructure(expr)) { @@ -212,6 +333,4 @@ StackSignature StackFlow::getSignature(Expression* expr) { return StackSignature(Type(params), Type(results), unreachable); } -} // namespace StackUtils - } // namespace wasm diff --git a/src/ir/stack-utils.h b/src/ir/stack-utils.h index beabdc7ca8a..537a26f211f 100644 --- a/src/ir/stack-utils.h +++ b/src/ir/stack-utils.h @@ -31,6 +31,15 @@ bool mayBeUnreachable(Expression* curr); // Iterate through `block` and remove nops. void compact(Block* block); +// Infers the proper WebAssembly types for unreachable control flow expressions +// and eliminates the redundant unreachable instructions generated by +// BinaryenIRWriter where possible. After unreachable lowering, the only +// expressions that still have Unreachable type are instructions that can never +// return. +void lowerUnreachables(Function* func, Module* module); + +} // namespace StackUtils + // Stack signatures are like regular function signatures, but they are used to // represent the stack parameters and results of arbitrary sequences of stacky // instructions. They have to record whether they cover an unreachable @@ -105,8 +114,6 @@ struct StackFlow { StackFlow(Block* curr); }; -} // namespace StackUtils - } // namespace wasm #endif // wasm_ir_stack_h diff --git a/src/passes/StackRemoveBlocks.cpp b/src/passes/StackRemoveBlocks.cpp index 8940a1c1935..d7c4e6ba081 100644 --- a/src/passes/StackRemoveBlocks.cpp +++ b/src/passes/StackRemoveBlocks.cpp @@ -37,7 +37,6 @@ struct StackRemoveBlocksPass insts.insert( insts.begin() + i, block->list.begin(), block->list.end()); curr->list.set(insts); - changed = true; continue; } } diff --git a/src/passes/Stackify.cpp b/src/passes/Stackify.cpp index 97870e70281..723fbb709df 100644 --- a/src/passes/Stackify.cpp +++ b/src/passes/Stackify.cpp @@ -53,124 +53,6 @@ struct Poppifier : PostWalker { } }; -// Utility class for inferring the proper WebAssembly types for unreachable -// control flow expressions and eliminating the redundant unreachable -// instructions generated by BinaryenIRWriter. After unreachable lowering, the -// only expressions that still have Unreachable type are Unreachables. -struct UnreachableLowerer - : Walker> { - // Visit only control flow children, since we know all other children will - // be pops and we are not interested in those. Also pre-visit nodes instead - // of post-visiting them because type inference depends on parents being - // typed first. 
-  // (A note on ordering, assuming the usual Walker task-stack semantics:
-  // tasks pop off LIFO, so pushing doVisitBlock/doVisitIf/etc. after the
-  // scan tasks for the children makes the parent's visit run before any
-  // child is scanned, which is what gives the pre-order traversal described
-  // above.)
-  static void scan(UnreachableLowerer* self, Expression** currp) {
-    Expression* curr = *currp;
-    switch (curr->_id) {
-      case Expression::Id::BlockId: {
-        // Visit children only through the first unreachable, since any further
-        // children will be discarded before we could visit them.
-        for (auto*& child : curr->cast<Block>()->list) {
-          self->pushTask(scan, &child);
-          if (child->type == Type::unreachable) {
-            break;
-          }
-        }
-        self->pushTask(doVisitBlock, currp);
-        break;
-      }
-      case Expression::Id::IfId:
-        self->maybePushTask(scan, &curr->cast<If>()->ifFalse);
-        self->pushTask(scan, &curr->cast<If>()->ifTrue);
-        self->pushTask(doVisitIf, currp);
-        break;
-      case Expression::Id::LoopId:
-        self->pushTask(scan, &curr->cast<Loop>()->body);
-        self->pushTask(doVisitLoop, currp);
-        break;
-      case Expression::Id::TryId:
-        self->pushTask(scan, &curr->cast<Try>()->catchBody);
-        self->pushTask(scan, &curr->cast<Try>()->body);
-        self->pushTask(doVisitTry, currp);
-        break;
-      default:
-        break;
-    }
-  }
-  void visitBlock(Block* curr) {
-    size_t numReachable = 0;
-    for (; numReachable < curr->list.size(); ++numReachable) {
-      if (curr->list[numReachable]->type == Type::unreachable) {
-        break;
-      }
-    }
-    if (numReachable == curr->list.size()) {
-      return;
-    }
-    // Keep the first unreachable child and nothing after it
-    curr->list.resize(numReachable + 1);
-    auto* unreachable = curr->list[numReachable];
-    // If it is control flow, figure out what type it should have
-    if (Properties::isControlFlowStructure(unreachable)) {
-      // If `unreachable` consumes any values (besides an i32 if it is an If) or
-      // if it produces multiple values and multivalue is not enabled, then
-      // append an `unreachable` to the list after all to make the typing work
-      // out. Otherwise, update the type accordingly.
-      // TODO: Implement control flow input types for use here.
-      StackUtils::StackFlow flow(curr);
-      auto sig = flow.getSignature(unreachable);
-      bool needsExtraUnreachable;
-      if (unreachable->is<If>()) {
-        needsExtraUnreachable = sig.params != Type::i32;
-      } else {
-        needsExtraUnreachable = sig.params != Type::none;
-      }
-      if (!getModule()->features.hasMultivalue() && sig.results.isMulti()) {
-        needsExtraUnreachable = true;
-      }
-      if (needsExtraUnreachable) {
-        curr->list.push_back(Builder(*getModule()).makeUnreachable());
-        unreachable->type = Type::none;
-      } else {
-        unreachable->type = sig.results;
-      }
-    }
-  }
-  void visitIf(If* curr) {
-    // The If type must be a supertype of the individual arm types, so just
-    // reuse it for both arms. This can save space in the type section.
-    // For example (sketch): in
-    //   (if (result i32) (then (br $exit)) (else (i32.const 0)))
-    // the unreachable then-arm is retyped to i32 here so that it agrees
-    // with the If as a whole.
- curr->ifTrue->type = curr->type; - if (curr->ifFalse) { - curr->ifFalse->type = curr->type; - } - } - void visitLoop(Loop* curr) { curr->body->type = curr->type; } - void visitTry(Try* curr) { - curr->body->type = curr->type; - curr->catchBody->type = curr->type; - } - // The public entry point of this class - void lower(Function* func, Module* module) { - func->body->type = func->sig.results; - walkFunctionInModule(func, module); -#ifndef NDEBUG - // Assert that only instructions that never return have unreachable type - struct AssertValidUnreachableTypes - : PostWalker> { - void visitExpression(Expression* curr) { - if (curr->type == Type::unreachable) { - assert(StackUtils::mayBeUnreachable(curr) && - "Unexpected unreachable instruction"); - } - } - } asserter; - asserter.walkFunction(func); -#endif // NDEBUG - } -}; - struct Scope { enum Kind { Func, Block, Loop, If, Else, Try, Catch } kind; std::vector instrs; @@ -354,7 +236,7 @@ void Stackifier::emitFunctionEnd() { // We are done stackifying the structure. Now eliminate redundant unreachable // instructions and lower unreachable types away. - UnreachableLowerer().lower(func, module); + StackUtils::lowerUnreachables(func, module); } void Stackifier::emitUnreachable() { diff --git a/src/passes/StackifyDrops.cpp b/src/passes/StackifyDrops.cpp index 269480fe4c3..e673d5ce8d5 100644 --- a/src/passes/StackifyDrops.cpp +++ b/src/passes/StackifyDrops.cpp @@ -27,11 +27,11 @@ struct StackifyDropsPass : public WalkerPass> { Pass* create() override { return new StackifyDropsPass; } void visitBlock(Block* curr) { - StackUtils::StackFlow flow(curr); + StackFlow flow(curr); for (auto* expr : curr->list) { auto& dests = flow.dests[expr]; - bool unused = std::all_of( - dests.begin(), dests.end(), [](StackUtils::StackFlow::Location& loc) { + bool unused = + std::all_of(dests.begin(), dests.end(), [](StackFlow::Location& loc) { assert(loc.expr != nullptr); return loc.unreachable || loc.expr->is(); }); diff --git a/src/passes/StackifyLocals.cpp b/src/passes/StackifyLocals.cpp index 260ce9397fe..a437129c21b 100644 --- a/src/passes/StackifyLocals.cpp +++ b/src/passes/StackifyLocals.cpp @@ -57,8 +57,8 @@ struct StackifyLocalsPass : public WalkerPass> { } auto setIndex = it->second; // Check that the intervening instructions are stack neutral - StackUtils::StackSignature sig(curr->list.begin() + setIndex + 1, - curr->list.begin() + i); + StackSignature sig(curr->list.begin() + setIndex + 1, + curr->list.begin() + i); if (sig.params != Type::none || sig.results != Type::none) { continue; } diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 76a23484fcf..fdc9b9e3a6f 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -201,7 +201,7 @@ class ExpressionRunner : public OverriddenVisitor { // The stack size to restore after a control flow structure size_t restoreSize; if (isStacky() && Properties::isControlFlowStructure(curr)) { - StackUtils::StackSignature sig(curr); + StackSignature sig(curr); assert(valueStack.size() >= sig.params.size()); restoreSize = valueStack.size() - sig.params.size(); } @@ -216,7 +216,7 @@ class ExpressionRunner : public OverriddenVisitor { std::cerr << "\n"; #endif // STACKY_DEBUG // Reverse the popped values so they come out in the right order - size_t numChildren = StackUtils::StackSignature(curr).params.size(); + size_t numChildren = StackSignature(curr).params.size(); std::reverse(valueStack.end() - numChildren, valueStack.end()); } auto ret = OverriddenVisitor::visit(curr); diff --git 
a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 0ddb70ace10..4d461666617 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -2125,7 +2125,7 @@ static void validateBinaryenIR(Module& wasm, ValidationInfo& info) { // contents. assert(profile == IRProfile::Stacky); auto* block = curr->cast(); - StackUtils::StackSignature sig(block->list.begin(), block->list.end()); + StackSignature sig(block->list.begin(), block->list.end()); if (!sig.satisfies(Signature(Type::none, block->type))) { std::ostringstream ss; ss << "stale type found in " << scope << " on " << curr diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index da73254e161..814aff53cda 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -260,7 +260,7 @@ static void finalizeNormalBlock(Block* block) { static void finalizeStackyBlock(Block* block) { // Similar rules apply to stacky blocks, although the type they produce // is not just the type of their last instruction. - StackUtils::StackSignature sig(block->list.begin(), block->list.end()); + StackSignature sig(block->list.begin(), block->list.end()); block->type = sig.results; if (block->type == Type::none) { block->type = sig.unreachable ? Type::unreachable : Type::none; From 5a92cd56cd97c84115af70dcfc51ca7d0a92162e Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 1 Jul 2020 15:16:07 -0700 Subject: [PATCH 24/27] Move lowerUnreachables to its own pass and use in binary writing --- src/ir/stack-utils.cpp | 143 +----------------------- src/ir/stack-utils.h | 8 -- src/passes/CMakeLists.txt | 1 + src/passes/LowerUnreachables.cpp | 180 +++++++++++++++++++++++++++++++ src/passes/StackDCE.cpp | 1 - src/passes/Stackify.cpp | 4 - src/passes/pass.cpp | 4 + src/passes/passes.h | 1 + src/wasm-stack.h | 3 + src/wasm/wasm-binary.cpp | 6 ++ 10 files changed, 198 insertions(+), 153 deletions(-) create mode 100644 src/passes/LowerUnreachables.cpp diff --git a/src/ir/stack-utils.cpp b/src/ir/stack-utils.cpp index 5db1b49f21d..14e8f9a95e8 100644 --- a/src/ir/stack-utils.cpp +++ b/src/ir/stack-utils.cpp @@ -20,25 +20,6 @@ namespace wasm { namespace StackUtils { -bool mayBeUnreachable(Expression* curr) { - switch (curr->_id) { - case Expression::Id::BreakId: - return curr->cast()->condition == nullptr; - case Expression::Id::CallId: - return curr->cast()->isReturn; - case Expression::Id::CallIndirectId: - return curr->cast()->isReturn; - case Expression::Id::ReturnId: - case Expression::Id::SwitchId: - case Expression::Id::UnreachableId: - case Expression::Id::ThrowId: - case Expression::Id::RethrowId: - return true; - default: - return false; - } -} - void compact(Block* block) { size_t newIndex = 0; for (size_t i = 0, size = block->list.size(); i < size; ++i) { @@ -49,125 +30,6 @@ void compact(Block* block) { block->list.resize(newIndex); } -void lowerUnreachables(Function* func, Module* module) { - struct UnreachableLowerer - : Walker> { - // Implementation details - static void scan(UnreachableLowerer* self, Expression** currp) { - // Visit only control flow children, since we know all other children will - // be pops and we are not interested in those. Also pre-visit nodes - // instead of post-visiting them because type inference depends on parents - // being typed first. - Expression* curr = *currp; - switch (curr->_id) { - case Expression::Id::BlockId: { - // Visit children only through the first unreachable, since any - // further children will be discarded before we could visit them. 
-          for (auto*& child : curr->cast<Block>()->list) {
-            self->pushTask(scan, &child);
-            if (child->type == Type::unreachable) {
-              break;
-            }
-          }
-          self->pushTask(doVisitBlock, currp);
-          break;
-        }
-        case Expression::Id::IfId:
-          self->maybePushTask(scan, &curr->cast<If>()->ifFalse);
-          self->pushTask(scan, &curr->cast<If>()->ifTrue);
-          self->pushTask(doVisitIf, currp);
-          break;
-        case Expression::Id::LoopId:
-          self->pushTask(scan, &curr->cast<Loop>()->body);
-          self->pushTask(doVisitLoop, currp);
-          break;
-        case Expression::Id::TryId:
-          self->pushTask(scan, &curr->cast<Try>()->catchBody);
-          self->pushTask(scan, &curr->cast<Try>()->body);
-          self->pushTask(scan, &curr->cast<Try>()->body);
-        default:
-          break;
-      }
-    }
-    void visitBlock(Block* curr) {
-      if (curr->type == Type::unreachable) {
-      }
-      size_t numReachable = 0;
-      for (; numReachable < curr->list.size(); ++numReachable) {
-        if (curr->list[numReachable]->type == Type::unreachable) {
-          break;
-        }
-      }
-      if (numReachable == curr->list.size()) {
-        return;
-      }
-      // Keep the first unreachable child and nothing after it
-      curr->list.resize(numReachable + 1);
-      auto* unreachable = curr->list[numReachable];
-      // If it is control flow, figure out what type it should have
-      if (Properties::isControlFlowStructure(unreachable)) {
-        // If `unreachable` consumes any values (besides an i32 if it is an If)
-        // or if it produces multiple values and multivalue is not enabled, then
-        // append an `unreachable` to the list after all to make the typing work
-        // out. Otherwise, update the type accordingly.
-        // TODO: Implement control flow input types for use here.
-        StackFlow flow(curr);
-        auto sig = flow.getSignature(unreachable);
-        bool needsExtraUnreachable;
-        if (unreachable->is<If>()) {
-          needsExtraUnreachable = sig.params != Type::i32;
-        } else {
-          needsExtraUnreachable = sig.params != Type::none;
-        }
-        if (!getModule()->features.hasMultivalue() && sig.results.isMulti()) {
-          needsExtraUnreachable = true;
-        }
-        if (needsExtraUnreachable) {
-          curr->list.push_back(Builder(*getModule()).makeUnreachable());
-          unreachable->type = Type::none;
-        } else {
-          unreachable->type = sig.results;
-        }
-      }
-    }
-
-    void visitIf(If* curr) {
-      // The If type must be a supertype of the individual arm types, so just
-      // reuse it for both arms. This can save space in the type section.
-      curr->ifTrue->type = curr->type;
-      if (curr->ifFalse) {
-        curr->ifFalse->type = curr->type;
-      }
-    }
-
-    void visitLoop(Loop* curr) { curr->body->type = curr->type; }
-
-    void visitTry(Try* curr) {
-      curr->body->type = curr->type;
-      curr->catchBody->type = curr->type;
-    }
-  };
-
-  func->body->type = func->sig.results;
-  UnreachableLowerer().walkFunctionInModule(func, module);
-
-#ifndef NDEBUG
-  // Assert that only instructions that never return have unreachable type
-  struct AssertValidUnreachableTypes
-    : PostWalker<AssertValidUnreachableTypes,
-                 UnifiedExpressionVisitor<AssertValidUnreachableTypes>> {
-    void visitExpression(Expression* curr) {
-      if (curr->type == Type::unreachable) {
-        assert(StackUtils::mayBeUnreachable(curr) &&
-               "Unexpected unreachable instruction");
-      }
-    }
-  } asserter;
-  asserter.walkFunction(func);
-#endif // NDEBUG
-}
-
 } // namespace StackUtils
 
 StackSignature::StackSignature(Expression* expr) {
@@ -314,8 +176,9 @@ StackFlow::StackFlow(Block* curr) {
   }
 
   // Set the block as the destination for its return values
-  assert(curr->type != Type::unreachable);
-  process(curr, StackSignature(curr->type, Type::none));
+  bool unreachable = curr->type == Type::unreachable;
+  Type params = unreachable ? Type::none : curr->type;
+  process(curr, StackSignature(params, Type::none, unreachable));
 }
 
 StackSignature StackFlow::getSignature(Expression* expr) {
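
As a reference for the StackSignature constructors exercised above, here is a
sketch of the composition semantics; the shapes are inferred from these hunks
and are not an authoritative restatement of the header.

#include "ir/stack-utils.h"
#include "wasm.h"

namespace wasm {

// The range constructor folds per-instruction signatures, so for a stacky
// block it reports what the whole sequence consumes and produces.
static void summarize(Block* block) {
  StackSignature sig(block->list.begin(), block->list.end());
  // sig.params:      types the sequence pops from the surrounding stack
  // sig.results:     types it leaves behind
  // sig.unreachable: whether control flow cannot fall off the end
  // The same consistency check validateBinaryenIR performs:
  bool ok = sig.satisfies(Signature(Type::none, block->type));
  (void)ok;
}

} // namespace wasm
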
diff --git a/src/ir/stack-utils.h b/src/ir/stack-utils.h
index 537a26f211f..ba960bc672b 100644
--- a/src/ir/stack-utils.h
+++ b/src/ir/stack-utils.h
@@ -25,17 +25,9 @@ namespace wasm {
 
 namespace StackUtils {
 
-// Whether it is valid for `curr` to have unreachable type in Stacky IR.
-bool mayBeUnreachable(Expression* curr);
-
 // Iterate through `block` and remove nops.
 void compact(Block* block);
 
-// Infers the proper WebAssembly types for unreachable control flow expressions
-// and eliminates the redundant unreachable instructions generated by
-// BinaryenIRWriter where possible. After unreachable lowering, the only
-// expressions that still have Unreachable type are instructions that can never
-// return.
 void lowerUnreachables(Function* func, Module* module);
 
 } // namespace StackUtils
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 6c7c0d92dea..4ca61a8486b 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -39,6 +39,7 @@ set(passes_SOURCES
   LocalCSE.cpp
   LogExecution.cpp
   LoopInvariantCodeMotion.cpp
+  LowerUnreachables.cpp
   MemoryPacking.cpp
   MergeBlocks.cpp
   MergeLocals.cpp
diff --git a/src/passes/LowerUnreachables.cpp b/src/passes/LowerUnreachables.cpp
new file mode 100644
index 00000000000..8bd415380cf
--- /dev/null
+++ b/src/passes/LowerUnreachables.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2020 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Infers the proper WebAssembly types for unreachable control flow expressions
+// and eliminates the redundant unreachable instructions generated by
+// BinaryenIRWriter where possible. After unreachable lowering, the only
+// expressions that still have Unreachable type are instructions that can never
+// return.
+
+#include "ir/stack-utils.h"
+#include "pass.h"
+
+namespace wasm {
+
+namespace {
+
+// Whether it is valid for `curr` to be unreachable after lowering.
+bool mayBeUnreachable(Expression* curr) {
+  switch (curr->_id) {
+    case Expression::Id::BreakId:
+      return curr->cast<Break>()->condition == nullptr;
+    case Expression::Id::CallId:
+      return curr->cast<Call>()->isReturn;
+    case Expression::Id::CallIndirectId:
+      return curr->cast<CallIndirect>()->isReturn;
+    case Expression::Id::ReturnId:
+    case Expression::Id::SwitchId:
+    case Expression::Id::UnreachableId:
+    case Expression::Id::ThrowId:
+    case Expression::Id::RethrowId:
+      return true;
+    default:
+      return false;
+  }
+}
+
+struct LowerUnreachablesPass
+  : public WalkerPass<
+      Walker<LowerUnreachablesPass, Visitor<LowerUnreachablesPass>>> {
+  using Super = WalkerPass<
+    Walker<LowerUnreachablesPass, Visitor<LowerUnreachablesPass>>>;
+  bool isFunctionParallel() override { return true; }
+  Pass* create() override { return new LowerUnreachablesPass; }
+
+  static void scan(LowerUnreachablesPass* self, Expression** currp) {
+    // Visit only control flow children, since we know all other children will
+    // be pops and we are not interested in those. Also pre-visit nodes instead
+    // of post-visiting them because type inference depends on parents being
+    // typed first.
+    Expression* curr = *currp;
+    switch (curr->_id) {
+      case Expression::Id::BlockId: {
+        // Visit children only through the first unreachable, since any further
+        // children will be discarded before we could visit them.
+        for (auto*& child : curr->cast<Block>()->list) {
+          self->pushTask(scan, &child);
+          if (child->type == Type::unreachable) {
+            break;
+          }
+        }
+        self->pushTask(doVisitBlock, currp);
+        break;
+      }
+      case Expression::Id::IfId:
+        self->maybePushTask(scan, &curr->cast<If>()->ifFalse);
+        self->pushTask(scan, &curr->cast<If>()->ifTrue);
+        self->pushTask(doVisitIf, currp);
+        break;
+      case Expression::Id::LoopId:
+        self->pushTask(scan, &curr->cast<Loop>()->body);
+        self->pushTask(doVisitLoop, currp);
+        break;
+      case Expression::Id::TryId:
+        self->pushTask(scan, &curr->cast<Try>()->catchBody);
+        self->pushTask(scan, &curr->cast<Try>()->body);
+        self->pushTask(scan, &curr->cast<Try>()->body);
+        self->pushTask(doVisitTry, currp);
+        break;
+      default:
+        break;
+    }
+  }
+
+  void visitBlock(Block* curr) {
+    size_t numReachable = 0;
+    for (; numReachable < curr->list.size(); ++numReachable) {
+      if (curr->list[numReachable]->type == Type::unreachable) {
+        break;
+      }
+    }
+    if (numReachable == curr->list.size()) {
+      return;
+    }
+    // Keep the first unreachable child and nothing after it
+    curr->list.resize(numReachable + 1);
+    auto* unreachable = curr->list[numReachable];
+    // If it is control flow, figure out what type it should have
+    if (Properties::isControlFlowStructure(unreachable)) {
+      // If `unreachable` consumes any values (besides an i32 if it is an If)
+      // or if it produces multiple values and multivalue is not enabled, then
+      // append an `unreachable` to the list after all to make the typing work
+      // out. Otherwise, update the type accordingly.
+      // TODO: Implement control flow input types for use here.
+      StackFlow flow(curr);
+      auto sig = flow.getSignature(unreachable);
+      bool needsExtraUnreachable;
+      if (unreachable->is<If>()) {
+        needsExtraUnreachable = sig.params != Type::i32;
+      } else {
+        needsExtraUnreachable = sig.params != Type::none;
+      }
+      if (!getModule()->features.hasMultivalue() && sig.results.isMulti()) {
+        needsExtraUnreachable = true;
+      }
+      if (needsExtraUnreachable) {
+        curr->list.push_back(Builder(*getModule()).makeUnreachable());
+        unreachable->type = Type::none;
+      } else {
+        unreachable->type = sig.results;
+      }
+    }
+  }
+
+  void visitIf(If* curr) {
+    // The If type must be a supertype of the individual arm types, so just
+    // reuse it for both arms. This can save space in the type section.
+    curr->ifTrue->type = curr->type;
+    if (curr->ifFalse) {
+      curr->ifFalse->type = curr->type;
+    }
+  }
+
+  void visitLoop(Loop* curr) { curr->body->type = curr->type; }
+
+  void visitTry(Try* curr) {
+    curr->body->type = curr->type;
+    curr->catchBody->type = curr->type;
+  }
+
+  void doWalkFunction(Function* func) {
+    if (func->profile != IRProfile::Stacky) {
+      return;
+    }
+
+    func->body->type = func->sig.results;
+    Super::doWalkFunction(func);
+
+#ifndef NDEBUG
+    // Assert that only instructions that never return have unreachable type
+    struct AssertValidUnreachableTypes
+      : PostWalker<AssertValidUnreachableTypes,
+                   UnifiedExpressionVisitor<AssertValidUnreachableTypes>> {
+      void visitExpression(Expression* curr) {
+        if (curr->type == Type::unreachable) {
+          assert(mayBeUnreachable(curr) &&
+                 "Unexpected unreachable instruction");
+        }
+      }
+    } asserter;
+    asserter.walkFunction(func);
+#endif // NDEBUG
+  }
+};
+
+} // anonymous namespace
+
+Pass* createLowerUnreachablesPass() { return new LowerUnreachablesPass; }
+
+} // namespace wasm
diff --git a/src/passes/StackDCE.cpp b/src/passes/StackDCE.cpp
index a484d3464eb..96b2e6e5754 100644
--- a/src/passes/StackDCE.cpp
+++ b/src/passes/StackDCE.cpp
@@ -29,7 +29,6 @@ struct StackDCEPass : public WalkerPass<PostWalker<StackDCEPass>> {
   void visitBlock(Block* curr) {
     for (size_t i = 0, size = curr->list.size(); i < size; ++i) {
       if (curr->list[i]->type == Type::unreachable) {
-        assert(!Properties::isControlFlowStructure(curr->list[i]));
         curr->list.resize(i + 1);
         return;
       }
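
A minimal sketch of driving the new pass by name, mirroring what
WasmBinaryWriter::prepare() is taught to do at the end of this patch
(lowerUnreachableTypes is a hypothetical wrapper, not part of the tree):

#include "pass.h"
#include "wasm.h"

namespace wasm {

static void lowerUnreachableTypes(Module& module) {
  PassRunner runner(&module);
  runner.add("lower-unreachables");
  runner.run();
  // Afterwards the only expressions left with unreachable type are those
  // mayBeUnreachable() accepts, such as unconditional br, return_call, and
  // unreachable itself, which is what the NDEBUG walker above asserts.
}

} // namespace wasm
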
diff --git a/src/passes/Stackify.cpp b/src/passes/Stackify.cpp
index 723fbb709df..ed4a64da3af 100644
--- a/src/passes/Stackify.cpp
+++ b/src/passes/Stackify.cpp
@@ -233,10 +233,6 @@ void Stackifier::emitFunctionEnd() {
   auto& scope = scopeStack.back();
   assert(scope.kind == Scope::Func);
   patchInstrs(func->body);
-
-  // We are done stackifying the structure. Now eliminate redundant unreachable
-  // instructions and lower unreachable types away.
-  StackUtils::lowerUnreachables(func, module);
 }
 
 void Stackifier::emitUnreachable() {
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index a2d62372594..42f6214899c 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -169,6 +169,10 @@ void PassRegistry::registerPasses() {
   registerPass("log-execution",
               "instrument the build with logging of where execution goes",
               createLogExecutionPass);
+  registerPass("lower-unreachables",
+               "perform type inference to remove unreachable types from a "
+               "stackified module",
+               createLowerUnreachablesPass);
   registerPass("i64-to-i32-lowering",
               "lower all uses of i64s to use i32s instead",
               createI64ToI32LoweringPass);
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 1fd6ae28dc6..b78e5e2d019 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -59,6 +59,7 @@ Pass* createLegalizeJSInterfaceMinimallyPass();
 Pass* createLimitSegmentsPass();
 Pass* createLocalCSEPass();
 Pass* createLogExecutionPass();
+Pass* createLowerUnreachablesPass();
 Pass* createInstrumentLocalsPass();
 Pass* createInstrumentMemoryPass();
 Pass* createLoopInvariantCodeMotionPass();
diff --git a/src/wasm-stack.h b/src/wasm-stack.h
index ca95efebaea..eae669e3b9c 100644
--- a/src/wasm-stack.h
+++ b/src/wasm-stack.h
@@ -802,6 +802,9 @@ template<typename SubType>
 void BinaryenIRWriter<SubType>::visitTupleMake(TupleMake* curr) {
   for (auto* operand : curr->operands) {
     visit(operand);
+    if (operand->type == Type::unreachable) {
+      break;
+    }
   }
   emit(curr);
   if (curr->type == Type::unreachable) {
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index ddc4de36c22..631af1b3030 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -18,6 +18,7 @@
 #include
 
 #include "ir/module-utils.h"
+#include "ir/stack-utils.h"
 #include "support/bits.h"
 #include "support/debug.h"
 #include "wasm-binary.h"
@@ -29,6 +30,11 @@
 namespace wasm {
 
 void WasmBinaryWriter::prepare() {
+  // Perform type inference to remove `unreachable` types from stacky IR
+  PassRunner runner(wasm);
+  runner.add("lower-unreachables");
+  runner.run();
+
   // Collect function types and their frequencies. Collect information in each
   // function in parallel, then merge.
   ModuleUtils::collectSignatures(*wasm, types, typeIndices);

From 1220f9e96dcaafa2653ae84ffdc0ddd4197d268a Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Mon, 17 Aug 2020 11:28:23 -0700
Subject: [PATCH 25/27] Reintroduce stacky refinalize

---
 src/ir/ReFinalize.cpp       | 11 ++++------
 src/ir/utils.h              | 23 +++++++++++++++++----
 src/passes/ReReloop.cpp     |  2 +-
 src/passes/StackDCE.cpp     |  9 +++++++++
 src/passes/Stackify.cpp     |  1 +
 src/wasm/wasm-validator.cpp |  3 ++-
 src/wasm/wasm.cpp           | 40 ++++++++++++++++++------------------
 7 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp
index c6b7ee76eb1..28f986b6aee 100644
--- a/src/ir/ReFinalize.cpp
+++ b/src/ir/ReFinalize.cpp
@@ -47,15 +47,12 @@ void ReFinalize::visitBlock(Block* curr) {
   }
   // Get the least upper bound type of the last element and all branch return
   // values
-  if (profile == IRProfile::Normal) {
+  if (getProfile() == IRProfile::Normal) {
     curr->type = curr->list.back()->type;
   } else {
-    assert(profile == IRProfile::Stacky);
+    assert(getProfile() == IRProfile::Stacky);
     StackSignature sig(curr->list.begin(), curr->list.end());
-    curr->type = sig.results;
-    if (curr->type == Type::none) {
-      curr->type = sig.unreachable ? Type::unreachable : Type::none;
-    }
+    curr->type = sig.unreachable ? Type::unreachable : sig.results;
   }
   if (curr->name.is()) {
     auto iter = breakValues.find(curr->name);
@@ -68,7 +65,7 @@ void ReFinalize::visitBlock(Block* curr) {
     return;
   }
   // type is none, but we might be unreachable
-  if (curr->type == Type::none && profile == IRProfile::Normal) {
+  if (curr->type == Type::none && getProfile() == IRProfile::Normal) {
     for (auto* child : curr->list) {
       if (child->type == Type::unreachable) {
         curr->type = Type::unreachable;
diff --git a/src/ir/utils.h b/src/ir/utils.h
index d0cfd660487..4e7538a84bb 100644
--- a/src/ir/utils.h
+++ b/src/ir/utils.h
@@ -102,14 +102,29 @@ struct ReFinalize
   : public WalkerPass<PostWalker<ReFinalize, OverriddenVisitor<ReFinalize>>> {
   bool isFunctionParallel() override { return true; }
 
-  Pass* create() override { return new ReFinalize; }
+  bool hasDefaultProfile;
+  IRProfile defaultProfile;
 
-  IRProfile profile;
-
-  ReFinalize(IRProfile profile = IRProfile::Normal) : profile(profile) {
+  ReFinalize() : hasDefaultProfile(false) { name = "refinalize"; }
+  ReFinalize(IRProfile profile)
+    : hasDefaultProfile(true), defaultProfile(profile) {
     name = "refinalize";
   }
 
+  Pass* create() override {
+    return hasDefaultProfile ? new ReFinalize(defaultProfile) : new ReFinalize;
+  }
+
+  IRProfile getProfile() {
+    if (auto* func = getFunction()) {
+      return func->profile;
+    } else if (hasDefaultProfile) {
+      return defaultProfile;
+    } else {
+      WASM_UNREACHABLE("Cannot determine IR profile to use");
+    }
+  }
+
   // block finalization is O(bad) if we do each block by itself, so do it in
   // bulk, tracking break value types so we just do a linear pass
diff --git a/src/passes/ReReloop.cpp b/src/passes/ReReloop.cpp
index 004e922edc2..3e386444eb7 100644
--- a/src/passes/ReReloop.cpp
+++ b/src/passes/ReReloop.cpp
@@ -364,7 +364,7 @@ struct ReReloop final : public Pass {
       }
     }
     // TODO: should this be in the relooper itself?
-    ReFinalize().walk(function->body);
+    ReFinalize(function->profile).walk(function->body);
   }
 };
 
diff --git a/src/passes/StackDCE.cpp b/src/passes/StackDCE.cpp
index 96b2e6e5754..e88edc1aeca 100644
--- a/src/passes/StackDCE.cpp
+++ b/src/passes/StackDCE.cpp
@@ -25,15 +25,24 @@ namespace wasm {
 struct StackDCEPass : public WalkerPass<PostWalker<StackDCEPass>> {
   bool isFunctionParallel() override { return true; }
   Pass* create() override { return new StackDCEPass; }
+  bool changed = false;
 
   void visitBlock(Block* curr) {
     for (size_t i = 0, size = curr->list.size(); i < size; ++i) {
       if (curr->list[i]->type == Type::unreachable) {
         curr->list.resize(i + 1);
+        changed = true;
         return;
       }
     }
   }
+
+  void doWalkFunction(Function* func) {
+    super::doWalkFunction(func);
+    if (changed) {
+      ReFinalize().walkFunctionInModule(func, getModule());
+    }
+  }
 };
 
 Pass* createStackDCEPass() { return new StackDCEPass; }
diff --git a/src/passes/Stackify.cpp b/src/passes/Stackify.cpp
index ed4a64da3af..fe724eaafe1 100644
--- a/src/passes/Stackify.cpp
+++ b/src/passes/Stackify.cpp
@@ -364,6 +364,7 @@ class StackifyPass : public Pass {
     lowerTupleGlobals(module);
     PassRunner subRunner(runner);
     subRunner.add(std::make_unique());
+    subRunner.add(std::make_unique());
     subRunner.run();
   }
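
The two construction modes in practice, as a sketch; func and module stand in
for whatever the caller has at hand:

#include "ir/utils.h"

namespace wasm {

static void refinalizeExamples(Function* func, Module* module) {
  // Inside a module-aware function walk the profile comes from the function
  // itself, so no default is needed (this is what StackDCE does above):
  ReFinalize().walkFunctionInModule(func, module);
  // Walking a detached body provides no function context, so a default
  // profile must be supplied, as ReReloop now does:
  ReFinalize(func->profile).walk(func->body);
}

} // namespace wasm
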
diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp
index 4d461666617..0cb2a536153 100644
--- a/src/wasm/wasm-validator.cpp
+++ b/src/wasm/wasm-validator.cpp
@@ -2097,7 +2097,8 @@ static void validateBinaryenIR(Module& wasm, ValidationInfo& info) {
       // check if a node type is 'stale', i.e., we forgot to finalize() the
       // node.
       auto profile = getFunction() ? getFunction()->profile : IRProfile::Normal;
-      if (profile == IRProfile::Normal || !curr->is<Block>()) {
+      if (profile == IRProfile::Normal || !curr->is<Block>() ||
+          curr->type == Type::unreachable) {
         auto oldType = curr->type;
         ReFinalizeNode(profile).visit(curr);
         auto newType = curr->type;
diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp
index 814aff53cda..4f41bf13d35 100644
--- a/src/wasm/wasm.cpp
+++ b/src/wasm/wasm.cpp
@@ -258,13 +258,13 @@ static void finalizeNormalBlock(Block* block) {
 }
 
 static void finalizeStackyBlock(Block* block) {
-  // Similar rules apply to stacky blocks, although the type they produce
-  // is not just the type of their last instruction.
+  // Similar rules apply to stacky blocks, although the type they produce is
+  // not just the type of their last instruction. Stacky blocks can also be
+  // unreachable even if they produce a concrete value after the unreachable
+  // instruction, because unreachable stacky blocks will have their types
+  // inferred before binary writing.
   StackSignature sig(block->list.begin(), block->list.end());
-  block->type = sig.results;
-  if (block->type == Type::none) {
-    block->type = sig.unreachable ? Type::unreachable : Type::none;
-  }
+  block->type = sig.unreachable ? Type::unreachable : sig.results;
 }
 
 struct TypeSeeker : public PostWalker<TypeSeeker> {
@@ -305,12 +305,15 @@ struct TypeSeeker : public PostWalker<TypeSeeker> {
   void visitBlock(Block* curr) {
     if (curr == target) {
       if (profile == IRProfile::Normal) {
-        finalizeNormalBlock(curr);
+        if (curr->list.size() > 0) {
+          types.push_back(curr->list.back()->type);
+        } else {
+          types.push_back(Type::none);
+        }
       } else {
-        assert(profile == IRProfile::Stacky);
-        finalizeStackyBlock(curr);
+        StackSignature sig(curr->list.begin(), curr->list.end());
+        types.push_back(sig.results);
       }
-      types.push_back(curr->type);
     } else if (curr->name == targetName) {
       // ignore all breaks til now, they were captured by someone with the same
       // name
@@ -332,6 +335,7 @@ struct TypeSeeker : public PostWalker<TypeSeeker> {
 // a block is unreachable if one of its elements is unreachable,
 // and there are no branches to it
 static void handleUnreachable(Block* block,
+                              IRProfile profile,
                               bool breakabilityKnown = false,
                               bool hasBreak = false) {
   if (block->type == Type::unreachable) {
@@ -342,8 +346,8 @@ static void handleUnreachable(Block* block,
   }
   // if we are concrete, stop - even an unreachable child
   // won't change that (since we have a break with a value,
-  // or the final child flows out a value)
-  if (block->type.isConcrete()) {
+  // or the final child flows out a value).
+  if (block->type.isConcrete() && profile == IRProfile::Normal) {
     return;
   }
   // look for an unreachable child
@@ -366,9 +370,7 @@ void Block::finalize(IRProfile profile) {
   if (name.is()) {
     TypeSeeker seeker(this, this->name, profile);
     type = Type::mergeTypes(seeker.types);
-    if (profile == IRProfile::Normal) {
-      handleUnreachable(this);
-    }
+    handleUnreachable(this, profile);
   } else {
     // nothing branches here, so this is easy
     switch (profile) {
@@ -386,16 +388,12 @@ void Block::finalize(IRProfile profile) {
 
 void Block::finalize(Type type_, IRProfile profile) {
   type = type_;
-  if (type == Type::none && list.size() > 0 && profile == IRProfile::Normal) {
-    handleUnreachable(this);
-  }
+  handleUnreachable(this, profile);
 }
 
 void Block::finalize(Type type_, bool hasBreak, IRProfile profile) {
   type = type_;
-  if (type == Type::none && list.size() > 0 && profile == IRProfile::Normal) {
-    handleUnreachable(this, true, hasBreak);
-  }
+  handleUnreachable(this, profile, true, hasBreak);
 }
 
 void If::finalize(Type type_) {

From 695e0edcb176059bfb3f45873769b084430dde9b Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Mon, 17 Aug 2020 14:22:04 -0700
Subject: [PATCH 26/27] Re-enable stackify-future size comparison

---
 scripts/fuzz_opt.py | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index 8a6d38e7fd7..0f5c958546c 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -691,29 +691,31 @@ def do_asyncify(wasm):
     def can_run_on_feature_opts(self, feature_opts):
         return all([x in feature_opts for x in ['--disable-exception-handling', '--disable-simd', '--disable-tail-call', '--disable-reference-types', '--disable-multivalue']])
 
+
 class StackifyCodeSizeChecker(TestCaseHandler):
     frequency = 1.0
 
     def handle(self, wasm):
-        stack_ir = 'stackir.wasm'
-        stackify = 'stackify.wasm'
-        stackify_future = 'stackify-future.wasm'
-
-        stack_ir_command = [in_bin('wasm-opt'), wasm, '--generate-stack-ir', '--optimize-stack-ir', '--optimize-level=3', '-o', stack_ir]
-        stackify_command = [in_bin('wasm-opt'), wasm, '--stackify', '--stack-dce', '--stackify-locals', '--stack-remove-blocks', '--stack-dce', '--coalesce-locals', '--optimize-level=3', '-o', stackify]
-        # stackify_future_command = [in_bin('wasm-opt'), wasm, '--stackify', '--stack-dce', '--stackify-locals', '--stack-remove-blocks', '--stack-dce', '--optimize-level=3']
-
-        run(stack_ir_command, {'BINARYEN_USE_STACKIFY': '0'})
-        run(stackify_command, {'BINARYEN_USE_STACKIFY': '1'})
-        # run(stackify_future_command, {'BINARYEN_USE_STACKIFY': '2'})
-
-        stack_ir_size = os.stat(stack_ir).st_size
-        stackify_size = os.stat(stackify).st_size
-        # stackify_future_size = os.stat(stackify_future).st_size
-
-        print("sizes:", stack_ir_size, stackify_size)
-        assert stackify_size <= stack_ir_size, "Stackify not as good as Stack IR"
-        # assert stackify_future_size <= stackify_size, "New pipeline is not as good"
+        stack_ir = 'stackir.wasm'
+        stackify = 'stackify.wasm'
+        stackify_future = 'stackify-future.wasm'
+
+        command_base = [in_bin('wasm-opt'), wasm] + FEATURE_OPTS
+        stack_ir_command = command_base + ['--generate-stack-ir', '--optimize-stack-ir', '--optimize-level=3', '-o', stack_ir]
+        stackify_command = command_base + ['--stackify', '--stack-dce', '--stackify-locals', '--stack-remove-blocks', '--stack-dce', '--coalesce-locals', '--optimize-level=3', '-o', stackify]
+        stackify_future_command = command_base + ['--stackify', '--stack-dce', '--stackify-locals', '--stack-remove-blocks', '--stack-dce', '--coalesce-locals', '--optimize-level=3', '-o', stackify_future]
+
+        run(stack_ir_command, {'BINARYEN_USE_STACKIFY': '0'})
+        run(stackify_command, {'BINARYEN_USE_STACKIFY': '1'})
+        run(stackify_future_command, {'BINARYEN_USE_STACKIFY': '2'})
+
+        stack_ir_size = os.stat(stack_ir).st_size
+        stackify_size = os.stat(stackify).st_size
+        stackify_future_size = os.stat(stackify_future).st_size
+
+        print("sizes:", stack_ir_size, stackify_size, stackify_future_size)
+        assert stackify_size <= stack_ir_size, "Stackify not as good as Stack IR"
+        assert stackify_future_size <= stackify_size, "New pipeline is not as good"
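
The same comparison can be driven from C++. This sketch only mirrors the pass
names the fuzzer hands to wasm-opt above; the BINARYEN_USE_STACKIFY switch
selects the writer path and is not replicated here.

#include "pass.h"
#include "wasm.h"

namespace wasm {

// Runs the prototype Stackify pipeline, i.e. the middle configuration the
// fuzzer measures against Stack IR.
static void runStackifyPipeline(Module& module) {
  PassRunner runner(&module);
  runner.options.optimizeLevel = 3; // matches --optimize-level=3
  runner.add("stackify");
  runner.add("stack-dce");
  runner.add("stackify-locals");
  runner.add("stack-remove-blocks");
  runner.add("stack-dce");
  runner.add("coalesce-locals");
  runner.run();
}

} // namespace wasm
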
 
 
 # The global list of all test case handlers

From 6a9709bddbfba586efe12770227047dfe2c1dbe8 Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Mon, 17 Aug 2020 19:08:49 -0700
Subject: [PATCH 27/27] Fix subtle ReFinalize bug

---
 src/ir/ReFinalize.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp
index 28f986b6aee..3603423ff7f 100644
--- a/src/ir/ReFinalize.cpp
+++ b/src/ir/ReFinalize.cpp
@@ -45,6 +45,11 @@ void ReFinalize::visitBlock(Block* curr) {
     curr->type = Type::none;
     return;
   }
+
+  // Stacky blocks can be unreachable even if they yield concrete values, as
+  // long as they are not also branch targets.
+  bool unreachableIfNotBranchTarget = false;
+
   // Get the least upper bound type of the last element and all branch return
   // values
   if (getProfile() == IRProfile::Normal) {
@@ -52,7 +57,10 @@ void ReFinalize::visitBlock(Block* curr) {
   } else {
     assert(getProfile() == IRProfile::Stacky);
     StackSignature sig(curr->list.begin(), curr->list.end());
-    curr->type = sig.unreachable ? Type::unreachable : sig.results;
+    curr->type = (sig.results == Type::none && sig.unreachable)
+                   ? Type::unreachable
+                   : sig.results;
+    unreachableIfNotBranchTarget = sig.unreachable;
   }
   if (curr->name.is()) {
     auto iter = breakValues.find(curr->name);
@@ -61,6 +69,9 @@ void ReFinalize::visitBlock(Block* curr) {
       return;
     }
   }
+  if (unreachableIfNotBranchTarget) {
+    curr->type = Type::unreachable;
+  }
   if (curr->type == Type::unreachable) {
     return;
   }
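
To summarize the rule this fix implements, here is the decision factored into
a hypothetical standalone helper (illustration only; the real logic lives
inline in visitBlock above):

#include "ir/stack-utils.h"
#include "wasm.h"

namespace wasm {

// Decides the type of a stacky block from its contents. breakType is the
// least upper bound of the value types of branches targeting the block, and
// is only meaningful when isBranchTarget is true.
static Type stackyBlockType(const StackSignature& sig, bool isBranchTarget,
                            Type breakType) {
  if (isBranchTarget) {
    // Branch values win: a targeted block keeps its concrete type even if
    // its own contents are unreachable.
    return breakType;
  }
  // Otherwise any unreachable contents make the whole block unreachable,
  // even if a concrete value follows them.
  return sig.unreachable ? Type::unreachable : sig.results;
}

} // namespace wasm
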