-
Notifications
You must be signed in to change notification settings - Fork 839
StackSignature LUBs #3543
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
StackSignature LUBs #3543
Changes from all commits
b0640a8
6164c59
1911ae1
26a629e
0a5bde0
d72ba69
abfe007
de06f9e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -83,48 +83,6 @@ bool StackSignature::composes(const StackSignature& next) const { | |
| }); | ||
| } | ||
|
|
||
| bool StackSignature::satisfies(Signature sig) const { | ||
| if (sig.params.size() < params.size() || | ||
| sig.results.size() < results.size()) { | ||
| // Not enough values provided or too many produced | ||
| return false; | ||
| } | ||
| bool paramSuffixMatches = | ||
| std::equal(params.begin(), | ||
| params.end(), | ||
| sig.params.end() - params.size(), | ||
| [](const Type& consumed, const Type& provided) { | ||
| return Type::isSubType(provided, consumed); | ||
| }); | ||
| if (!paramSuffixMatches) { | ||
| return false; | ||
| } | ||
| bool resultSuffixMatches = | ||
| std::equal(results.begin(), | ||
| results.end(), | ||
| sig.results.end() - results.size(), | ||
| [](const Type& produced, const Type& expected) { | ||
| return Type::isSubType(produced, expected); | ||
| }); | ||
| if (!resultSuffixMatches) { | ||
| return false; | ||
| } | ||
| if (unreachable) { | ||
| // The unreachable can consume any additional provided params and produce | ||
| // any additional expected results, so we are done. | ||
| return true; | ||
| } | ||
| // Any additional provided params will pass through untouched, so they must be | ||
| // equivalent to the additional produced results. | ||
| return std::equal(sig.params.begin(), | ||
| sig.params.end() - params.size(), | ||
| sig.results.begin(), | ||
| sig.results.end() - results.size(), | ||
| [](const Type& produced, const Type& expected) { | ||
| return Type::isSubType(produced, expected); | ||
| }); | ||
| } | ||
|
|
||
| StackSignature& StackSignature::operator+=(const StackSignature& next) { | ||
| assert(composes(next)); | ||
| std::vector<Type> stack(results.begin(), results.end()); | ||
|
|
@@ -160,6 +118,140 @@ StackSignature StackSignature::operator+(const StackSignature& next) const { | |
| return sig; | ||
| } | ||
|
|
||
| bool StackSignature::isSubType(StackSignature a, StackSignature b) { | ||
| if (a.params.size() > b.params.size() || | ||
| a.results.size() > b.results.size()) { | ||
| // `a` consumes or produces more values than `b` can provides or expects. | ||
| return false; | ||
| } | ||
| if (b.unreachable && !a.unreachable) { | ||
| // Non-polymorphic sequences cannot be typed as being polymorphic. | ||
| return false; | ||
| } | ||
| bool paramSuffixMatches = | ||
| std::equal(a.params.begin(), | ||
| a.params.end(), | ||
| b.params.end() - a.params.size(), | ||
| [](const Type& consumed, const Type& provided) { | ||
| return Type::isSubType(provided, consumed); | ||
| }); | ||
| if (!paramSuffixMatches) { | ||
| return false; | ||
| } | ||
| bool resultSuffixMatches = | ||
| std::equal(a.results.begin(), | ||
| a.results.end(), | ||
| b.results.end() - a.results.size(), | ||
| [](const Type& produced, const Type& expected) { | ||
| return Type::isSubType(produced, expected); | ||
| }); | ||
| if (!resultSuffixMatches) { | ||
| return false; | ||
| } | ||
| if (a.unreachable) { | ||
| // The unreachable can consume any additional provided params and produce | ||
| // any additional expected results, so we are done. | ||
| return true; | ||
| } | ||
| // Any additional provided params will pass through untouched, so they must be | ||
| // compatible with the additional produced results. | ||
| return std::equal(b.params.begin(), | ||
| b.params.end() - a.params.size(), | ||
| b.results.begin(), | ||
| b.results.end() - a.results.size(), | ||
| [](const Type& provided, const Type& expected) { | ||
| return Type::isSubType(provided, expected); | ||
| }); | ||
| } | ||
|
|
||
| bool StackSignature::haveLeastUpperBound(StackSignature a, StackSignature b) { | ||
| // If a signature is unreachable, the LUB could extend its params and results | ||
| // arbitrarily. Otherwise, the LUB must extend its params and results | ||
| // uniformly so that each additional param is a subtype of the corresponding | ||
| // additional result. | ||
| auto extensionCompatible = [](auto self, auto other) -> bool { | ||
| if (self.unreachable) { | ||
| return true; | ||
| } | ||
| // If no extension, then no problem. | ||
| if (self.params.size() >= other.params.size() && | ||
| self.results.size() >= other.results.size()) { | ||
| return true; | ||
| } | ||
| auto extSize = other.params.size() - self.params.size(); | ||
| if (extSize != other.results.size() - self.results.size()) { | ||
| return false; | ||
| } | ||
| return std::equal(other.params.begin(), | ||
| other.params.begin() + extSize, | ||
| other.results.begin(), | ||
| other.results.begin() + extSize, | ||
| [](const Type& param, const Type& result) { | ||
| return Type::isSubType(param, result); | ||
| }); | ||
| }; | ||
| if (!extensionCompatible(a, b) || !extensionCompatible(b, a)) { | ||
| return false; | ||
| } | ||
|
|
||
| auto valsCompatible = [](auto as, auto bs, auto compatible) -> bool { | ||
| // Canonicalize so the as are shorter and any unshared prefix is on bs. | ||
| if (bs.size() < as.size()) { | ||
| std::swap(as, bs); | ||
| } | ||
| // Check that shared values after the unshared prefix have are compatible. | ||
| size_t diff = bs.size() - as.size(); | ||
| for (size_t i = 0, shared = as.size(); i < shared; ++i) { | ||
| if (!compatible(as[i], bs[i + diff])) { | ||
| return false; | ||
| } | ||
| } | ||
| return true; | ||
| }; | ||
|
|
||
| bool paramsOk = valsCompatible(a.params, b.params, [](Type a, Type b) { | ||
| assert(a == b && "TODO: calculate greatest lower bound to handle " | ||
| "contravariance correctly"); | ||
| return true; | ||
| }); | ||
| bool resultsOk = valsCompatible(a.results, b.results, [](Type a, Type b) { | ||
| return Type::getLeastUpperBound(a, b) != Type::none; | ||
| }); | ||
| return paramsOk && resultsOk; | ||
| } | ||
|
|
||
| StackSignature StackSignature::getLeastUpperBound(StackSignature a, | ||
| StackSignature b) { | ||
| assert(haveLeastUpperBound(a, b)); | ||
|
|
||
| auto combineVals = [](auto as, auto bs, auto combine) -> std::vector<Type> { | ||
| // Canonicalize so the as are shorter and any unshared prefix is on bs. | ||
| if (bs.size() < as.size()) { | ||
| std::swap(as, bs); | ||
| } | ||
| // Combine shared values after the unshared prefix. | ||
| size_t diff = bs.size() - as.size(); | ||
| std::vector<Type> vals(bs.begin(), bs.begin() + diff); | ||
| for (size_t i = 0, shared = as.size(); i < shared; ++i) { | ||
| vals.push_back(combine(as[i], bs[i + diff])); | ||
| } | ||
| return vals; | ||
| }; | ||
|
|
||
| auto params = combineVals(a.params, b.params, [&](Type a, Type b) { | ||
| assert(a == b && "TODO: calculate greatest lower bound to handle " | ||
| "contravariance correctly"); | ||
| return a; | ||
| }); | ||
|
|
||
| auto results = combineVals(a.results, b.results, [&](Type a, Type b) { | ||
| return Type::getLeastUpperBound(a, b); | ||
| }); | ||
|
|
||
| return StackSignature{ | ||
| Type(params), Type(results), a.unreachable && b.unreachable}; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is the LUB unreachable when both of the signatures are unreachable?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Being unreachable means that more elements can be appended to the params or prepended to the results as necessary. If two stack signatures have this property, then their LUB would have this property as well. If either signature was not unreachable and could not be extended, then the LUB should not be able to be extended either. This would be important for calculating the LUB of more than two unreachable stack signatures so that the number of params and results is not "frozen" by whatever the first two signatures to be combined are.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm still not sure if I understand the concept of LUB here. In the usual typing sense LUB of two types is a supertype of the two but the lowest one in the type hierarchy. First I'm not sure how unreachability is considered in this type hierarchy, I could be wrong, but I feel, for the two signatures with the same type input and output [a]->[b], if one of them is unreachable, that means that type has more flexibility and it should be considered as a supertype of the other. But here it is possible that we have a LUB of two types that is not unreachable but one of those two child types can be unreachable. I'm not sure if I explained my confusion very well, but I'd appreciate a bit more explanation, maybe with an example.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's the subtype relation on stack signatures, which answers the question "can an instruction sequence with the LHS stack signature be typed with the RHS stack signature?"
As an example of the first rule, consider the instruction sequence The most specific type you could give this sequence is For the second rule, consider this sequence: This instruction sequence has the specific type Note that a reachable stack signature (without a Also, Having written this out, I realize the implementation does not correctly handle the contravariance of the params. I will fix that, although it won't matter until we support block inputs. I also screwed up the parameter extension, which should be a prefix, not a suffix. |
||
| } | ||
|
|
||
| StackFlow::StackFlow(Block* block) { | ||
| // Encapsulates the logic for treating the block and its children | ||
| // uniformly. The end of the block is treated as if it consumed values | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -16,55 +16,7 @@ | |||||||
|
|
||||||||
| // | ||||||||
| // stack-utils.h: Utilities for manipulating and analyzing stack machine code in | ||||||||
| // the form of Poppy IR. | ||||||||
| // | ||||||||
| // Poppy IR represents stack machine code using normal Binaryen IR types by | ||||||||
| // imposing the following constraints: | ||||||||
| // | ||||||||
| // 1. Function bodies and children of control flow (except If conditions) must | ||||||||
| // be blocks. | ||||||||
| // | ||||||||
| // 2. Blocks may have any Expressions except for Pops as children. The sequence | ||||||||
| // of instructions in a block follows the same validation rules as in | ||||||||
| // WebAssembly. That means that any expression may have a concrete type, not | ||||||||
| // just the final expression in the block. | ||||||||
| // | ||||||||
| // 3. All other children must be Pops, which are used to determine the input | ||||||||
| // stack type of each instruction. Pops may not have `unreachable` type. | ||||||||
| // | ||||||||
| // 4. Only control flow structures and instructions that have polymorphic | ||||||||
| // unreachable behavior in WebAssembly may have unreachable type. Blocks may | ||||||||
| // be unreachable when they are not branch targets and when they have an | ||||||||
| // unreachable child. Note that this means a block may be unreachable even | ||||||||
| // if it would otherwise have a concrete type, unlike in Binaryen IR. | ||||||||
| // | ||||||||
| // For example, the following Binaryen IR Function: | ||||||||
| // | ||||||||
| // (func $foo (result i32) | ||||||||
| // (i32.add | ||||||||
| // (i32.const 42) | ||||||||
| // (i32.const 5) | ||||||||
| // ) | ||||||||
| // ) | ||||||||
| // | ||||||||
| // would look like this in Poppy IR: | ||||||||
| // | ||||||||
| // (func $foo (result i32) | ||||||||
| // (block | ||||||||
| // (i32.const 42) | ||||||||
| // (i32.const 5) | ||||||||
| // (i32.add | ||||||||
| // (i32.pop) | ||||||||
| // (i32.pop) | ||||||||
| // ) | ||||||||
| // ) | ||||||||
| // ) | ||||||||
| // | ||||||||
| // Notice that the sequence of instructions in the block is now identical to the | ||||||||
| // sequence of instructions in raw WebAssembly. Also note that Poppy IR's | ||||||||
| // validation rules are largely additional on top of the normal Binaryen IR | ||||||||
| // validation rules, with the only exceptions being block body validation and | ||||||||
| // block unreahchability rules. | ||||||||
| // the form of Poppy IR. See src/passes/Poppify.cpp for Poppy IR documentation. | ||||||||
| // | ||||||||
|
|
||||||||
| #ifndef wasm_ir_stack_h | ||||||||
|
|
@@ -121,7 +73,7 @@ struct StackSignature { | |||||||
|
|
||||||||
| StackSignature() | ||||||||
| : params(Type::none), results(Type::none), unreachable(false) {} | ||||||||
| StackSignature(Type params, Type results, bool unreachable = false) | ||||||||
| StackSignature(Type params, Type results, bool unreachable) | ||||||||
| : params(params), results(results), unreachable(unreachable) {} | ||||||||
|
|
||||||||
| StackSignature(const StackSignature&) = default; | ||||||||
|
|
@@ -146,10 +98,6 @@ struct StackSignature { | |||||||
| // Return `true` iff `next` composes after this stack signature. | ||||||||
| bool composes(const StackSignature& next) const; | ||||||||
|
|
||||||||
| // Whether a block whose contents have this stack signature could be typed | ||||||||
| // with `sig`. | ||||||||
| bool satisfies(Signature sig) const; | ||||||||
|
|
||||||||
| // Compose stack signatures. Assumes they actually compose. | ||||||||
| StackSignature& operator+=(const StackSignature& next); | ||||||||
| StackSignature operator+(const StackSignature& next) const; | ||||||||
|
|
@@ -158,6 +106,72 @@ struct StackSignature { | |||||||
| return params == other.params && results == other.results && | ||||||||
| unreachable == other.unreachable; | ||||||||
| } | ||||||||
|
|
||||||||
| // Whether a block whose contents have stack signature `a` could be typed with | ||||||||
| // stack signature `b`, i.e. whether it could be used in a context that | ||||||||
| // expects signature `b`. Formally, where `a` is the LHS and `b` the RHS: | ||||||||
| // | ||||||||
kripken marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
| // [t1*] -> [t2*] <: [s1* t1'*] -> [s2* t2'*] iff | ||||||||
| // | ||||||||
| // - t1' <: t1 | ||||||||
| // - t2 <: t2' | ||||||||
| // - s1 <: s2 | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I apologize, my confusion is almost certainly a matter of language. But I still don't get it:
Your statement here uses "expected" in two places. First you say
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No worries, I find that I basically have to turn by brain inside out to wrap it around contravariance, so its no wonder everything looks reversed :) Consider a context that expects an instruction sequence with type The block we insert needs to consume a Our block expects an The block of type
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The analogy I found useful to understand is, basically, if When
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, thanks! So in |
||||||||
| // | ||||||||
| // Note that neither signature is unreachable in this rule and that the | ||||||||
| // cardinalities of t1* and t1'*, t2* and t2'*, and s1* and s2* must match. | ||||||||
| // | ||||||||
| // [t1*] -> [t2*] {u} <: [s1* t1'*] -> [s2* t2'*] {u?} iff | ||||||||
| // | ||||||||
| // - [t1*] -> [t2*] <: [t1'*] -> [t2'*] | ||||||||
| // | ||||||||
| // Note that s1* and s2* can have different cardinalities and arbitrary types | ||||||||
| // in this rule. | ||||||||
| // | ||||||||
| // As an example of the first rule, consider this instruction sequence: | ||||||||
| // | ||||||||
| // ref.as_func | ||||||||
| // drop | ||||||||
| // i32.add | ||||||||
| // | ||||||||
| // The most specific type you could give this sequence is [i32, i32, anyref] | ||||||||
| // -> [i32]. But it could also be used in a context that expects [i32, i32, | ||||||||
| // funcref] -> [i32] because ref.as_func can accept funcref or any other | ||||||||
| // subtype of anyref. That's where the contravariance comes from. This | ||||||||
| // instruction sequence could also be used anywhere that expects [f32, i32, | ||||||||
| // i32, anyref] -> [f32, i32] because the f32 simply stays on the stack | ||||||||
| // throughout the sequence. That's where the the prefix extension comes from. | ||||||||
| // | ||||||||
| // For the second rule, consider this sequence: | ||||||||
| // | ||||||||
| // ref.as_func | ||||||||
| // drop | ||||||||
| // i32.add | ||||||||
| // unreachable | ||||||||
| // i32.const 0 | ||||||||
| // | ||||||||
| // This instruction sequence has the specific type [i32, i32, anyref] -> [i32] | ||||||||
| // {u}. It can be used in any situation the previous block can be used in, but | ||||||||
| // can additionally be used in contexts that expect something like [f32, i32, | ||||||||
| // i32, anyref] -> [v128, i32]. Because of the unreachable polymorphism, the | ||||||||
| // additional prefixes on the params and results do not need to match. | ||||||||
| // | ||||||||
| // Note that a reachable stack signature (without a {u}) is never a subtype of | ||||||||
| // any unreachable stack signature (with a {u}). This makes sense because a | ||||||||
| // sequence of instructions that has no polymorphic unreachable behavior | ||||||||
| // cannot be given a type that says it does have polymorphic unreachable | ||||||||
| // behavior. | ||||||||
| // | ||||||||
| // Also, [] -> [] {u} is the bottom type here; it is a subtype of every other | ||||||||
| // stack signature. This corresponds to (unreachable) being able to be given | ||||||||
| // any stack signature. | ||||||||
| static bool isSubType(StackSignature a, StackSignature b); | ||||||||
|
|
||||||||
| // Returns true iff `a` and `b` have a LUB, i.e. a minimal StackSignature that | ||||||||
| // could type block contents of either type `a` or type `b`. | ||||||||
| static bool haveLeastUpperBound(StackSignature a, StackSignature b); | ||||||||
|
|
||||||||
| // Returns the LUB of `a` and `b`. Assumes that the LUB exists. | ||||||||
| static StackSignature getLeastUpperBound(StackSignature a, StackSignature b); | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should it be obvious what an LUB means here? The effects of unreachability in particular are not obvious enough to me without reading the implementation's source code. Maybe I am just missing an intuition for how/where an LUB would be used.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll add a comment clarifying the definition. This will be used in finalizing a block like this: In this case the signatures are
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In what situation would this arise? That is, when will we have such a block but not already know its type?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Part of validation involves refinalizing a node then checking that it's actual type matches its refinalized type. So even though we already know the type of the node, we have to recalculate it to make sure we are correct about what the type is.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, right, we don't use the existing type there. I think we might be able to, though, if it would make all this simpler?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some basic question about the Binaryen block validation rules that are not necessarily related to the Poppy IR or this PR:
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
AFAICT, The validator has no choice to recalculate the type of each expression because it needs to be able to detect expressions with incorrect types. I'm not sure how that could be changed without making the validator insensitive to that kind of typing problem.
Good question! When Binaryen IR is emitted, binaryen/src/wasm/wasm-stack.cpp Lines 25 to 27 in cf51ca4
That means that only blocks that could have type
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry I'm not sure if I understand. I was mostly asking about the current AST's validation rule (not Poppy IR), in
Where is your old local branch? Have we seen that type inference pass or have you uploaded it as a PR?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider what would happen right now if this Binaryen IR block were given type If this block were unreachable, the current code in
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||
| }; | ||||||||
|
|
||||||||
| // Calculates stack machine data flow, associating the sources and destinations | ||||||||
|
|
||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we still need to check this even if
ais unreachable? I asked the same question in your formal definition of the rules in the comments.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nope, in that case we would do an early return before this final check: https://github.com/WebAssembly/binaryen/pull/3543/files#diff-bf06899150c7e36bf606885aeb35046d00448bc3db1413a2a02ce5c0a093900cR151-R155