Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 134 additions & 42 deletions src/ir/stack-utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,48 +83,6 @@ bool StackSignature::composes(const StackSignature& next) const {
});
}

bool StackSignature::satisfies(Signature sig) const {
if (sig.params.size() < params.size() ||
sig.results.size() < results.size()) {
// Not enough values provided or too many produced
return false;
}
bool paramSuffixMatches =
std::equal(params.begin(),
params.end(),
sig.params.end() - params.size(),
[](const Type& consumed, const Type& provided) {
return Type::isSubType(provided, consumed);
});
if (!paramSuffixMatches) {
return false;
}
bool resultSuffixMatches =
std::equal(results.begin(),
results.end(),
sig.results.end() - results.size(),
[](const Type& produced, const Type& expected) {
return Type::isSubType(produced, expected);
});
if (!resultSuffixMatches) {
return false;
}
if (unreachable) {
// The unreachable can consume any additional provided params and produce
// any additional expected results, so we are done.
return true;
}
// Any additional provided params will pass through untouched, so they must be
// equivalent to the additional produced results.
return std::equal(sig.params.begin(),
sig.params.end() - params.size(),
sig.results.begin(),
sig.results.end() - results.size(),
[](const Type& produced, const Type& expected) {
return Type::isSubType(produced, expected);
});
}

StackSignature& StackSignature::operator+=(const StackSignature& next) {
assert(composes(next));
std::vector<Type> stack(results.begin(), results.end());
Expand Down Expand Up @@ -160,6 +118,140 @@ StackSignature StackSignature::operator+(const StackSignature& next) const {
return sig;
}

bool StackSignature::isSubType(StackSignature a, StackSignature b) {
if (a.params.size() > b.params.size() ||
a.results.size() > b.results.size()) {
// `a` consumes or produces more values than `b` can provides or expects.
return false;
}
if (b.unreachable && !a.unreachable) {
// Non-polymorphic sequences cannot be typed as being polymorphic.
return false;
}
bool paramSuffixMatches =
std::equal(a.params.begin(),
a.params.end(),
b.params.end() - a.params.size(),
[](const Type& consumed, const Type& provided) {
return Type::isSubType(provided, consumed);
});
if (!paramSuffixMatches) {
return false;
}
bool resultSuffixMatches =
std::equal(a.results.begin(),
a.results.end(),
b.results.end() - a.results.size(),
[](const Type& produced, const Type& expected) {
return Type::isSubType(produced, expected);
});
if (!resultSuffixMatches) {
return false;
}
if (a.unreachable) {
// The unreachable can consume any additional provided params and produce
// any additional expected results, so we are done.
return true;
}
// Any additional provided params will pass through untouched, so they must be
// compatible with the additional produced results.
return std::equal(b.params.begin(),
b.params.end() - a.params.size(),
b.results.begin(),
b.results.end() - a.results.size(),
[](const Type& provided, const Type& expected) {
return Type::isSubType(provided, expected);
});
Comment on lines +156 to +164
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need to check this even if a is unreachable? I asked the same question in your formal definition of the rules in the comments.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}

bool StackSignature::haveLeastUpperBound(StackSignature a, StackSignature b) {
// If a signature is unreachable, the LUB could extend its params and results
// arbitrarily. Otherwise, the LUB must extend its params and results
// uniformly so that each additional param is a subtype of the corresponding
// additional result.
auto extensionCompatible = [](auto self, auto other) -> bool {
if (self.unreachable) {
return true;
}
// If no extension, then no problem.
if (self.params.size() >= other.params.size() &&
self.results.size() >= other.results.size()) {
return true;
}
auto extSize = other.params.size() - self.params.size();
if (extSize != other.results.size() - self.results.size()) {
return false;
}
return std::equal(other.params.begin(),
other.params.begin() + extSize,
other.results.begin(),
other.results.begin() + extSize,
[](const Type& param, const Type& result) {
return Type::isSubType(param, result);
});
};
if (!extensionCompatible(a, b) || !extensionCompatible(b, a)) {
return false;
}

auto valsCompatible = [](auto as, auto bs, auto compatible) -> bool {
// Canonicalize so the as are shorter and any unshared prefix is on bs.
if (bs.size() < as.size()) {
std::swap(as, bs);
}
// Check that shared values after the unshared prefix have are compatible.
size_t diff = bs.size() - as.size();
for (size_t i = 0, shared = as.size(); i < shared; ++i) {
if (!compatible(as[i], bs[i + diff])) {
return false;
}
}
return true;
};

bool paramsOk = valsCompatible(a.params, b.params, [](Type a, Type b) {
assert(a == b && "TODO: calculate greatest lower bound to handle "
"contravariance correctly");
return true;
});
bool resultsOk = valsCompatible(a.results, b.results, [](Type a, Type b) {
return Type::getLeastUpperBound(a, b) != Type::none;
});
return paramsOk && resultsOk;
}

StackSignature StackSignature::getLeastUpperBound(StackSignature a,
StackSignature b) {
assert(haveLeastUpperBound(a, b));

auto combineVals = [](auto as, auto bs, auto combine) -> std::vector<Type> {
// Canonicalize so the as are shorter and any unshared prefix is on bs.
if (bs.size() < as.size()) {
std::swap(as, bs);
}
// Combine shared values after the unshared prefix.
size_t diff = bs.size() - as.size();
std::vector<Type> vals(bs.begin(), bs.begin() + diff);
for (size_t i = 0, shared = as.size(); i < shared; ++i) {
vals.push_back(combine(as[i], bs[i + diff]));
}
return vals;
};

auto params = combineVals(a.params, b.params, [&](Type a, Type b) {
assert(a == b && "TODO: calculate greatest lower bound to handle "
"contravariance correctly");
return a;
});

auto results = combineVals(a.results, b.results, [&](Type a, Type b) {
return Type::getLeastUpperBound(a, b);
});

return StackSignature{
Type(params), Type(results), a.unreachable && b.unreachable};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the LUB unreachable when both of the signatures are unreachable?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Being unreachable means that more elements can be appended to the params or prepended to the results as necessary. If two stack signatures have this property, then their LUB would have this property as well. If either signature was not unreachable and could not be extended, then the LUB should not be able to be extended either. This would be important for calculating the LUB of more than two unreachable stack signatures so that the number of params and results is not "frozen" by whatever the first two signatures to be combined are.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still not sure if I understand the concept of LUB here. In the usual typing sense LUB of two types is a supertype of the two but the lowest one in the type hierarchy. First I'm not sure how unreachability is considered in this type hierarchy, I could be wrong, but I feel, for the two signatures with the same type input and output [a]->[b], if one of them is unreachable, that means that type has more flexibility and it should be considered as a supertype of the other.

But here it is possible that we have a LUB of two types that is not unreachable but one of those two child types can be unreachable. I'm not sure if I explained my confusion very well, but I'd appreciate a bit more explanation, maybe with an example.

Copy link
Member Author

@tlively tlively Feb 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's the subtype relation on stack signatures, which answers the question "can an instruction sequence with the LHS stack signature be typed with the RHS stack signature?"

[t1*] -> [t2*] <: [s* t1'*] -> [s* t2'*] iff

  • t1'_i <: t1_i
  • t2_i <: t2'_i

[t1*] -> [t2*] {u} <: [s1* t1'*] -> [s2* t2'*] {u?} iff

  • [t1*] -> [t2*] <: [t1'*] -> [t2'*]

As an example of the first rule, consider the instruction sequence

ref.as_func
drop
i32.add

The most specific type you could give this sequence is [i32, i32, anyref] -> [i32]. But you could also use it in a context that expects [i32, i32, funcref] -> [i32] because ref.as_func can accept funcref or any other subtype of anyref. That's where the contravariance comes from. You could also use this instruction sequence anywhere that expects [f32, i32, i32, anyref] -> [f32, anyref] because the f32 simply stays on the stack throughout the sequence. That's where the the prefix extension comes from.

For the second rule, consider this sequence:

ref.as_func
drop
i32.add
unreachable
i32.const 0

This instruction sequence has the specific type [i32, i32, anyref] -> [i32] {u}. It can be used in any situation the previous block can be used in, but can additionally be used in contexts that expect something like [f32, i32, i32, anyref] -> [v128, i32]. Because of the unreachable polymorphism, the additional prefixes on the params and results do not need to match.

Note that a reachable stack signature (without a {u}) is never a subtype of any unreachable stack signature (with a {u}). This makes sense because a sequence of instructions that has no polymorphic unreachable behavior cannot be given a type that says it does have polymorphic unreachable behavior.

Also, [] -> [] {u} is the bottom type here; it is a subtype of every other stack signature. This corresponds to (unreachable) being able to be given any stack signature.

Having written this out, I realize the implementation does not correctly handle the contravariance of the params. I will fix that, although it won't matter until we support block inputs. I also screwed up the parameter extension, which should be a prefix, not a suffix.

}

StackFlow::StackFlow(Block* block) {
// Encapsulates the logic for treating the block and its children
// uniformly. The end of the block is treated as if it consumed values
Expand Down
122 changes: 68 additions & 54 deletions src/ir/stack-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,55 +16,7 @@

//
// stack-utils.h: Utilities for manipulating and analyzing stack machine code in
// the form of Poppy IR.
//
// Poppy IR represents stack machine code using normal Binaryen IR types by
// imposing the following constraints:
//
// 1. Function bodies and children of control flow (except If conditions) must
// be blocks.
//
// 2. Blocks may have any Expressions except for Pops as children. The sequence
// of instructions in a block follows the same validation rules as in
// WebAssembly. That means that any expression may have a concrete type, not
// just the final expression in the block.
//
// 3. All other children must be Pops, which are used to determine the input
// stack type of each instruction. Pops may not have `unreachable` type.
//
// 4. Only control flow structures and instructions that have polymorphic
// unreachable behavior in WebAssembly may have unreachable type. Blocks may
// be unreachable when they are not branch targets and when they have an
// unreachable child. Note that this means a block may be unreachable even
// if it would otherwise have a concrete type, unlike in Binaryen IR.
//
// For example, the following Binaryen IR Function:
//
// (func $foo (result i32)
// (i32.add
// (i32.const 42)
// (i32.const 5)
// )
// )
//
// would look like this in Poppy IR:
//
// (func $foo (result i32)
// (block
// (i32.const 42)
// (i32.const 5)
// (i32.add
// (i32.pop)
// (i32.pop)
// )
// )
// )
//
// Notice that the sequence of instructions in the block is now identical to the
// sequence of instructions in raw WebAssembly. Also note that Poppy IR's
// validation rules are largely additional on top of the normal Binaryen IR
// validation rules, with the only exceptions being block body validation and
// block unreahchability rules.
// the form of Poppy IR. See src/passes/Poppify.cpp for Poppy IR documentation.
//

#ifndef wasm_ir_stack_h
Expand Down Expand Up @@ -121,7 +73,7 @@ struct StackSignature {

StackSignature()
: params(Type::none), results(Type::none), unreachable(false) {}
StackSignature(Type params, Type results, bool unreachable = false)
StackSignature(Type params, Type results, bool unreachable)
: params(params), results(results), unreachable(unreachable) {}

StackSignature(const StackSignature&) = default;
Expand All @@ -146,10 +98,6 @@ struct StackSignature {
// Return `true` iff `next` composes after this stack signature.
bool composes(const StackSignature& next) const;

// Whether a block whose contents have this stack signature could be typed
// with `sig`.
bool satisfies(Signature sig) const;

// Compose stack signatures. Assumes they actually compose.
StackSignature& operator+=(const StackSignature& next);
StackSignature operator+(const StackSignature& next) const;
Expand All @@ -158,6 +106,72 @@ struct StackSignature {
return params == other.params && results == other.results &&
unreachable == other.unreachable;
}

// Whether a block whose contents have stack signature `a` could be typed with
// stack signature `b`, i.e. whether it could be used in a context that
// expects signature `b`. Formally, where `a` is the LHS and `b` the RHS:
//
// [t1*] -> [t2*] <: [s1* t1'*] -> [s2* t2'*] iff
//
// - t1' <: t1
// - t2 <: t2'
// - s1 <: s2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the ' in the LHS in the first of the three clauses? That seems to allow [anyref] -> [] <: [funcref] -> [] but the LHS cannot be used in a context that expects the RHS?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[anyref] -> [] can be used anywhere [funcref] -> [] is expected because funcref can be consumed by anything that expects an anyref. This is contravariance explained in one of the examples below.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I apologize, my confusion is almost certainly a matter of language. But I still don't get it:

[anyref] -> [] can be used anywhere [funcref] -> [] is expected because funcref can be consumed by anything that expects an anyref.

Your statement here uses "expected" in two places. First you say [funcref] -> [] is expected, and then you say expects an anyref. To me it seems like you're using the term "expected" in two different (reversed) ways, as they are about the exact opposite things, or that you have a typo - ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries, I find that I basically have to turn by brain inside out to wrap it around contravariance, so its no wonder everything looks reversed :)

Consider a context that expects an instruction sequence with type [funcref] -> []. It looks like this:

<prior context>

;; [t* funcref] on stack

[insert our block here]

;; [t*] on stack

<later context>

The block we insert needs to consume a funcref and produce no values on the stack. What if we actually plug in a block with type [anyref] -> []?

<prior context>

;; [t* funcref] on stack

[consume `anyref`, produce nothing]

;; [t*] on stack

<later context>

Our block expects an anyref, but it was provided a funcref by the context. This is fine, though, because funcref can be used by anything that expects an anyref.

The block of type [anyref] -> [] is valid to use in a context that expects a block of type [funcref] -> [], so the former type is a subtype of the latter type.

Copy link
Member

@aheejin aheejin Feb 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The analogy I found useful to understand is, basically, if A <: B, A can act like it is of type B, e.g., Cat can claim it is an Animal and be used it in place anywhere an Animal is expected.

When A = [anyref] -> [funcref] and B = [funcref] -> [anyref], even though A takes anyref, it can act like it takes funcref, because it can process all funcref arguments with no problem. (It is called contravariance in parameter types)
Also it can act like it returns anyref instead of funcref, because all values it returns will fit the condition of anyref. (It is called covariance in result types)
So here A is a subtype of B.

Copy link
Member

@kripken kripken Feb 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, thanks! So in [x] -> [y] ? [a] -> [b] the relation of x, a or y, b is reversed depending on if the values are consumed or produced... which does in fact make total sense. And then so does the rest.

//
// Note that neither signature is unreachable in this rule and that the
// cardinalities of t1* and t1'*, t2* and t2'*, and s1* and s2* must match.
//
// [t1*] -> [t2*] {u} <: [s1* t1'*] -> [s2* t2'*] {u?} iff
//
// - [t1*] -> [t2*] <: [t1'*] -> [t2'*]
//
// Note that s1* and s2* can have different cardinalities and arbitrary types
// in this rule.
//
// As an example of the first rule, consider this instruction sequence:
//
// ref.as_func
// drop
// i32.add
//
// The most specific type you could give this sequence is [i32, i32, anyref]
// -> [i32]. But it could also be used in a context that expects [i32, i32,
// funcref] -> [i32] because ref.as_func can accept funcref or any other
// subtype of anyref. That's where the contravariance comes from. This
// instruction sequence could also be used anywhere that expects [f32, i32,
// i32, anyref] -> [f32, i32] because the f32 simply stays on the stack
// throughout the sequence. That's where the the prefix extension comes from.
//
// For the second rule, consider this sequence:
//
// ref.as_func
// drop
// i32.add
// unreachable
// i32.const 0
//
// This instruction sequence has the specific type [i32, i32, anyref] -> [i32]
// {u}. It can be used in any situation the previous block can be used in, but
// can additionally be used in contexts that expect something like [f32, i32,
// i32, anyref] -> [v128, i32]. Because of the unreachable polymorphism, the
// additional prefixes on the params and results do not need to match.
//
// Note that a reachable stack signature (without a {u}) is never a subtype of
// any unreachable stack signature (with a {u}). This makes sense because a
// sequence of instructions that has no polymorphic unreachable behavior
// cannot be given a type that says it does have polymorphic unreachable
// behavior.
//
// Also, [] -> [] {u} is the bottom type here; it is a subtype of every other
// stack signature. This corresponds to (unreachable) being able to be given
// any stack signature.
static bool isSubType(StackSignature a, StackSignature b);

// Returns true iff `a` and `b` have a LUB, i.e. a minimal StackSignature that
// could type block contents of either type `a` or type `b`.
static bool haveLeastUpperBound(StackSignature a, StackSignature b);

// Returns the LUB of `a` and `b`. Assumes that the LUB exists.
static StackSignature getLeastUpperBound(StackSignature a, StackSignature b);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it be obvious what an LUB means here? The effects of unreachability in particular are not obvious enough to me without reading the implementation's source code. Maybe I am just missing an intuition for how/where an LUB would be used.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add a comment clarifying the definition. This will be used in finalizing a block like this:

(block
  (i32.const 0
  (ref.null extern)
  (br 0
    (pop i32 extern)
  )
  (ref.null func)
)

In this case the signatures are []->[i32, externref] and []->[funcref]{unreachable} and the LUB would be []->[i32, anyref].

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In what situation would this arise? That is, when will we have such a block but not already know its type?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Part of validation involves refinalizing a node then checking that it's actual type matches its refinalized type. So even though we already know the type of the node, we have to recalculate it to make sure we are correct about what the type is.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, right, we don't use the existing type there.

I think we might be able to, though, if it would make all this simpler?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some basic question about the Binaryen block validation rules that are not necessarily related to the Poppy IR or this PR:
In your example, even if the br is an unconditional branch, can you remind me why we still consider the type of the last element in the block?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, right, we don't use the existing type there.

I think we might be able to, though, if it would make all this simpler?

AFAICT, The validator has no choice to recalculate the type of each expression because it needs to be able to detect expressions with incorrect types. I'm not sure how that could be changed without making the validator insensitive to that kind of typing problem.

Some basic question about the Binaryen block validation rules that are not necessarily related to the Poppy IR or this PR:
In your example, even if the br is an unconditional branch, can you remind me why we still consider the type of the last element in the block?

Good question! When Binaryen IR is emitted, unreachable blocks are unconditionally given block signature []->[].

void BinaryInstWriter::emitResultType(Type type) {
if (type == Type::unreachable) {
parent.writeType(Type::none);

That means that only blocks that could have type none can be candidates to be unreachable. In contrast, my old local branch has Poppy IR go through a type inference pass that assigns concrete types to all unreachable blocks before it is emitted, so before that pass it is ok for blocks of any type to be marked unreachable. We could loosen the restrictions on Binaryen IR by running it through a similar type inference pass before emitting it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I'm not sure if I understand. I was mostly asking about the current AST's validation rule (not Poppy IR), in Block::finalize. If a br is in the middle of a block, there shouldn't be a chance that we fall through at the end of the block, but should we still consider the type of the last element in computing LUB?

In contrast, my old local branch has Poppy IR go through a type inference pass that assigns concrete types to all unreachable blocks before it is emitted, so before that pass it is ok for blocks of any type to be marked unreachable. We could loosen the restrictions on Binaryen IR by running it through a similar type inference pass before emitting it.

Where is your old local branch? Have we seen that type inference pass or have you uploaded it as a PR?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider what would happen right now if this Binaryen IR block were given type unreachable:

(block
  (unreachable)
  (i32.const 0)
)

If this block were unreachable, the current code in BinaryInstWriter::emitResultType would emit it into a binary with signature []->[]. But that would be an invalid Wasm binary because the block returns an i32. Binaryen avoids emitting invalid blocks like this by forcing the blocks to have concrete types in Binaryen IR so that they will be emitted with the correct type rather than with Type::none.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

};

// Calculates stack machine data flow, associating the sources and destinations
Expand Down
8 changes: 5 additions & 3 deletions src/wasm/wasm-validator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,9 +633,11 @@ void FunctionValidator::validatePoppyBlockElements(Block* curr) {
curr,
"unreachable block should have unreachable element");
} else {
if (!shouldBeTrue(blockSig.satisfies(Signature(Type::none, curr->type)),
curr,
"block contents should satisfy block type") &&
if (!shouldBeTrue(
StackSignature::isSubType(
blockSig, StackSignature(Type::none, curr->type, false)),
curr,
"block contents should satisfy block type") &&
!info.quiet) {
getStream() << "contents: " << blockSig.results
<< (blockSig.unreachable ? " [unreachable]" : "") << "\n"
Expand Down
Loading