WebAssembly · tlively · Feb 11, 2021 · Feb 4, 2021 · Feb 4, 2021 · Feb 4, 2021
diff --git a/src/ir/stack-utils.cpp b/src/ir/stack-utils.cpp
@@ -83,48 +83,6 @@ bool StackSignature::composes(const StackSignature& next) const {
                     });
 }
 
-bool StackSignature::satisfies(Signature sig) const {
-  if (sig.params.size() < params.size() ||
-      sig.results.size() < results.size()) {
-    // Not enough values provided or too many produced
-    return false;
-  }
-  bool paramSuffixMatches =
-    std::equal(params.begin(),
-               params.end(),
-               sig.params.end() - params.size(),
-               [](const Type& consumed, const Type& provided) {
-                 return Type::isSubType(provided, consumed);
-               });
-  if (!paramSuffixMatches) {
-    return false;
-  }
-  bool resultSuffixMatches =
-    std::equal(results.begin(),
-               results.end(),
-               sig.results.end() - results.size(),
-               [](const Type& produced, const Type& expected) {
-                 return Type::isSubType(produced, expected);
-               });
-  if (!resultSuffixMatches) {
-    return false;
-  }
-  if (unreachable) {
-    // The unreachable can consume any additional provided params and produce
-    // any additional expected results, so we are done.
-    return true;
-  }
-  // Any additional provided params will pass through untouched, so they must be
-  // equivalent to the additional produced results.
-  return std::equal(sig.params.begin(),
-                    sig.params.end() - params.size(),
-                    sig.results.begin(),
-                    sig.results.end() - results.size(),
-                    [](const Type& produced, const Type& expected) {
-                      return Type::isSubType(produced, expected);
-                    });
-}
-
 StackSignature& StackSignature::operator+=(const StackSignature& next) {
   assert(composes(next));
   std::vector<Type> stack(results.begin(), results.end());
@@ -160,6 +118,140 @@ StackSignature StackSignature::operator+(const StackSignature& next) const {
   return sig;
 }
 
+bool StackSignature::isSubType(StackSignature a, StackSignature b) {
+  if (a.params.size() > b.params.size() ||
+      a.results.size() > b.results.size()) {
+    // `a` consumes or produces more values than `b` can provides or expects.
+    return false;
+  }
+  if (b.unreachable && !a.unreachable) {
+    // Non-polymorphic sequences cannot be typed as being polymorphic.
+    return false;
+  }
+  bool paramSuffixMatches =
+    std::equal(a.params.begin(),
+               a.params.end(),
+               b.params.end() - a.params.size(),
+               [](const Type& consumed, const Type& provided) {
+                 return Type::isSubType(provided, consumed);
+               });
+  if (!paramSuffixMatches) {
+    return false;
+  }
+  bool resultSuffixMatches =
+    std::equal(a.results.begin(),
+               a.results.end(),
+               b.results.end() - a.results.size(),
+               [](const Type& produced, const Type& expected) {
+                 return Type::isSubType(produced, expected);
+               });
+  if (!resultSuffixMatches) {
+    return false;
+  }
+  if (a.unreachable) {
+    // The unreachable can consume any additional provided params and produce
+    // any additional expected results, so we are done.
+    return true;
+  }
+  // Any additional provided params will pass through untouched, so they must be
+  // compatible with the additional produced results.
+  return std::equal(b.params.begin(),
+                    b.params.end() - a.params.size(),
+                    b.results.begin(),
+                    b.results.end() - a.results.size(),
+                    [](const Type& provided, const Type& expected) {
+                      return Type::isSubType(provided, expected);
+                    });
+}
+
+bool StackSignature::haveLeastUpperBound(StackSignature a, StackSignature b) {
+  // If a signature is unreachable, the LUB could extend its params and results
+  // arbitrarily. Otherwise, the LUB must extend its params and results
+  // uniformly so that each additional param is a subtype of the corresponding
+  // additional result.
+  auto extensionCompatible = [](auto self, auto other) -> bool {
+    if (self.unreachable) {
+      return true;
+    }
+    // If no extension, then no problem.
+    if (self.params.size() >= other.params.size() &&
+        self.results.size() >= other.results.size()) {
+      return true;
+    }
+    auto extSize = other.params.size() - self.params.size();
+    if (extSize != other.results.size() - self.results.size()) {
+      return false;
+    }
+    return std::equal(other.params.begin(),
+                      other.params.begin() + extSize,
+                      other.results.begin(),
+                      other.results.begin() + extSize,
+                      [](const Type& param, const Type& result) {
+                        return Type::isSubType(param, result);
+                      });
+  };
+  if (!extensionCompatible(a, b) || !extensionCompatible(b, a)) {
+    return false;
+  }
+
+  auto valsCompatible = [](auto as, auto bs, auto compatible) -> bool {
+    // Canonicalize so the as are shorter and any unshared prefix is on bs.
+    if (bs.size() < as.size()) {
+      std::swap(as, bs);
+    }
+    // Check that shared values after the unshared prefix have are compatible.
+    size_t diff = bs.size() - as.size();
+    for (size_t i = 0, shared = as.size(); i < shared; ++i) {
+      if (!compatible(as[i], bs[i + diff])) {
+        return false;
+      }
+    }
+    return true;
+  };
+
+  bool paramsOk = valsCompatible(a.params, b.params, [](Type a, Type b) {
+    assert(a == b && "TODO: calculate greatest lower bound to handle "
+                     "contravariance correctly");
+    return true;
+  });
+  bool resultsOk = valsCompatible(a.results, b.results, [](Type a, Type b) {
+    return Type::getLeastUpperBound(a, b) != Type::none;
+  });
+  return paramsOk && resultsOk;
+}
+
+StackSignature StackSignature::getLeastUpperBound(StackSignature a,
+                                                  StackSignature b) {
+  assert(haveLeastUpperBound(a, b));
+
+  auto combineVals = [](auto as, auto bs, auto combine) -> std::vector<Type> {
+    // Canonicalize so the as are shorter and any unshared prefix is on bs.
+    if (bs.size() < as.size()) {
+      std::swap(as, bs);
+    }
+    // Combine shared values after the unshared prefix.
+    size_t diff = bs.size() - as.size();
+    std::vector<Type> vals(bs.begin(), bs.begin() + diff);
+    for (size_t i = 0, shared = as.size(); i < shared; ++i) {
+      vals.push_back(combine(as[i], bs[i + diff]));
+    }
+    return vals;
+  };
+
+  auto params = combineVals(a.params, b.params, [&](Type a, Type b) {
+    assert(a == b && "TODO: calculate greatest lower bound to handle "
+                     "contravariance correctly");
+    return a;
+  });
+
+  auto results = combineVals(a.results, b.results, [&](Type a, Type b) {
+    return Type::getLeastUpperBound(a, b);
+  });
+
+  return StackSignature{
+    Type(params), Type(results), a.unreachable && b.unreachable};
+}
+
 StackFlow::StackFlow(Block* block) {
   // Encapsulates the logic for treating the block and its children
   // uniformly. The end of the block is treated as if it consumed values

diff --git a/src/ir/stack-utils.h b/src/ir/stack-utils.h
@@ -16,55 +16,7 @@
 
 //
 // stack-utils.h: Utilities for manipulating and analyzing stack machine code in
-// the form of Poppy IR.
-//
-// Poppy IR represents stack machine code using normal Binaryen IR types by
-// imposing the following constraints:
-//
-//  1. Function bodies and children of control flow (except If conditions) must
-//     be blocks.
-//
-//  2. Blocks may have any Expressions except for Pops as children. The sequence
-//     of instructions in a block follows the same validation rules as in
-//     WebAssembly. That means that any expression may have a concrete type, not
-//     just the final expression in the block.
-//
-//  3. All other children must be Pops, which are used to determine the input
-//     stack type of each instruction. Pops may not have `unreachable` type.
-//
-//  4. Only control flow structures and instructions that have polymorphic
-//     unreachable behavior in WebAssembly may have unreachable type. Blocks may
-//     be unreachable when they are not branch targets and when they have an
-//     unreachable child. Note that this means a block may be unreachable even
-//     if it would otherwise have a concrete type, unlike in Binaryen IR.
-//
-// For example, the following Binaryen IR Function:
-//
-//   (func $foo (result i32)
-//    (i32.add
-//     (i32.const 42)
-//     (i32.const 5)
-//    )
-//   )
-//
-// would look like this in Poppy IR:
-//
-//   (func $foo (result i32)
-//    (block
-//     (i32.const 42)
-//     (i32.const 5)
-//     (i32.add
-//      (i32.pop)
-//      (i32.pop)
-//     )
-//    )
-//   )
-//
-// Notice that the sequence of instructions in the block is now identical to the
-// sequence of instructions in raw WebAssembly. Also note that Poppy IR's
-// validation rules are largely additional on top of the normal Binaryen IR
-// validation rules, with the only exceptions being block body validation and
-// block unreahchability rules.
+// the form of Poppy IR. See src/passes/Poppify.cpp for Poppy IR documentation.
 //
 
 #ifndef wasm_ir_stack_h
@@ -121,7 +73,7 @@ struct StackSignature {
 
   StackSignature()
     : params(Type::none), results(Type::none), unreachable(false) {}
-  StackSignature(Type params, Type results, bool unreachable = false)
+  StackSignature(Type params, Type results, bool unreachable)
     : params(params), results(results), unreachable(unreachable) {}
 
   StackSignature(const StackSignature&) = default;
@@ -146,10 +98,6 @@ struct StackSignature {
   // Return `true` iff `next` composes after this stack signature.
   bool composes(const StackSignature& next) const;
 
-  // Whether a block whose contents have this stack signature could be typed
-  // with `sig`.
-  bool satisfies(Signature sig) const;
-
   // Compose stack signatures. Assumes they actually compose.
   StackSignature& operator+=(const StackSignature& next);
   StackSignature operator+(const StackSignature& next) const;
@@ -158,6 +106,72 @@ struct StackSignature {
     return params == other.params && results == other.results &&
            unreachable == other.unreachable;
   }
+
+  // Whether a block whose contents have stack signature `a` could be typed with
+  // stack signature `b`, i.e. whether it could be used in a context that
+  // expects signature `b`. Formally, where `a` is the LHS and `b` the RHS:
+  //
+  // [t1*] -> [t2*] <: [s1* t1'*] -> [s2* t2'*] iff
+  //
+  //  - t1' <: t1
+  //  - t2 <: t2'
+  //  - s1 <: s2
+  //
+  //  Note that neither signature is unreachable in this rule and that the
+  //  cardinalities of t1* and t1'*, t2* and t2'*, and s1* and s2* must match.
+  //
+  // [t1*] -> [t2*] {u} <: [s1* t1'*] -> [s2* t2'*] {u?} iff
+  //
+  //  - [t1*] -> [t2*] <: [t1'*] -> [t2'*]
+  //
+  //  Note that s1* and s2* can have different cardinalities and arbitrary types
+  //  in this rule.
+  //
+  // As an example of the first rule, consider this instruction sequence:
+  //
+  //   ref.as_func
+  //   drop
+  //   i32.add
+  //
+  // The most specific type you could give this sequence is [i32, i32, anyref]
+  // -> [i32]. But it could also be used in a context that expects [i32, i32,
+  // funcref] -> [i32] because ref.as_func can accept funcref or any other
+  // subtype of anyref. That's where the contravariance comes from. This
+  // instruction sequence could also be used anywhere that expects [f32, i32,
+  // i32, anyref] -> [f32, i32] because the f32 simply stays on the stack
+  // throughout the sequence. That's where the the prefix extension comes from.
+  //
+  // For the second rule, consider this sequence:
+  //
+  //   ref.as_func
+  //   drop
+  //   i32.add
+  //   unreachable
+  //   i32.const 0
+  //
+  // This instruction sequence has the specific type [i32, i32, anyref] -> [i32]
+  // {u}. It can be used in any situation the previous block can be used in, but
+  // can additionally be used in contexts that expect something like [f32, i32,
+  // i32, anyref] -> [v128, i32]. Because of the unreachable polymorphism, the
+  // additional prefixes on the params and results do not need to match.
+  //
+  // Note that a reachable stack signature (without a {u}) is never a subtype of
+  // any unreachable stack signature (with a {u}). This makes sense because a
+  // sequence of instructions that has no polymorphic unreachable behavior
+  // cannot be given a type that says it does have polymorphic unreachable
+  // behavior.
+  //
+  // Also, [] -> [] {u} is the bottom type here; it is a subtype of every other
+  // stack signature. This corresponds to (unreachable) being able to be given
+  // any stack signature.
+  static bool isSubType(StackSignature a, StackSignature b);
+
+  // Returns true iff `a` and `b` have a LUB, i.e. a minimal StackSignature that
+  // could type block contents of either type `a` or type `b`.
+  static bool haveLeastUpperBound(StackSignature a, StackSignature b);
+
+  // Returns the LUB of `a` and `b`. Assumes that the LUB exists.
+  static StackSignature getLeastUpperBound(StackSignature a, StackSignature b);
 void BinaryInstWriter::emitResultType(Type type) { 
   if (type == Type::unreachable) { 
     parent.writeType(Type::none); 
 void BinaryInstWriter::emitResultType(Type type) { 
   if (type == Type::unreachable) { 
     parent.writeType(Type::none); 
 };
 
 // Calculates stack machine data flow, associating the sources and destinations

diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp
@@ -633,9 +633,11 @@ void FunctionValidator::validatePoppyBlockElements(Block* curr) {
                  curr,
                  "unreachable block should have unreachable element");
   } else {
-    if (!shouldBeTrue(blockSig.satisfies(Signature(Type::none, curr->type)),
-                      curr,
-                      "block contents should satisfy block type") &&
+    if (!shouldBeTrue(
+          StackSignature::isSubType(
+            blockSig, StackSignature(Type::none, curr->type, false)),
+          curr,
+          "block contents should satisfy block type") &&
         !info.quiet) {
       getStream() << "contents: " << blockSig.results
                   << (blockSig.unreachable ? " [unreachable]" : "") << "\n"