flexflow · lockshaw · Jun 19, 2024 · Jun 4, 2024 · Jun 4, 2024 · Jun 10, 2024
diff --git a/.proj.toml b/.proj.toml
@@ -11,6 +11,7 @@ build_targets = [
   # "substitutions",
   # "compiler",
   "substitution-generator",
+  "local-execution",
 ]
 test_targets = [
   "utils-tests",

diff --git a/flake.lock b/flake.lock
diff --git a/flake.nix b/flake.nix
@@ -78,6 +78,8 @@
             "-DFF_USE_EXTERNAL_TYPE_INDEX=ON"
           ];
 
+          RC_PARAMS = "max_discard_ratio=100";
+
           buildInputs = builtins.concatLists [
             (with pkgs; [
               zlib
@@ -110,7 +112,7 @@
 
         default = mkShell {
           inputsFrom = [ ci ];
-          inherit (ci) CMAKE_FLAGS;
+          inherit (ci) CMAKE_FLAGS RC_PARAMS;
 
           VIMPLUGINS = lib.strings.concatStringsSep "," [
             "${proj-repo.packages.${system}.proj-nvim}"

diff --git a/lib/compiler/test/src/test_machine_mapping.cc b/lib/compiler/test/src/test_machine_mapping.cc
@@ -3,7 +3,7 @@
 
 TEST_SUITE(FF_TEST_SUITE) {
   // TEST_CASE("MachineMapping::combine") {
-  //   rc::check([](MachineMapping const &m0, MachineMapping const &m1) {
+  //   RC_SUBCASE([](MachineMapping const &m0, MachineMapping const &m1) {
   //     RC_PRE(MachineMapping::nodes_are_disjoint(m0, m1));
 
   //     MachineMapping comb = MachineMapping::combine(m0, m1);
@@ -16,7 +16,7 @@ TEST_SUITE(FF_TEST_SUITE) {
   // }
 
   // TEST_CASE("OptimalCostResult::infinity") {
-  //   rc::check([](OptimalCostResult const &c) {
+  //   RC_SUBCASE([](OptimalCostResult const &c) {
   //     RC_ASSERT(c.runtime <= OptimalCostResult::infinity().runtime);
   //   });
   // }

diff --git a/lib/compiler/test/src/test_optimal_cost.cc b/lib/compiler/test/src/test_optimal_cost.cc
@@ -16,7 +16,7 @@ TEST_SUITE(FF_TEST_SUITE) {
   //                                        MachineSpecification const &) {
   //     return std::unordered_set<MachineView>{make_1d_machine_view(0, 1, 1)};
   //   };
-  //   rc::check([](ParallelComputationGraph const &g,
+  //   RC_SUBCASE([](ParallelComputationGraph const &g,
   //                MachineSpecification const &machine_spec) {
   //     OptimalCostCache cached_subgraph_costs;
   //     OptimalCostResult result = optimal_cost(g,

diff --git a/lib/compiler/test/src/test_unity_algorithm.cc b/lib/compiler/test/src/test_unity_algorithm.cc
@@ -6,7 +6,7 @@
 TEST_SUITE(FF_TEST_SUITE) {
   // Rapidcheck does not work for now
   // TEST_CASE("graph_optimize") {
-  //   rc::check([](ComputationGraph const &g,
+  //   RC_SUBCASE([](ComputationGraph const &g,
   //                float alpha,
   //                int budget,
   //                float threshold,

diff --git a/lib/kernels/include/kernels/legion_dim_t.dtg.h b/lib/kernels/include/kernels/legion_dim_t.dtg.h
diff --git a/lib/kernels/src/kernels/legion_dim_t.dtg.cc b/lib/kernels/src/kernels/legion_dim_t.dtg.cc
diff --git a/lib/local-execution/src/ops/attention.cc b/lib/local-execution/src/ops/attention.cc
@@ -16,6 +16,7 @@
 #include "attention.h"
 #include "kernels/attention_kernels.h"
 #include "local-execution/op_task_signature.h"
+#include "op-attrs/ops/attention/multihead_attention_parallel_inputs.h"
 
 namespace FlexFlow {
 
@@ -95,31 +96,24 @@ static DeviceSpecific<DeviceStates>
   ParallelTensorShape value_parallel_tensor_shape =
       acc.get_argument<ParallelTensorShape>(VALUE_PARALLEL_TENSOR_SHAPE);
 
-  MultiHeadAttentionInputs inputs = {
-      shard_dim_at_idx(query_parallel_tensor_shape, ff_dim_t{0}).size,
-      shard_dim_at_idx(query_parallel_tensor_shape, ff_dim_t{1}).size,
-      qProjSize,
-      kProjSize,
-      vProjSize,
-      query_parallel_tensor_shape.data_type};
-  ;
+  MultiHeadAttentionParallelInputs parsed = throw_if_unexpected(
+      parse_attention_parallel_input_shape(query_parallel_tensor_shape,
+                                           key_parallel_tensor_shape,
+                                           value_parallel_tensor_shape));
   ParallelTensorShape weight_parallel_tensor_shape =
       throw_if_unexpected(get_weights_shape(attrs,
                                             query_parallel_tensor_shape,
                                             key_parallel_tensor_shape,
                                             value_parallel_tensor_shape));
 
-  int kvSeqLength = get_kvSeqLength(inputs);
-  int qSize = get_qSize(inputs);
-  int kSize = get_kSize(inputs);
-  int vSize = get_vSize(inputs);
-
-  int qoSeqLength =
-      dim_at_idx(get_piece_shape(query_parallel_tensor_shape), ff_dim_t(1));
-  int num_samples =
-      dim_at_idx(get_piece_shape(query_parallel_tensor_shape), ff_dim_t(2));
-  int num_heads =
-      dim_at_idx(get_piece_shape(weight_parallel_tensor_shape), ff_dim_t(1));
+  int kvSeqLength = get_kvSeqLength(parsed);
+  int qSize = get_qSize(parsed);
+  int kSize = get_kSize(parsed);
+  int vSize = get_vSize(parsed);
+
+  int qoSeqLength = get_qoSeqLength(parsed);
+  int num_samples = get_num_samples(parsed);
+  int num_heads = attrs.num_heads;
 
   MHAPerDeviceState per_device_state = init_kernel(handle,
                                                    allocator,

diff --git a/lib/op-attrs/include/op-attrs/datatype.h b/lib/op-attrs/include/op-attrs/datatype.h
@@ -58,6 +58,8 @@ using DataTypeValue = std::variant<real_type<DataType::FLOAT>,
 
 size_t size_of_datatype(DataType);
 
+bool can_strictly_promote_datatype_from_to(DataType, DataType);
+
 } // namespace FlexFlow
 
 #endif
diff --git a/lib/op-attrs/include/op-attrs/ff_dim.dtg.h b/lib/op-attrs/include/op-attrs/ff_dim.dtg.h
diff --git a/lib/op-attrs/include/op-attrs/get_output_shapes.h b/lib/op-attrs/include/op-attrs/get_output_shapes.h
@@ -112,12 +112,8 @@ std::vector<TensorShape> get_output_shapes(Attrs const &attrs,
 
 ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const &,
                                      std::vector<ParallelTensorShape> const &);
-ParallelTensorShape get_output_shape(CastAttrs const &,
-                                     ParallelTensorShape const &);
 ParallelTensorShape get_output_shape(ConcatAttrs const &,
                                      std::vector<ParallelTensorShape> const &);
-ParallelTensorShape get_output_shape(Conv2DAttrs const &,
-                                     ParallelTensorShape const &);
 ParallelTensorShape get_output_shape(DropoutAttrs const &,
                                      ParallelTensorShape const &);
 ParallelTensorShape get_output_shape(FlatAttrs const &,
@@ -131,8 +127,6 @@ ParallelTensorShape get_output_shape(Pool2DAttrs const &,
                                      ParallelTensorShape const &);
 ParallelTensorShape get_output_shape(ReduceAttrs const &,
                                      ParallelTensorShape const &);
-ParallelTensorShape get_output_shape(ReplicateAttrs const &,
-                                     ParallelTensorShape const &);
 ParallelTensorShape get_output_shape(ReverseAttrs const &,
                                      ParallelTensorShape const &);
 std::vector<ParallelTensorShape> get_output_shapes(SplitAttrs const &,

diff --git a/lib/op-attrs/include/op-attrs/l1_regularizer_attrs.dtg.h b/lib/op-attrs/include/op-attrs/l1_regularizer_attrs.dtg.h
diff --git a/lib/op-attrs/include/op-attrs/l2_regularizer_attrs.dtg.h b/lib/op-attrs/include/op-attrs/l2_regularizer_attrs.dtg.h
diff --git a/lib/op-attrs/include/op-attrs/ops/attention.h b/lib/op-attrs/include/op-attrs/ops/attention.h
@@ -42,17 +42,37 @@ tl::expected<TensorShape, std::string>
                       TensorShape const &input_q,
                       TensorShape const &input_k,
                       TensorShape const &input_v);
-tl::expected<ParallelTensorShape, std::string>
-    get_weights_shape(MultiHeadAttentionAttrs const &,
-                      ParallelTensorShape const &input_q,
-                      ParallelTensorShape const &input_k,
-                      ParallelTensorShape const &input_v);
-
+tl::expected<TensorShape, std::string>
+    get_input_bias_shape(MultiHeadAttentionAttrs const &,
+                         TensorShape const &input_q,
+                         TensorShape const &input_k,
+                         TensorShape const &input_v);
+tl::expected<TensorShape, std::string>
+    get_output_bias_shape(MultiHeadAttentionAttrs const &,
+                          TensorShape const &input_q,
+                          TensorShape const &input_k,
+                          TensorShape const &input_v);
 tl::expected<TensorShape, std::string>
     get_output_shape(MultiHeadAttentionAttrs const &,
                      TensorShape const &input_q,
                      TensorShape const &input_k,
                      TensorShape const &input_v);
+
+tl::expected<ParallelTensorShape, std::string>
+    get_weights_shape(MultiHeadAttentionAttrs const &,
+                      ParallelTensorShape const &input_q,
+                      ParallelTensorShape const &input_k,
+                      ParallelTensorShape const &input_v);
+tl::expected<ParallelTensorShape, std::string>
+    get_input_bias_shape(MultiHeadAttentionAttrs const &,
+                         ParallelTensorShape const &input_q,
+                         ParallelTensorShape const &input_k,
+                         ParallelTensorShape const &input_v);
+tl::expected<ParallelTensorShape, std::string>
+    get_output_bias_shape(MultiHeadAttentionAttrs const &,
+                          ParallelTensorShape const &input_q,
+                          ParallelTensorShape const &input_k,
+                          ParallelTensorShape const &input_v);
 tl::expected<ParallelTensorShape, std::string>
     get_output_shape(MultiHeadAttentionAttrs const &,
                      ParallelTensorShape const &input_q,