diff --git a/.proj.toml b/.proj.toml index b076671498..01ae36eddd 100644 --- a/.proj.toml +++ b/.proj.toml @@ -11,6 +11,7 @@ build_targets = [ # "substitutions", # "compiler", "substitution-generator", + "local-execution", ] test_targets = [ "utils-tests", diff --git a/flake.lock b/flake.lock index f0fc292a5e..dde0c989c3 100644 --- a/flake.lock +++ b/flake.lock @@ -43,11 +43,11 @@ ] }, "locked": { - "lastModified": 1717449667, - "narHash": "sha256-xFGnB44WadxlCa2LnlH82g1c89+7UAomVgytIewSwO0=", + "lastModified": 1717990636, + "narHash": "sha256-wqIc2qAkRfVp2d+NAVIYPKMx7YYpu8iBGHHT1U5sxhE=", "owner": "lockshaw", "repo": "proj", - "rev": "28b37a9bd993d3de3d80695eb3834a0436c805a4", + "rev": "f7e20a9c232dda1b945a775d91e1ed4f525b5f51", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 2dc005b113..1c54b4f025 100644 --- a/flake.nix +++ b/flake.nix @@ -78,6 +78,8 @@ "-DFF_USE_EXTERNAL_TYPE_INDEX=ON" ]; + RC_PARAMS = "max_discard_ratio=100"; + buildInputs = builtins.concatLists [ (with pkgs; [ zlib @@ -110,7 +112,7 @@ default = mkShell { inputsFrom = [ ci ]; - inherit (ci) CMAKE_FLAGS; + inherit (ci) CMAKE_FLAGS RC_PARAMS; VIMPLUGINS = lib.strings.concatStringsSep "," [ "${proj-repo.packages.${system}.proj-nvim}" diff --git a/lib/compiler/test/src/test_machine_mapping.cc b/lib/compiler/test/src/test_machine_mapping.cc index 365ed3e1db..4f9b879574 100644 --- a/lib/compiler/test/src/test_machine_mapping.cc +++ b/lib/compiler/test/src/test_machine_mapping.cc @@ -3,7 +3,7 @@ TEST_SUITE(FF_TEST_SUITE) { // TEST_CASE("MachineMapping::combine") { - // rc::check([](MachineMapping const &m0, MachineMapping const &m1) { + // RC_SUBCASE([](MachineMapping const &m0, MachineMapping const &m1) { // RC_PRE(MachineMapping::nodes_are_disjoint(m0, m1)); // MachineMapping comb = MachineMapping::combine(m0, m1); @@ -16,7 +16,7 @@ TEST_SUITE(FF_TEST_SUITE) { // } // TEST_CASE("OptimalCostResult::infinity") { - // rc::check([](OptimalCostResult const &c) { + // 
RC_SUBCASE([](OptimalCostResult const &c) { // RC_ASSERT(c.runtime <= OptimalCostResult::infinity().runtime); // }); // } diff --git a/lib/compiler/test/src/test_optimal_cost.cc b/lib/compiler/test/src/test_optimal_cost.cc index 8c176eb4d2..82c731888f 100644 --- a/lib/compiler/test/src/test_optimal_cost.cc +++ b/lib/compiler/test/src/test_optimal_cost.cc @@ -16,7 +16,7 @@ TEST_SUITE(FF_TEST_SUITE) { // MachineSpecification const &) { // return std::unordered_set{make_1d_machine_view(0, 1, 1)}; // }; - // rc::check([](ParallelComputationGraph const &g, + // RC_SUBCASE([](ParallelComputationGraph const &g, // MachineSpecification const &machine_spec) { // OptimalCostCache cached_subgraph_costs; // OptimalCostResult result = optimal_cost(g, diff --git a/lib/compiler/test/src/test_unity_algorithm.cc b/lib/compiler/test/src/test_unity_algorithm.cc index 614e9bb182..ed5e895a75 100644 --- a/lib/compiler/test/src/test_unity_algorithm.cc +++ b/lib/compiler/test/src/test_unity_algorithm.cc @@ -6,7 +6,7 @@ TEST_SUITE(FF_TEST_SUITE) { // Rapidcheck does not work for now // TEST_CASE("graph_optimize") { - // rc::check([](ComputationGraph const &g, + // RC_SUBCASE([](ComputationGraph const &g, // float alpha, // int budget, // float threshold, diff --git a/lib/kernels/include/kernels/legion_dim_t.dtg.h b/lib/kernels/include/kernels/legion_dim_t.dtg.h index 622f9c240a..3dbdfb55d8 100644 --- a/lib/kernels/include/kernels/legion_dim_t.dtg.h +++ b/lib/kernels/include/kernels/legion_dim_t.dtg.h @@ -19,7 +19,7 @@ namespace FlexFlow { struct legion_dim_t { legion_dim_t() = delete; - legion_dim_t(int const &value); + explicit legion_dim_t(int const &value); bool operator==(legion_dim_t const &) const; bool operator!=(legion_dim_t const &) const; @@ -33,16 +33,16 @@ struct legion_dim_t { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::legion_dim_t const &) const; +struct hash<::FlexFlow::legion_dim_t> { + size_t operator()(::FlexFlow::legion_dim_t const &) const; 
}; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::legion_dim_t from_json(json const &); - static void to_json(json &, FlexFlow::legion_dim_t const &); +struct adl_serializer<::FlexFlow::legion_dim_t> { + static ::FlexFlow::legion_dim_t from_json(json const &); + static void to_json(json &, ::FlexFlow::legion_dim_t const &); }; } // namespace nlohmann diff --git a/lib/kernels/src/kernels/legion_dim_t.dtg.cc b/lib/kernels/src/kernels/legion_dim_t.dtg.cc index 99c1a3b3a2..bb85e4b9dd 100644 --- a/lib/kernels/src/kernels/legion_dim_t.dtg.cc +++ b/lib/kernels/src/kernels/legion_dim_t.dtg.cc @@ -35,7 +35,7 @@ bool legion_dim_t::operator>=(legion_dim_t const &other) const { namespace std { size_t hash::operator()( - FlexFlow::legion_dim_t const &x) const { + ::FlexFlow::legion_dim_t const &x) const { size_t result = 0; result ^= std::hash{}(x.value) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -44,12 +44,12 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::legion_dim_t - adl_serializer::from_json(json const &j) { - return {j.at("value").template get()}; +::FlexFlow::legion_dim_t + adl_serializer<::FlexFlow::legion_dim_t>::from_json(json const &j) { + return ::FlexFlow::legion_dim_t{j.at("value").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::legion_dim_t const &v) { +void adl_serializer<::FlexFlow::legion_dim_t>::to_json( + json &j, ::FlexFlow::legion_dim_t const &v) { j["__type"] = "legion_dim_t"; j["value"] = v.value; } diff --git a/lib/local-execution/src/ops/attention.cc b/lib/local-execution/src/ops/attention.cc index c40e4f1e2d..be1fae475c 100644 --- a/lib/local-execution/src/ops/attention.cc +++ b/lib/local-execution/src/ops/attention.cc @@ -16,6 +16,7 @@ #include "attention.h" #include "kernels/attention_kernels.h" #include "local-execution/op_task_signature.h" +#include "op-attrs/ops/attention/multihead_attention_parallel_inputs.h" namespace FlexFlow { @@ 
-95,31 +96,24 @@ static DeviceSpecific ParallelTensorShape value_parallel_tensor_shape = acc.get_argument(VALUE_PARALLEL_TENSOR_SHAPE); - MultiHeadAttentionInputs inputs = { - shard_dim_at_idx(query_parallel_tensor_shape, ff_dim_t{0}).size, - shard_dim_at_idx(query_parallel_tensor_shape, ff_dim_t{1}).size, - qProjSize, - kProjSize, - vProjSize, - query_parallel_tensor_shape.data_type}; - ; + MultiHeadAttentionParallelInputs parsed = throw_if_unexpected( + parse_attention_parallel_input_shape(query_parallel_tensor_shape, + key_parallel_tensor_shape, + value_parallel_tensor_shape)); ParallelTensorShape weight_parallel_tensor_shape = throw_if_unexpected(get_weights_shape(attrs, query_parallel_tensor_shape, key_parallel_tensor_shape, value_parallel_tensor_shape)); - int kvSeqLength = get_kvSeqLength(inputs); - int qSize = get_qSize(inputs); - int kSize = get_kSize(inputs); - int vSize = get_vSize(inputs); - - int qoSeqLength = - dim_at_idx(get_piece_shape(query_parallel_tensor_shape), ff_dim_t(1)); - int num_samples = - dim_at_idx(get_piece_shape(query_parallel_tensor_shape), ff_dim_t(2)); - int num_heads = - dim_at_idx(get_piece_shape(weight_parallel_tensor_shape), ff_dim_t(1)); + int kvSeqLength = get_kvSeqLength(parsed); + int qSize = get_qSize(parsed); + int kSize = get_kSize(parsed); + int vSize = get_vSize(parsed); + + int qoSeqLength = get_qoSeqLength(parsed); + int num_samples = get_num_samples(parsed); + int num_heads = attrs.num_heads; MHAPerDeviceState per_device_state = init_kernel(handle, allocator, diff --git a/lib/op-attrs/include/op-attrs/datatype.h b/lib/op-attrs/include/op-attrs/datatype.h index a435c1bc12..6204b9ca49 100644 --- a/lib/op-attrs/include/op-attrs/datatype.h +++ b/lib/op-attrs/include/op-attrs/datatype.h @@ -58,6 +58,8 @@ using DataTypeValue = std::variant, size_t size_of_datatype(DataType); +bool can_strictly_promote_datatype_from_to(DataType, DataType); + } // namespace FlexFlow #endif diff --git 
a/lib/op-attrs/include/op-attrs/ff_dim.dtg.h b/lib/op-attrs/include/op-attrs/ff_dim.dtg.h index 1697f78196..f7df8f414b 100644 --- a/lib/op-attrs/include/op-attrs/ff_dim.dtg.h +++ b/lib/op-attrs/include/op-attrs/ff_dim.dtg.h @@ -19,7 +19,7 @@ namespace FlexFlow { struct ff_dim_t { ff_dim_t() = delete; - ff_dim_t(int const &value); + explicit ff_dim_t(int const &value); bool operator==(ff_dim_t const &) const; bool operator!=(ff_dim_t const &) const; @@ -33,16 +33,16 @@ struct ff_dim_t { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ff_dim_t const &) const; +struct hash<::FlexFlow::ff_dim_t> { + size_t operator()(::FlexFlow::ff_dim_t const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ff_dim_t from_json(json const &); - static void to_json(json &, FlexFlow::ff_dim_t const &); +struct adl_serializer<::FlexFlow::ff_dim_t> { + static ::FlexFlow::ff_dim_t from_json(json const &); + static void to_json(json &, ::FlexFlow::ff_dim_t const &); }; } // namespace nlohmann diff --git a/lib/op-attrs/include/op-attrs/get_output_shapes.h b/lib/op-attrs/include/op-attrs/get_output_shapes.h index 6ce9456797..be1cde37c4 100644 --- a/lib/op-attrs/include/op-attrs/get_output_shapes.h +++ b/lib/op-attrs/include/op-attrs/get_output_shapes.h @@ -112,12 +112,8 @@ std::vector get_output_shapes(Attrs const &attrs, ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const &, std::vector const &); -ParallelTensorShape get_output_shape(CastAttrs const &, - ParallelTensorShape const &); ParallelTensorShape get_output_shape(ConcatAttrs const &, std::vector const &); -ParallelTensorShape get_output_shape(Conv2DAttrs const &, - ParallelTensorShape const &); ParallelTensorShape get_output_shape(DropoutAttrs const &, ParallelTensorShape const &); ParallelTensorShape get_output_shape(FlatAttrs const &, @@ -131,8 +127,6 @@ ParallelTensorShape get_output_shape(Pool2DAttrs const &, ParallelTensorShape const 
&); ParallelTensorShape get_output_shape(ReduceAttrs const &, ParallelTensorShape const &); -ParallelTensorShape get_output_shape(ReplicateAttrs const &, - ParallelTensorShape const &); ParallelTensorShape get_output_shape(ReverseAttrs const &, ParallelTensorShape const &); std::vector get_output_shapes(SplitAttrs const &, diff --git a/lib/op-attrs/include/op-attrs/l1_regularizer_attrs.dtg.h b/lib/op-attrs/include/op-attrs/l1_regularizer_attrs.dtg.h index 1d4747db7e..9981219ca4 100644 --- a/lib/op-attrs/include/op-attrs/l1_regularizer_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/l1_regularizer_attrs.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct L1RegularizerAttrs { L1RegularizerAttrs() = delete; - L1RegularizerAttrs(float const &lambda); + explicit L1RegularizerAttrs(float const &lambda); bool operator==(L1RegularizerAttrs const &) const; bool operator!=(L1RegularizerAttrs const &) const; @@ -34,23 +34,23 @@ struct L1RegularizerAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::L1RegularizerAttrs const &) const; +struct hash<::FlexFlow::L1RegularizerAttrs> { + size_t operator()(::FlexFlow::L1RegularizerAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::L1RegularizerAttrs from_json(json const &); - static void to_json(json &, FlexFlow::L1RegularizerAttrs const &); +struct adl_serializer<::FlexFlow::L1RegularizerAttrs> { + static ::FlexFlow::L1RegularizerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::L1RegularizerAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::L1RegularizerAttrs> { + static Gen<::FlexFlow::L1RegularizerAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/l2_regularizer_attrs.dtg.h b/lib/op-attrs/include/op-attrs/l2_regularizer_attrs.dtg.h index 981d3f4905..cd26069de1 100644 --- 
a/lib/op-attrs/include/op-attrs/l2_regularizer_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/l2_regularizer_attrs.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct L2RegularizerAttrs { L2RegularizerAttrs() = delete; - L2RegularizerAttrs(float const &lambda); + explicit L2RegularizerAttrs(float const &lambda); bool operator==(L2RegularizerAttrs const &) const; bool operator!=(L2RegularizerAttrs const &) const; @@ -34,23 +34,23 @@ struct L2RegularizerAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::L2RegularizerAttrs const &) const; +struct hash<::FlexFlow::L2RegularizerAttrs> { + size_t operator()(::FlexFlow::L2RegularizerAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::L2RegularizerAttrs from_json(json const &); - static void to_json(json &, FlexFlow::L2RegularizerAttrs const &); +struct adl_serializer<::FlexFlow::L2RegularizerAttrs> { + static ::FlexFlow::L2RegularizerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::L2RegularizerAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::L2RegularizerAttrs> { + static Gen<::FlexFlow::L2RegularizerAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/attention.h b/lib/op-attrs/include/op-attrs/ops/attention.h index 8233775e63..e126c425dc 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention.h +++ b/lib/op-attrs/include/op-attrs/ops/attention.h @@ -42,17 +42,37 @@ tl::expected TensorShape const &input_q, TensorShape const &input_k, TensorShape const &input_v); -tl::expected - get_weights_shape(MultiHeadAttentionAttrs const &, - ParallelTensorShape const &input_q, - ParallelTensorShape const &input_k, - ParallelTensorShape const &input_v); - +tl::expected + get_input_bias_shape(MultiHeadAttentionAttrs const &, + TensorShape const &input_q, + TensorShape const 
&input_k, + TensorShape const &input_v); +tl::expected + get_output_bias_shape(MultiHeadAttentionAttrs const &, + TensorShape const &input_q, + TensorShape const &input_k, + TensorShape const &input_v); tl::expected get_output_shape(MultiHeadAttentionAttrs const &, TensorShape const &input_q, TensorShape const &input_k, TensorShape const &input_v); + +tl::expected + get_weights_shape(MultiHeadAttentionAttrs const &, + ParallelTensorShape const &input_q, + ParallelTensorShape const &input_k, + ParallelTensorShape const &input_v); +tl::expected + get_input_bias_shape(MultiHeadAttentionAttrs const &, + ParallelTensorShape const &input_q, + ParallelTensorShape const &input_k, + ParallelTensorShape const &input_v); +tl::expected + get_output_bias_shape(MultiHeadAttentionAttrs const &, + ParallelTensorShape const &input_q, + ParallelTensorShape const &input_k, + ParallelTensorShape const &input_v); tl::expected get_output_shape(MultiHeadAttentionAttrs const &, ParallelTensorShape const &input_q, diff --git a/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_inputs.dtg.h b/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_inputs.dtg.h index 7b61305a1a..815ca5edea 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_inputs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_inputs.dtg.h @@ -22,12 +22,12 @@ namespace FlexFlow { struct MultiHeadAttentionInputs { MultiHeadAttentionInputs() = delete; - MultiHeadAttentionInputs(size_t const &batch_size, - size_t const &sequence_length, - size_t const &query_size, - size_t const &key_size, - size_t const &value_size, - ::FlexFlow::DataType const &datatype); + explicit MultiHeadAttentionInputs(size_t const &batch_size, + size_t const &sequence_length, + size_t const &query_size, + size_t const &key_size, + size_t const &value_size, + ::FlexFlow::DataType const &datatype); bool operator==(MultiHeadAttentionInputs const &) const; bool 
operator!=(MultiHeadAttentionInputs const &) const; @@ -46,23 +46,23 @@ struct MultiHeadAttentionInputs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::MultiHeadAttentionInputs const &) const; +struct hash<::FlexFlow::MultiHeadAttentionInputs> { + size_t operator()(::FlexFlow::MultiHeadAttentionInputs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::MultiHeadAttentionInputs from_json(json const &); - static void to_json(json &, FlexFlow::MultiHeadAttentionInputs const &); +struct adl_serializer<::FlexFlow::MultiHeadAttentionInputs> { + static ::FlexFlow::MultiHeadAttentionInputs from_json(json const &); + static void to_json(json &, ::FlexFlow::MultiHeadAttentionInputs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::MultiHeadAttentionInputs> { + static Gen<::FlexFlow::MultiHeadAttentionInputs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.h b/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.h index 297b1f8f1c..fa7c83a881 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.h @@ -25,7 +25,7 @@ namespace FlexFlow { struct MultiHeadAttentionParallelInputs { MultiHeadAttentionParallelInputs() = delete; - MultiHeadAttentionParallelInputs( + explicit MultiHeadAttentionParallelInputs( ::FlexFlow::ShardParallelDim const &batch_dim, ::FlexFlow::ShardParallelDim const &sequence_dim, ::FlexFlow::ShardParallelDim const &query_dim, @@ -52,24 +52,24 @@ struct MultiHeadAttentionParallelInputs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::MultiHeadAttentionParallelInputs const &) const; +struct 
hash<::FlexFlow::MultiHeadAttentionParallelInputs> { + size_t operator()(::FlexFlow::MultiHeadAttentionParallelInputs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::MultiHeadAttentionParallelInputs from_json(json const &); +struct adl_serializer<::FlexFlow::MultiHeadAttentionParallelInputs> { + static ::FlexFlow::MultiHeadAttentionParallelInputs from_json(json const &); static void to_json(json &, - FlexFlow::MultiHeadAttentionParallelInputs const &); + ::FlexFlow::MultiHeadAttentionParallelInputs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::MultiHeadAttentionParallelInputs> { + static Gen<::FlexFlow::MultiHeadAttentionParallelInputs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/attention_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/attention_attrs.dtg.h index 18b2906759..8eef2df2eb 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/attention_attrs.dtg.h @@ -20,14 +20,14 @@ namespace FlexFlow { struct MultiHeadAttentionAttrs { MultiHeadAttentionAttrs() = delete; - MultiHeadAttentionAttrs(int const &embed_dim, - int const &num_heads, - int const &kdim, - int const &vdim, - float const &dropout, - bool const &bias, - bool const &add_bias_kv, - bool const &add_zero_attn); + explicit MultiHeadAttentionAttrs(int const &embed_dim, + int const &num_heads, + int const &kdim, + int const &vdim, + float const &dropout, + bool const &bias, + bool const &add_bias_kv, + bool const &add_zero_attn); bool operator==(MultiHeadAttentionAttrs const &) const; bool operator!=(MultiHeadAttentionAttrs const &) const; @@ -48,23 +48,23 @@ struct MultiHeadAttentionAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::MultiHeadAttentionAttrs const &) const; +struct hash<::FlexFlow::MultiHeadAttentionAttrs> { + 
size_t operator()(::FlexFlow::MultiHeadAttentionAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::MultiHeadAttentionAttrs from_json(json const &); - static void to_json(json &, FlexFlow::MultiHeadAttentionAttrs const &); +struct adl_serializer<::FlexFlow::MultiHeadAttentionAttrs> { + static ::FlexFlow::MultiHeadAttentionAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::MultiHeadAttentionAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::MultiHeadAttentionAttrs> { + static Gen<::FlexFlow::MultiHeadAttentionAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.dtg.h b/lib/op-attrs/include/op-attrs/ops/batch_matmul.dtg.h index a8ab52d2b3..64c4dd9ae3 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul.dtg.h @@ -20,7 +20,8 @@ namespace FlexFlow { struct BatchMatmulAttrs { BatchMatmulAttrs() = delete; - BatchMatmulAttrs(int const &a_seq_length_dim, int const &b_seq_length_dim); + explicit BatchMatmulAttrs(int const &a_seq_length_dim, + int const &b_seq_length_dim); bool operator==(BatchMatmulAttrs const &) const; bool operator!=(BatchMatmulAttrs const &) const; @@ -35,23 +36,23 @@ struct BatchMatmulAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::BatchMatmulAttrs const &) const; +struct hash<::FlexFlow::BatchMatmulAttrs> { + size_t operator()(::FlexFlow::BatchMatmulAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::BatchMatmulAttrs from_json(json const &); - static void to_json(json &, FlexFlow::BatchMatmulAttrs const &); +struct adl_serializer<::FlexFlow::BatchMatmulAttrs> { + static ::FlexFlow::BatchMatmulAttrs from_json(json const &); + static void to_json(json &, 
::FlexFlow::BatchMatmulAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::BatchMatmulAttrs> { + static Gen<::FlexFlow::BatchMatmulAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/batch_norm_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/batch_norm_attrs.dtg.h index f153bfde7e..a7d29d565c 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_norm_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_norm_attrs.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct BatchNormAttrs { BatchNormAttrs() = delete; - BatchNormAttrs(bool const &relu); + explicit BatchNormAttrs(bool const &relu); bool operator==(BatchNormAttrs const &) const; bool operator!=(BatchNormAttrs const &) const; @@ -34,23 +34,23 @@ struct BatchNormAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::BatchNormAttrs const &) const; +struct hash<::FlexFlow::BatchNormAttrs> { + size_t operator()(::FlexFlow::BatchNormAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::BatchNormAttrs from_json(json const &); - static void to_json(json &, FlexFlow::BatchNormAttrs const &); +struct adl_serializer<::FlexFlow::BatchNormAttrs> { + static ::FlexFlow::BatchNormAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::BatchNormAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::BatchNormAttrs> { + static Gen<::FlexFlow::BatchNormAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/broadcast.dtg.h b/lib/op-attrs/include/op-attrs/ops/broadcast.dtg.h index e4de3dcc75..baff0fdad5 100644 --- a/lib/op-attrs/include/op-attrs/ops/broadcast.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/broadcast.dtg.h @@ -21,7 +21,7 @@ namespace FlexFlow { struct 
BroadcastAttrs { BroadcastAttrs() = delete; - BroadcastAttrs( + explicit BroadcastAttrs( ::FlexFlow::stack_vector const &target_dims); bool operator==(BroadcastAttrs const &) const; @@ -36,23 +36,23 @@ struct BroadcastAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::BroadcastAttrs const &) const; +struct hash<::FlexFlow::BroadcastAttrs> { + size_t operator()(::FlexFlow::BroadcastAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::BroadcastAttrs from_json(json const &); - static void to_json(json &, FlexFlow::BroadcastAttrs const &); +struct adl_serializer<::FlexFlow::BroadcastAttrs> { + static ::FlexFlow::BroadcastAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::BroadcastAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::BroadcastAttrs> { + static Gen<::FlexFlow::BroadcastAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/cast.h b/lib/op-attrs/include/op-attrs/ops/cast.h index 117dcb1e01..ead779c553 100644 --- a/lib/op-attrs/include/op-attrs/ops/cast.h +++ b/lib/op-attrs/include/op-attrs/ops/cast.h @@ -1,12 +1,21 @@ #ifndef _FLEXFLOW_CAST_ATTRS_H #define _FLEXFLOW_CAST_ATTRS_H -#include "core.h" #include "op-attrs/ops/cast_attrs.dtg.h" +#include "op-attrs/ops/core.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/tensor_shape.h" +#include namespace FlexFlow { CHECK_VALID_OP_ATTR(CastAttrs); + +tl::expected get_output_shape(CastAttrs const &, + TensorShape const &); +tl::expected + get_output_shape(CastAttrs const &, ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/cast_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/cast_attrs.dtg.h index 33391eb221..0cfb1c2161 100644 --- a/lib/op-attrs/include/op-attrs/ops/cast_attrs.dtg.h +++ 
b/lib/op-attrs/include/op-attrs/ops/cast_attrs.dtg.h @@ -21,7 +21,7 @@ namespace FlexFlow { struct CastAttrs { CastAttrs() = delete; - CastAttrs(DataType const &dtype); + explicit CastAttrs(DataType const &dtype); bool operator==(CastAttrs const &) const; bool operator!=(CastAttrs const &) const; @@ -35,23 +35,23 @@ struct CastAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::CastAttrs const &) const; +struct hash<::FlexFlow::CastAttrs> { + size_t operator()(::FlexFlow::CastAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::CastAttrs from_json(json const &); - static void to_json(json &, FlexFlow::CastAttrs const &); +struct adl_serializer<::FlexFlow::CastAttrs> { + static ::FlexFlow::CastAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::CastAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::CastAttrs> { + static Gen<::FlexFlow::CastAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/combine_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/combine_attrs.dtg.h index 43db204bc5..a9f2385fed 100644 --- a/lib/op-attrs/include/op-attrs/ops/combine_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/combine_attrs.dtg.h @@ -22,8 +22,8 @@ namespace FlexFlow { struct CombineAttrs { CombineAttrs() = delete; - CombineAttrs(::FlexFlow::ff_dim_t const &combine_dim, - int const &combine_degree); + explicit CombineAttrs(::FlexFlow::ff_dim_t const &combine_dim, + int const &combine_degree); bool operator==(CombineAttrs const &) const; bool operator!=(CombineAttrs const &) const; @@ -38,23 +38,23 @@ struct CombineAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::CombineAttrs const &) const; +struct hash<::FlexFlow::CombineAttrs> { + size_t operator()(::FlexFlow::CombineAttrs const &) const; }; } // 
namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::CombineAttrs from_json(json const &); - static void to_json(json &, FlexFlow::CombineAttrs const &); +struct adl_serializer<::FlexFlow::CombineAttrs> { + static ::FlexFlow::CombineAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::CombineAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::CombineAttrs> { + static Gen<::FlexFlow::CombineAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/concat_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/concat_attrs.dtg.h index 3c26473a4e..435cc08f90 100644 --- a/lib/op-attrs/include/op-attrs/ops/concat_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/concat_attrs.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct ConcatAttrs { ConcatAttrs() = delete; - ConcatAttrs(::FlexFlow::ff_dim_t const &axis, int const &num_inputs); + explicit ConcatAttrs(::FlexFlow::ff_dim_t const &axis, int const &num_inputs); bool operator==(ConcatAttrs const &) const; bool operator!=(ConcatAttrs const &) const; @@ -37,23 +37,23 @@ struct ConcatAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ConcatAttrs const &) const; +struct hash<::FlexFlow::ConcatAttrs> { + size_t operator()(::FlexFlow::ConcatAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ConcatAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ConcatAttrs const &); +struct adl_serializer<::FlexFlow::ConcatAttrs> { + static ::FlexFlow::ConcatAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ConcatAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ConcatAttrs> { + static Gen<::FlexFlow::ConcatAttrs> arbitrary(); }; } 
// namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d.h b/lib/op-attrs/include/op-attrs/ops/conv_2d.h index 7759380088..72d1123c39 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d.h @@ -1,8 +1,8 @@ #ifndef _FLEXFLOW_CONV_2D_ATTRS_H #define _FLEXFLOW_CONV_2D_ATTRS_H -#include "core.h" #include "op-attrs/ops/conv_2d_attrs.dtg.h" +#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.h" #include "op-attrs/tensor_shape.h" diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_input_shape.dtg.h b/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_input_shape.dtg.h index 2e7833064c..353213e33f 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_input_shape.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_input_shape.dtg.h @@ -22,11 +22,11 @@ namespace FlexFlow { struct Conv2DInputShape { Conv2DInputShape() = delete; - Conv2DInputShape(size_t const &num_samples, - size_t const &num_channels, - size_t const &height, - size_t const &width, - ::FlexFlow::DataType const &datatype); + explicit Conv2DInputShape(size_t const &num_samples, + size_t const &num_channels, + size_t const &height, + size_t const &width, + ::FlexFlow::DataType const &datatype); bool operator==(Conv2DInputShape const &) const; bool operator!=(Conv2DInputShape const &) const; @@ -44,23 +44,23 @@ struct Conv2DInputShape { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::Conv2DInputShape const &) const; +struct hash<::FlexFlow::Conv2DInputShape> { + size_t operator()(::FlexFlow::Conv2DInputShape const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::Conv2DInputShape from_json(json const &); - static void to_json(json &, FlexFlow::Conv2DInputShape const &); +struct adl_serializer<::FlexFlow::Conv2DInputShape> { + static ::FlexFlow::Conv2DInputShape from_json(json const &); + static void to_json(json &, 
::FlexFlow::Conv2DInputShape const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::Conv2DInputShape> { + static Gen<::FlexFlow::Conv2DInputShape> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.dtg.h b/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.dtg.h index 846c9e413a..0b02d74a4b 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.dtg.h @@ -22,13 +22,14 @@ namespace FlexFlow { struct Conv2DParallelInputShape { Conv2DParallelInputShape() = delete; - Conv2DParallelInputShape(::FlexFlow::ShardParallelDim const &sample_dim, - ::FlexFlow::ShardParallelDim const &channel_dim, - ::FlexFlow::ShardParallelDim const &height_dim, - ::FlexFlow::ShardParallelDim const &width_dim, - int const &sum_reduction_degree, - int const &discard_copy_reduction_degree, - ::FlexFlow::DataType const &datatype); + explicit Conv2DParallelInputShape( + ::FlexFlow::ShardParallelDim const &sample_dim, + ::FlexFlow::ShardParallelDim const &channel_dim, + ::FlexFlow::ShardParallelDim const &height_dim, + ::FlexFlow::ShardParallelDim const &width_dim, + int const &sum_reduction_degree, + int const &discard_copy_reduction_degree, + ::FlexFlow::DataType const &datatype); bool operator==(Conv2DParallelInputShape const &) const; bool operator!=(Conv2DParallelInputShape const &) const; @@ -48,23 +49,23 @@ struct Conv2DParallelInputShape { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::Conv2DParallelInputShape const &) const; +struct hash<::FlexFlow::Conv2DParallelInputShape> { + size_t operator()(::FlexFlow::Conv2DParallelInputShape const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::Conv2DParallelInputShape from_json(json 
const &); - static void to_json(json &, FlexFlow::Conv2DParallelInputShape const &); +struct adl_serializer<::FlexFlow::Conv2DParallelInputShape> { + static ::FlexFlow::Conv2DParallelInputShape from_json(json const &); + static void to_json(json &, ::FlexFlow::Conv2DParallelInputShape const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::Conv2DParallelInputShape> { + static Gen<::FlexFlow::Conv2DParallelInputShape> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/conv_2d_attrs.dtg.h index 06827656da..0602a6eb92 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d_attrs.dtg.h @@ -23,16 +23,16 @@ namespace FlexFlow { struct Conv2DAttrs { Conv2DAttrs() = delete; - Conv2DAttrs(int const &out_channels, - int const &kernel_h, - int const &kernel_w, - int const &stride_h, - int const &stride_w, - int const &padding_h, - int const &padding_w, - int const &groups, - std::optional<::FlexFlow::Activation> const &activation, - bool const &use_bias); + explicit Conv2DAttrs(int const &out_channels, + int const &kernel_h, + int const &kernel_w, + int const &stride_h, + int const &stride_w, + int const &padding_h, + int const &padding_w, + int const &groups, + std::optional<::FlexFlow::Activation> const &activation, + bool const &use_bias); bool operator==(Conv2DAttrs const &) const; bool operator!=(Conv2DAttrs const &) const; @@ -55,23 +55,23 @@ struct Conv2DAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::Conv2DAttrs const &) const; +struct hash<::FlexFlow::Conv2DAttrs> { + size_t operator()(::FlexFlow::Conv2DAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::Conv2DAttrs from_json(json const &); - static void to_json(json &, FlexFlow::Conv2DAttrs 
const &); +struct adl_serializer<::FlexFlow::Conv2DAttrs> { + static ::FlexFlow::Conv2DAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::Conv2DAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::Conv2DAttrs> { + static Gen<::FlexFlow::Conv2DAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/dropout_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/dropout_attrs.dtg.h index ef86e49560..433e2c8aa7 100644 --- a/lib/op-attrs/include/op-attrs/ops/dropout_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/dropout_attrs.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct DropoutAttrs { DropoutAttrs() = delete; - DropoutAttrs(float const &rate, unsigned long long const &seed); + explicit DropoutAttrs(float const &rate, unsigned long long const &seed); bool operator==(DropoutAttrs const &) const; bool operator!=(DropoutAttrs const &) const; @@ -35,23 +35,23 @@ struct DropoutAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::DropoutAttrs const &) const; +struct hash<::FlexFlow::DropoutAttrs> { + size_t operator()(::FlexFlow::DropoutAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::DropoutAttrs from_json(json const &); - static void to_json(json &, FlexFlow::DropoutAttrs const &); +struct adl_serializer<::FlexFlow::DropoutAttrs> { + static ::FlexFlow::DropoutAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::DropoutAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::DropoutAttrs> { + static Gen<::FlexFlow::DropoutAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/element_binary_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/element_binary_attrs.dtg.h index 
10d93c87d3..c4049f9c8d 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_binary_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/element_binary_attrs.dtg.h @@ -22,10 +22,10 @@ namespace FlexFlow { struct ElementBinaryAttrs { ElementBinaryAttrs() = delete; - ElementBinaryAttrs(::FlexFlow::OperatorType const &type, - ::FlexFlow::DataType const &compute_type, - bool const &should_broadcast_lhs, - bool const &should_broadcast_rhs); + explicit ElementBinaryAttrs(::FlexFlow::OperatorType const &type, + ::FlexFlow::DataType const &compute_type, + bool const &should_broadcast_lhs, + bool const &should_broadcast_rhs); bool operator==(ElementBinaryAttrs const &) const; bool operator!=(ElementBinaryAttrs const &) const; @@ -42,23 +42,23 @@ struct ElementBinaryAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ElementBinaryAttrs const &) const; +struct hash<::FlexFlow::ElementBinaryAttrs> { + size_t operator()(::FlexFlow::ElementBinaryAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ElementBinaryAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ElementBinaryAttrs const &); +struct adl_serializer<::FlexFlow::ElementBinaryAttrs> { + static ::FlexFlow::ElementBinaryAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ElementBinaryAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ElementBinaryAttrs> { + static Gen<::FlexFlow::ElementBinaryAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/element_unary_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/element_unary_attrs.dtg.h index 1d308c9acd..75a16d4054 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_unary_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/element_unary_attrs.dtg.h @@ -22,8 +22,8 @@ namespace FlexFlow { struct ElementUnaryAttrs 
{ ElementUnaryAttrs() = delete; - ElementUnaryAttrs(::FlexFlow::OperatorType const &op_type, - std::optional const &scalar); + explicit ElementUnaryAttrs(::FlexFlow::OperatorType const &op_type, + std::optional const &scalar); bool operator==(ElementUnaryAttrs const &) const; bool operator!=(ElementUnaryAttrs const &) const; @@ -38,23 +38,23 @@ struct ElementUnaryAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ElementUnaryAttrs const &) const; +struct hash<::FlexFlow::ElementUnaryAttrs> { + size_t operator()(::FlexFlow::ElementUnaryAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ElementUnaryAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ElementUnaryAttrs const &); +struct adl_serializer<::FlexFlow::ElementUnaryAttrs> { + static ::FlexFlow::ElementUnaryAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ElementUnaryAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ElementUnaryAttrs> { + static Gen<::FlexFlow::ElementUnaryAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/embedding_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/embedding_attrs.dtg.h index f1cae86460..7b1eb8d2f7 100644 --- a/lib/op-attrs/include/op-attrs/ops/embedding_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/embedding_attrs.dtg.h @@ -23,10 +23,10 @@ namespace FlexFlow { struct EmbeddingAttrs { EmbeddingAttrs() = delete; - EmbeddingAttrs(int const &num_entries, - int const &out_channels, - std::optional<::FlexFlow::AggregateOp> const &aggr, - ::FlexFlow::DataType const &data_type); + explicit EmbeddingAttrs(int const &num_entries, + int const &out_channels, + std::optional<::FlexFlow::AggregateOp> const &aggr, + ::FlexFlow::DataType const &data_type); bool operator==(EmbeddingAttrs const &) const; bool 
operator!=(EmbeddingAttrs const &) const; @@ -43,23 +43,23 @@ struct EmbeddingAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::EmbeddingAttrs const &) const; +struct hash<::FlexFlow::EmbeddingAttrs> { + size_t operator()(::FlexFlow::EmbeddingAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::EmbeddingAttrs from_json(json const &); - static void to_json(json &, FlexFlow::EmbeddingAttrs const &); +struct adl_serializer<::FlexFlow::EmbeddingAttrs> { + static ::FlexFlow::EmbeddingAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::EmbeddingAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::EmbeddingAttrs> { + static Gen<::FlexFlow::EmbeddingAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/flat_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/flat_attrs.dtg.h index a94c0aeff3..a8b74af565 100644 --- a/lib/op-attrs/include/op-attrs/ops/flat_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/flat_attrs.dtg.h @@ -30,23 +30,23 @@ struct FlatAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::FlatAttrs const &) const; +struct hash<::FlexFlow::FlatAttrs> { + size_t operator()(::FlexFlow::FlatAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::FlatAttrs from_json(json const &); - static void to_json(json &, FlexFlow::FlatAttrs const &); +struct adl_serializer<::FlexFlow::FlatAttrs> { + static ::FlexFlow::FlatAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::FlatAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::FlatAttrs> { + static Gen<::FlexFlow::FlatAttrs> arbitrary(); }; } // namespace rc diff --git 
a/lib/op-attrs/include/op-attrs/ops/gather_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/gather_attrs.dtg.h index e7a35e5800..84835bc850 100644 --- a/lib/op-attrs/include/op-attrs/ops/gather_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/gather_attrs.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct GatherAttrs { GatherAttrs() = delete; - GatherAttrs(::FlexFlow::ff_dim_t const &dim); + explicit GatherAttrs(::FlexFlow::ff_dim_t const &dim); bool operator==(GatherAttrs const &) const; bool operator!=(GatherAttrs const &) const; @@ -36,23 +36,23 @@ struct GatherAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::GatherAttrs const &) const; +struct hash<::FlexFlow::GatherAttrs> { + size_t operator()(::FlexFlow::GatherAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::GatherAttrs from_json(json const &); - static void to_json(json &, FlexFlow::GatherAttrs const &); +struct adl_serializer<::FlexFlow::GatherAttrs> { + static ::FlexFlow::GatherAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::GatherAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::GatherAttrs> { + static Gen<::FlexFlow::GatherAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/input_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/input_attrs.dtg.h index aa2ca1e933..729b47dedc 100644 --- a/lib/op-attrs/include/op-attrs/ops/input_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/input_attrs.dtg.h @@ -30,23 +30,23 @@ struct InputAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::InputAttrs const &) const; +struct hash<::FlexFlow::InputAttrs> { + size_t operator()(::FlexFlow::InputAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::InputAttrs 
from_json(json const &); - static void to_json(json &, FlexFlow::InputAttrs const &); +struct adl_serializer<::FlexFlow::InputAttrs> { + static ::FlexFlow::InputAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::InputAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::InputAttrs> { + static Gen<::FlexFlow::InputAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/layer_norm_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/layer_norm_attrs.dtg.h index c945206863..e480544815 100644 --- a/lib/op-attrs/include/op-attrs/ops/layer_norm_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/layer_norm_attrs.dtg.h @@ -23,10 +23,10 @@ namespace FlexFlow { struct LayerNormAttrs { LayerNormAttrs() = delete; - LayerNormAttrs(::FlexFlow::stack_vector<::FlexFlow::ff_dim_t, - MAX_TENSOR_DIM> const &axes, - bool const &elementwise_affine, - float const &eps); + explicit LayerNormAttrs(::FlexFlow::stack_vector<::FlexFlow::ff_dim_t, + MAX_TENSOR_DIM> const &axes, + bool const &elementwise_affine, + float const &eps); bool operator==(LayerNormAttrs const &) const; bool operator!=(LayerNormAttrs const &) const; @@ -42,23 +42,23 @@ struct LayerNormAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::LayerNormAttrs const &) const; +struct hash<::FlexFlow::LayerNormAttrs> { + size_t operator()(::FlexFlow::LayerNormAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::LayerNormAttrs from_json(json const &); - static void to_json(json &, FlexFlow::LayerNormAttrs const &); +struct adl_serializer<::FlexFlow::LayerNormAttrs> { + static ::FlexFlow::LayerNormAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::LayerNormAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); 
+struct Arbitrary<::FlexFlow::LayerNormAttrs> { + static Gen<::FlexFlow::LayerNormAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/linear_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/linear_attrs.dtg.h index 28cd2a8b33..a00dc65ccb 100644 --- a/lib/op-attrs/include/op-attrs/ops/linear_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/linear_attrs.dtg.h @@ -24,11 +24,12 @@ namespace FlexFlow { struct LinearAttrs { LinearAttrs() = delete; - LinearAttrs(int const &out_channels, - bool const &use_bias, - ::FlexFlow::DataType const &data_type, - std::optional<::FlexFlow::Activation> const &activation, - std::optional<::FlexFlow::RegularizerAttrs> const ®ularizer); + explicit LinearAttrs( + int const &out_channels, + bool const &use_bias, + ::FlexFlow::DataType const &data_type, + std::optional<::FlexFlow::Activation> const &activation, + std::optional<::FlexFlow::RegularizerAttrs> const ®ularizer); bool operator==(LinearAttrs const &) const; bool operator!=(LinearAttrs const &) const; @@ -46,23 +47,23 @@ struct LinearAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::LinearAttrs const &) const; +struct hash<::FlexFlow::LinearAttrs> { + size_t operator()(::FlexFlow::LinearAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::LinearAttrs from_json(json const &); - static void to_json(json &, FlexFlow::LinearAttrs const &); +struct adl_serializer<::FlexFlow::LinearAttrs> { + static ::FlexFlow::LinearAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::LinearAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::LinearAttrs> { + static Gen<::FlexFlow::LinearAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/noop_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/noop_attrs.dtg.h index 
ed0d8c9348..528926cc0c 100644 --- a/lib/op-attrs/include/op-attrs/ops/noop_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/noop_attrs.dtg.h @@ -30,23 +30,23 @@ struct NoopAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::NoopAttrs const &) const; +struct hash<::FlexFlow::NoopAttrs> { + size_t operator()(::FlexFlow::NoopAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::NoopAttrs from_json(json const &); - static void to_json(json &, FlexFlow::NoopAttrs const &); +struct adl_serializer<::FlexFlow::NoopAttrs> { + static ::FlexFlow::NoopAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::NoopAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::NoopAttrs> { + static Gen<::FlexFlow::NoopAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/parallel_attention_inputs.dtg.h b/lib/op-attrs/include/op-attrs/ops/parallel_attention_inputs.dtg.h index d3903bd3b2..f6a739473a 100644 --- a/lib/op-attrs/include/op-attrs/ops/parallel_attention_inputs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/parallel_attention_inputs.dtg.h @@ -21,7 +21,7 @@ namespace FlexFlow { struct ParallelMultiHeadAttentionInputs { ParallelMultiHeadAttentionInputs() = delete; - ParallelMultiHeadAttentionInputs( + explicit ParallelMultiHeadAttentionInputs( ::FlexFlow::ParallelTensorShape const &query, ::FlexFlow::ParallelTensorShape const &key, ::FlexFlow::ParallelTensorShape const &value); @@ -36,24 +36,24 @@ struct ParallelMultiHeadAttentionInputs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ParallelMultiHeadAttentionInputs const &) const; +struct hash<::FlexFlow::ParallelMultiHeadAttentionInputs> { + size_t operator()(::FlexFlow::ParallelMultiHeadAttentionInputs const &) const; }; } // namespace std namespace nlohmann { template 
<> -struct adl_serializer { - static FlexFlow::ParallelMultiHeadAttentionInputs from_json(json const &); +struct adl_serializer<::FlexFlow::ParallelMultiHeadAttentionInputs> { + static ::FlexFlow::ParallelMultiHeadAttentionInputs from_json(json const &); static void to_json(json &, - FlexFlow::ParallelMultiHeadAttentionInputs const &); + ::FlexFlow::ParallelMultiHeadAttentionInputs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ParallelMultiHeadAttentionInputs> { + static Gen<::FlexFlow::ParallelMultiHeadAttentionInputs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/pool_2d_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/pool_2d_attrs.dtg.h index a5c6603302..ef779217cd 100644 --- a/lib/op-attrs/include/op-attrs/ops/pool_2d_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/pool_2d_attrs.dtg.h @@ -22,14 +22,14 @@ namespace FlexFlow { struct Pool2DAttrs { Pool2DAttrs() = delete; - Pool2DAttrs(int const &kernel_h, - int const &kernel_w, - int const &stride_h, - int const &stride_w, - int const &padding_h, - int const &padding_w, - ::FlexFlow::PoolOp const &pool_type, - ::FlexFlow::Activation const &activation); + explicit Pool2DAttrs(int const &kernel_h, + int const &kernel_w, + int const &stride_h, + int const &stride_w, + int const &padding_h, + int const &padding_w, + ::FlexFlow::PoolOp const &pool_type, + ::FlexFlow::Activation const &activation); bool operator==(Pool2DAttrs const &) const; bool operator!=(Pool2DAttrs const &) const; @@ -50,23 +50,23 @@ struct Pool2DAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::Pool2DAttrs const &) const; +struct hash<::FlexFlow::Pool2DAttrs> { + size_t operator()(::FlexFlow::Pool2DAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::Pool2DAttrs from_json(json const &); - static void to_json(json &, 
FlexFlow::Pool2DAttrs const &); +struct adl_serializer<::FlexFlow::Pool2DAttrs> { + static ::FlexFlow::Pool2DAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::Pool2DAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::Pool2DAttrs> { + static Gen<::FlexFlow::Pool2DAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/reduce_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/reduce_attrs.dtg.h index af27bf35be..1710687b36 100644 --- a/lib/op-attrs/include/op-attrs/ops/reduce_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/reduce_attrs.dtg.h @@ -24,10 +24,10 @@ namespace FlexFlow { struct ReduceAttrs { ReduceAttrs() = delete; - ReduceAttrs(::FlexFlow::stack_vector<::FlexFlow::ff_dim_t, - MAX_TENSOR_DIM> const &axes, - ::FlexFlow::OperatorType const &op_type, - bool const &keepdims); + explicit ReduceAttrs(::FlexFlow::stack_vector<::FlexFlow::ff_dim_t, + MAX_TENSOR_DIM> const &axes, + ::FlexFlow::OperatorType const &op_type, + bool const &keepdims); bool operator==(ReduceAttrs const &) const; bool operator!=(ReduceAttrs const &) const; @@ -43,23 +43,23 @@ struct ReduceAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ReduceAttrs const &) const; +struct hash<::FlexFlow::ReduceAttrs> { + size_t operator()(::FlexFlow::ReduceAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ReduceAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ReduceAttrs const &); +struct adl_serializer<::FlexFlow::ReduceAttrs> { + static ::FlexFlow::ReduceAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ReduceAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ReduceAttrs> { + static Gen<::FlexFlow::ReduceAttrs> 
arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/reduction_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/reduction_attrs.dtg.h index 9de5eb2252..f742ce46fb 100644 --- a/lib/op-attrs/include/op-attrs/ops/reduction_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/reduction_attrs.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct ReductionAttrs { ReductionAttrs() = delete; - ReductionAttrs(int const &reduction_degree); + explicit ReductionAttrs(int const &reduction_degree); bool operator==(ReductionAttrs const &) const; bool operator!=(ReductionAttrs const &) const; @@ -34,23 +34,23 @@ struct ReductionAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ReductionAttrs const &) const; +struct hash<::FlexFlow::ReductionAttrs> { + size_t operator()(::FlexFlow::ReductionAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ReductionAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ReductionAttrs const &); +struct adl_serializer<::FlexFlow::ReductionAttrs> { + static ::FlexFlow::ReductionAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ReductionAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ReductionAttrs> { + static Gen<::FlexFlow::ReductionAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/repartition_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/repartition_attrs.dtg.h index 66c21466f4..33f32f709c 100644 --- a/lib/op-attrs/include/op-attrs/ops/repartition_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/repartition_attrs.dtg.h @@ -22,8 +22,8 @@ namespace FlexFlow { struct RepartitionAttrs { RepartitionAttrs() = delete; - RepartitionAttrs(::FlexFlow::ff_dim_t const &repartition_dim, - int const &repartition_degree); + explicit 
RepartitionAttrs(::FlexFlow::ff_dim_t const &repartition_dim, + int const &repartition_degree); bool operator==(RepartitionAttrs const &) const; bool operator!=(RepartitionAttrs const &) const; @@ -38,23 +38,23 @@ struct RepartitionAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::RepartitionAttrs const &) const; +struct hash<::FlexFlow::RepartitionAttrs> { + size_t operator()(::FlexFlow::RepartitionAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::RepartitionAttrs from_json(json const &); - static void to_json(json &, FlexFlow::RepartitionAttrs const &); +struct adl_serializer<::FlexFlow::RepartitionAttrs> { + static ::FlexFlow::RepartitionAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::RepartitionAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::RepartitionAttrs> { + static Gen<::FlexFlow::RepartitionAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/replicate_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/replicate_attrs.dtg.h index ea3f0d46c7..53a9a05337 100644 --- a/lib/op-attrs/include/op-attrs/ops/replicate_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/replicate_attrs.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct ReplicateAttrs { ReplicateAttrs() = delete; - ReplicateAttrs(int const &replicate_degree); + explicit ReplicateAttrs(int const &replicate_degree); bool operator==(ReplicateAttrs const &) const; bool operator!=(ReplicateAttrs const &) const; @@ -34,23 +34,23 @@ struct ReplicateAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ReplicateAttrs const &) const; +struct hash<::FlexFlow::ReplicateAttrs> { + size_t operator()(::FlexFlow::ReplicateAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static 
FlexFlow::ReplicateAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ReplicateAttrs const &); +struct adl_serializer<::FlexFlow::ReplicateAttrs> { + static ::FlexFlow::ReplicateAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ReplicateAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ReplicateAttrs> { + static Gen<::FlexFlow::ReplicateAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/reshape_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/reshape_attrs.dtg.h index 612874790f..1d16e9eccb 100644 --- a/lib/op-attrs/include/op-attrs/ops/reshape_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/reshape_attrs.dtg.h @@ -21,7 +21,7 @@ namespace FlexFlow { struct ReshapeAttrs { ReshapeAttrs() = delete; - ReshapeAttrs(::FlexFlow::TensorShape const &shape); + explicit ReshapeAttrs(::FlexFlow::TensorShape const &shape); bool operator==(ReshapeAttrs const &) const; bool operator!=(ReshapeAttrs const &) const; @@ -35,23 +35,23 @@ struct ReshapeAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ReshapeAttrs const &) const; +struct hash<::FlexFlow::ReshapeAttrs> { + size_t operator()(::FlexFlow::ReshapeAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ReshapeAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ReshapeAttrs const &); +struct adl_serializer<::FlexFlow::ReshapeAttrs> { + static ::FlexFlow::ReshapeAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ReshapeAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ReshapeAttrs> { + static Gen<::FlexFlow::ReshapeAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/reverse_attrs.dtg.h 
b/lib/op-attrs/include/op-attrs/ops/reverse_attrs.dtg.h index 8c8c8a7a9e..94037c653d 100644 --- a/lib/op-attrs/include/op-attrs/ops/reverse_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/reverse_attrs.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct ReverseAttrs { ReverseAttrs() = delete; - ReverseAttrs(::FlexFlow::ff_dim_t const &axis); + explicit ReverseAttrs(::FlexFlow::ff_dim_t const &axis); bool operator==(ReverseAttrs const &) const; bool operator!=(ReverseAttrs const &) const; @@ -36,23 +36,23 @@ struct ReverseAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ReverseAttrs const &) const; +struct hash<::FlexFlow::ReverseAttrs> { + size_t operator()(::FlexFlow::ReverseAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ReverseAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ReverseAttrs const &); +struct adl_serializer<::FlexFlow::ReverseAttrs> { + static ::FlexFlow::ReverseAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ReverseAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ReverseAttrs> { + static Gen<::FlexFlow::ReverseAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/softmax_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/softmax_attrs.dtg.h index 1c855d90f4..5705c7a882 100644 --- a/lib/op-attrs/include/op-attrs/ops/softmax_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/softmax_attrs.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct SoftmaxAttrs { SoftmaxAttrs() = delete; - SoftmaxAttrs(::FlexFlow::ff_dim_t const &dim); + explicit SoftmaxAttrs(::FlexFlow::ff_dim_t const &dim); bool operator==(SoftmaxAttrs const &) const; bool operator!=(SoftmaxAttrs const &) const; @@ -36,23 +36,23 @@ struct SoftmaxAttrs { namespace std { template <> -struct hash { - size_t 
operator()(FlexFlow::SoftmaxAttrs const &) const; +struct hash<::FlexFlow::SoftmaxAttrs> { + size_t operator()(::FlexFlow::SoftmaxAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::SoftmaxAttrs from_json(json const &); - static void to_json(json &, FlexFlow::SoftmaxAttrs const &); +struct adl_serializer<::FlexFlow::SoftmaxAttrs> { + static ::FlexFlow::SoftmaxAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::SoftmaxAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::SoftmaxAttrs> { + static Gen<::FlexFlow::SoftmaxAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/split_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/split_attrs.dtg.h index b602015e2e..baf0a8f305 100644 --- a/lib/op-attrs/include/op-attrs/ops/split_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/split_attrs.dtg.h @@ -23,8 +23,9 @@ namespace FlexFlow { struct SplitAttrs { SplitAttrs() = delete; - SplitAttrs(::FlexFlow::stack_vector const &splits, - ::FlexFlow::ff_dim_t const &axis); + explicit SplitAttrs( + ::FlexFlow::stack_vector const &splits, + ::FlexFlow::ff_dim_t const &axis); bool operator==(SplitAttrs const &) const; bool operator!=(SplitAttrs const &) const; @@ -39,23 +40,23 @@ struct SplitAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::SplitAttrs const &) const; +struct hash<::FlexFlow::SplitAttrs> { + size_t operator()(::FlexFlow::SplitAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::SplitAttrs from_json(json const &); - static void to_json(json &, FlexFlow::SplitAttrs const &); +struct adl_serializer<::FlexFlow::SplitAttrs> { + static ::FlexFlow::SplitAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::SplitAttrs const &); }; } // namespace 
nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::SplitAttrs> { + static Gen<::FlexFlow::SplitAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/topk_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/topk_attrs.dtg.h index d1f32f67b7..ef09bc3b16 100644 --- a/lib/op-attrs/include/op-attrs/ops/topk_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/topk_attrs.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct TopKAttrs { TopKAttrs() = delete; - TopKAttrs(int const &k, bool const &sorted); + explicit TopKAttrs(int const &k, bool const &sorted); bool operator==(TopKAttrs const &) const; bool operator!=(TopKAttrs const &) const; @@ -35,23 +35,23 @@ struct TopKAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::TopKAttrs const &) const; +struct hash<::FlexFlow::TopKAttrs> { + size_t operator()(::FlexFlow::TopKAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::TopKAttrs from_json(json const &); - static void to_json(json &, FlexFlow::TopKAttrs const &); +struct adl_serializer<::FlexFlow::TopKAttrs> { + static ::FlexFlow::TopKAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::TopKAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::TopKAttrs> { + static Gen<::FlexFlow::TopKAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/transpose_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/transpose_attrs.dtg.h index f4d932845f..fac95b406b 100644 --- a/lib/op-attrs/include/op-attrs/ops/transpose_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/transpose_attrs.dtg.h @@ -23,7 +23,8 @@ namespace FlexFlow { struct TransposeAttrs { TransposeAttrs() = delete; - TransposeAttrs(::FlexFlow::FFOrdered<::FlexFlow::ff_dim_t> const &perm); + 
explicit TransposeAttrs( + ::FlexFlow::FFOrdered<::FlexFlow::ff_dim_t> const &perm); bool operator==(TransposeAttrs const &) const; bool operator!=(TransposeAttrs const &) const; @@ -37,23 +38,23 @@ struct TransposeAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::TransposeAttrs const &) const; +struct hash<::FlexFlow::TransposeAttrs> { + size_t operator()(::FlexFlow::TransposeAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::TransposeAttrs from_json(json const &); - static void to_json(json &, FlexFlow::TransposeAttrs const &); +struct adl_serializer<::FlexFlow::TransposeAttrs> { + static ::FlexFlow::TransposeAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::TransposeAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::TransposeAttrs> { + static Gen<::FlexFlow::TransposeAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/ops/weight_attrs.dtg.h b/lib/op-attrs/include/op-attrs/ops/weight_attrs.dtg.h index 4a19909c25..c7672267fe 100644 --- a/lib/op-attrs/include/op-attrs/ops/weight_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/ops/weight_attrs.dtg.h @@ -30,23 +30,23 @@ struct WeightAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::WeightAttrs const &) const; +struct hash<::FlexFlow::WeightAttrs> { + size_t operator()(::FlexFlow::WeightAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::WeightAttrs from_json(json const &); - static void to_json(json &, FlexFlow::WeightAttrs const &); +struct adl_serializer<::FlexFlow::WeightAttrs> { + static ::FlexFlow::WeightAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::WeightAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary 
{ - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::WeightAttrs> { + static Gen<::FlexFlow::WeightAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dims.dtg.h b/lib/op-attrs/include/op-attrs/parallel_tensor_dims.dtg.h index 71ad517095..edb24f78f4 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_dims.dtg.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dims.dtg.h @@ -26,7 +26,7 @@ namespace FlexFlow { struct ParallelTensorDims { ParallelTensorDims() = delete; - ParallelTensorDims( + explicit ParallelTensorDims( ::FlexFlow::FFOrdered<::FlexFlow::ShardParallelDim> const &shard_dims, ::FlexFlow::ReplicaParallelDimSet const &replica_dims); @@ -43,23 +43,23 @@ struct ParallelTensorDims { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ParallelTensorDims const &) const; +struct hash<::FlexFlow::ParallelTensorDims> { + size_t operator()(::FlexFlow::ParallelTensorDims const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ParallelTensorDims from_json(json const &); - static void to_json(json &, FlexFlow::ParallelTensorDims const &); +struct adl_serializer<::FlexFlow::ParallelTensorDims> { + static ::FlexFlow::ParallelTensorDims from_json(json const &); + static void to_json(json &, ::FlexFlow::ParallelTensorDims const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ParallelTensorDims> { + static Gen<::FlexFlow::ParallelTensorDims> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.dtg.h b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.dtg.h index 62d291fa4f..9f56d29fbb 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.dtg.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.dtg.h @@ -22,8 +22,8 @@ namespace FlexFlow { struct ParallelTensorShape { 
ParallelTensorShape() = delete; - ParallelTensorShape(::FlexFlow::ParallelTensorDims const &dims, - ::FlexFlow::DataType const &data_type); + explicit ParallelTensorShape(::FlexFlow::ParallelTensorDims const &dims, + ::FlexFlow::DataType const &data_type); bool operator==(ParallelTensorShape const &) const; bool operator!=(ParallelTensorShape const &) const; @@ -38,23 +38,23 @@ struct ParallelTensorShape { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ParallelTensorShape const &) const; +struct hash<::FlexFlow::ParallelTensorShape> { + size_t operator()(::FlexFlow::ParallelTensorShape const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ParallelTensorShape from_json(json const &); - static void to_json(json &, FlexFlow::ParallelTensorShape const &); +struct adl_serializer<::FlexFlow::ParallelTensorShape> { + static ::FlexFlow::ParallelTensorShape from_json(json const &); + static void to_json(json &, ::FlexFlow::ParallelTensorShape const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ParallelTensorShape> { + static Gen<::FlexFlow::ParallelTensorShape> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape/discard_copy_degree.dtg.h b/lib/op-attrs/include/op-attrs/parallel_tensor_shape/discard_copy_degree.dtg.h index a820bfe81c..c5f8748cbc 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape/discard_copy_degree.dtg.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape/discard_copy_degree.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct DiscardCopyDegree { DiscardCopyDegree() = delete; - DiscardCopyDegree(int const &value); + explicit DiscardCopyDegree(int const &value); bool operator==(DiscardCopyDegree const &) const; bool operator!=(DiscardCopyDegree const &) const; @@ -34,23 +34,23 @@ struct DiscardCopyDegree { namespace std { 
template <> -struct hash { - size_t operator()(FlexFlow::DiscardCopyDegree const &) const; +struct hash<::FlexFlow::DiscardCopyDegree> { + size_t operator()(::FlexFlow::DiscardCopyDegree const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::DiscardCopyDegree from_json(json const &); - static void to_json(json &, FlexFlow::DiscardCopyDegree const &); +struct adl_serializer<::FlexFlow::DiscardCopyDegree> { + static ::FlexFlow::DiscardCopyDegree from_json(json const &); + static void to_json(json &, ::FlexFlow::DiscardCopyDegree const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::DiscardCopyDegree> { + static Gen<::FlexFlow::DiscardCopyDegree> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape/sum_degree.dtg.h b/lib/op-attrs/include/op-attrs/parallel_tensor_shape/sum_degree.dtg.h index 17388f8d05..9391f7743e 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape/sum_degree.dtg.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape/sum_degree.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct SumDegree { SumDegree() = delete; - SumDegree(int const &value); + explicit SumDegree(int const &value); bool operator==(SumDegree const &) const; bool operator!=(SumDegree const &) const; @@ -34,23 +34,23 @@ struct SumDegree { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::SumDegree const &) const; +struct hash<::FlexFlow::SumDegree> { + size_t operator()(::FlexFlow::SumDegree const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::SumDegree from_json(json const &); - static void to_json(json &, FlexFlow::SumDegree const &); +struct adl_serializer<::FlexFlow::SumDegree> { + static ::FlexFlow::SumDegree from_json(json const &); + static void to_json(json &, ::FlexFlow::SumDegree const &); 
}; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::SumDegree> { + static Gen<::FlexFlow::SumDegree> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/pcg_operator_attrs.dtg.h b/lib/op-attrs/include/op-attrs/pcg_operator_attrs.dtg.h index ed60c162ee..3a2fe45810 100644 --- a/lib/op-attrs/include/op-attrs/pcg_operator_attrs.dtg.h +++ b/lib/op-attrs/include/op-attrs/pcg_operator_attrs.dtg.h @@ -3,7 +3,7 @@ // lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml /* proj-data { - "generated_from": "9149c47c2055195f15966ae7a3f619ff" + "generated_from": "72d324ec59ca0c5a390458ea20e79338" } */ @@ -40,6 +40,7 @@ #include "op-attrs/ops/split_attrs.dtg.h" #include "op-attrs/ops/topk_attrs.dtg.h" #include "op-attrs/ops/transpose_attrs.dtg.h" +#include "op-attrs/ops/weight_attrs.dtg.h" #include "rapidcheck.h" #include #include @@ -78,6 +79,7 @@ struct PCGOperatorAttrs { explicit PCGOperatorAttrs(::FlexFlow::SoftmaxAttrs const &); explicit PCGOperatorAttrs(::FlexFlow::TopKAttrs const &); explicit PCGOperatorAttrs(::FlexFlow::TransposeAttrs const &); + explicit PCGOperatorAttrs(::FlexFlow::WeightAttrs const &); template static constexpr bool IsPartOfPCGOperatorAttrs_v = std::is_same_v || @@ -107,7 +109,8 @@ struct PCGOperatorAttrs { std::is_same_v || std::is_same_v || std::is_same_v || - std::is_same_v; + std::is_same_v || + std::is_same_v; template ReturnType visit(Visitor &&v) const { switch (this->index()) { @@ -223,6 +226,10 @@ struct PCGOperatorAttrs { ReturnType result = v(this->get<::FlexFlow::TransposeAttrs>()); return result; } + case 28: { + ReturnType result = v(this->get<::FlexFlow::WeightAttrs>()); + return result; + } default: { throw std::runtime_error(fmt::format( "Unknown index {} for type PCGOperatorAttrs", this->index())); @@ -344,6 +351,10 @@ struct PCGOperatorAttrs { ReturnType result = v(this->get<::FlexFlow::TransposeAttrs>()); return result; } 
+ case 28: { + ReturnType result = v(this->get<::FlexFlow::WeightAttrs>()); + return result; + } default: { throw std::runtime_error(fmt::format( "Unknown index {} for type PCGOperatorAttrs", this->index())); @@ -368,7 +379,8 @@ struct PCGOperatorAttrs { "::FlexFlow::RepartitionAttrs, ::FlexFlow::ReplicateAttrs, " "::FlexFlow::ReverseAttrs, ::FlexFlow::ReshapeAttrs, " "::FlexFlow::SplitAttrs, ::FlexFlow::SoftmaxAttrs, " - "::FlexFlow::TopKAttrs, ::FlexFlow::TransposeAttrs], received T"); + "::FlexFlow::TopKAttrs, ::FlexFlow::TransposeAttrs, " + "::FlexFlow::WeightAttrs], received T"); return std::holds_alternative(this->raw_variant); } template @@ -389,7 +401,8 @@ struct PCGOperatorAttrs { "::FlexFlow::RepartitionAttrs, ::FlexFlow::ReplicateAttrs, " "::FlexFlow::ReverseAttrs, ::FlexFlow::ReshapeAttrs, " "::FlexFlow::SplitAttrs, ::FlexFlow::SoftmaxAttrs, " - "::FlexFlow::TopKAttrs, ::FlexFlow::TransposeAttrs], received T"); + "::FlexFlow::TopKAttrs, ::FlexFlow::TransposeAttrs, " + "::FlexFlow::WeightAttrs], received T"); return std::get(this->raw_variant); } template @@ -410,7 +423,8 @@ struct PCGOperatorAttrs { "::FlexFlow::RepartitionAttrs, ::FlexFlow::ReplicateAttrs, " "::FlexFlow::ReverseAttrs, ::FlexFlow::ReshapeAttrs, " "::FlexFlow::SplitAttrs, ::FlexFlow::SoftmaxAttrs, " - "::FlexFlow::TopKAttrs, ::FlexFlow::TransposeAttrs], received T"); + "::FlexFlow::TopKAttrs, ::FlexFlow::TransposeAttrs, " + "::FlexFlow::WeightAttrs], received T"); return std::get(this->raw_variant); } size_t index() const { @@ -449,7 +463,8 @@ struct PCGOperatorAttrs { ::FlexFlow::SplitAttrs, ::FlexFlow::SoftmaxAttrs, ::FlexFlow::TopKAttrs, - ::FlexFlow::TransposeAttrs> + ::FlexFlow::TransposeAttrs, + ::FlexFlow::WeightAttrs> raw_variant; }; } // namespace FlexFlow diff --git a/lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml b/lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml index 7aefaa09f9..8617c5fd64 100644 --- 
a/lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml +++ b/lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml @@ -38,6 +38,7 @@ includes = [ "op-attrs/ops/split_attrs.dtg.h", "op-attrs/ops/topk_attrs.dtg.h", "op-attrs/ops/transpose_attrs.dtg.h", + "op-attrs/ops/weight_attrs.dtg.h", ] [[values]] @@ -151,3 +152,7 @@ key = "topk" [[values]] type = "::FlexFlow::TransposeAttrs" key = "transpose" + +[[values]] +type = "::FlexFlow::WeightAttrs" +key = "weight" diff --git a/lib/op-attrs/include/op-attrs/replica_parallel_dim.dtg.h b/lib/op-attrs/include/op-attrs/replica_parallel_dim.dtg.h index 250ba29947..171cad2680 100644 --- a/lib/op-attrs/include/op-attrs/replica_parallel_dim.dtg.h +++ b/lib/op-attrs/include/op-attrs/replica_parallel_dim.dtg.h @@ -21,8 +21,8 @@ namespace FlexFlow { struct ReplicaParallelDim { ReplicaParallelDim() = delete; - ReplicaParallelDim(int const °ree, - ::FlexFlow::ReplicaType const &replica_type); + explicit ReplicaParallelDim(int const °ree, + ::FlexFlow::ReplicaType const &replica_type); bool operator==(ReplicaParallelDim const &) const; bool operator!=(ReplicaParallelDim const &) const; @@ -37,23 +37,23 @@ struct ReplicaParallelDim { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ReplicaParallelDim const &) const; +struct hash<::FlexFlow::ReplicaParallelDim> { + size_t operator()(::FlexFlow::ReplicaParallelDim const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ReplicaParallelDim from_json(json const &); - static void to_json(json &, FlexFlow::ReplicaParallelDim const &); +struct adl_serializer<::FlexFlow::ReplicaParallelDim> { + static ::FlexFlow::ReplicaParallelDim from_json(json const &); + static void to_json(json &, ::FlexFlow::ReplicaParallelDim const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ReplicaParallelDim> { + static 
Gen<::FlexFlow::ReplicaParallelDim> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/replica_parallel_dim_set.dtg.h b/lib/op-attrs/include/op-attrs/replica_parallel_dim_set.dtg.h index 321029347f..1f964c4645 100644 --- a/lib/op-attrs/include/op-attrs/replica_parallel_dim_set.dtg.h +++ b/lib/op-attrs/include/op-attrs/replica_parallel_dim_set.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct ReplicaParallelDimSet { ReplicaParallelDimSet() = delete; - ReplicaParallelDimSet( + explicit ReplicaParallelDimSet( ::FlexFlow::SumDegree const &sum_degree, ::FlexFlow::DiscardCopyDegree const &discard_copy_degree); @@ -39,23 +39,23 @@ struct ReplicaParallelDimSet { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ReplicaParallelDimSet const &) const; +struct hash<::FlexFlow::ReplicaParallelDimSet> { + size_t operator()(::FlexFlow::ReplicaParallelDimSet const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ReplicaParallelDimSet from_json(json const &); - static void to_json(json &, FlexFlow::ReplicaParallelDimSet const &); +struct adl_serializer<::FlexFlow::ReplicaParallelDimSet> { + static ::FlexFlow::ReplicaParallelDimSet from_json(json const &); + static void to_json(json &, ::FlexFlow::ReplicaParallelDimSet const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ReplicaParallelDimSet> { + static Gen<::FlexFlow::ReplicaParallelDimSet> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/shard_parallel_dim.dtg.h b/lib/op-attrs/include/op-attrs/shard_parallel_dim.dtg.h index 631852c259..a1cdea1fce 100644 --- a/lib/op-attrs/include/op-attrs/shard_parallel_dim.dtg.h +++ b/lib/op-attrs/include/op-attrs/shard_parallel_dim.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct ShardParallelDim { ShardParallelDim() = delete; - ShardParallelDim(size_t const &size, int 
const °ree); + explicit ShardParallelDim(size_t const &size, int const °ree); bool operator==(ShardParallelDim const &) const; bool operator!=(ShardParallelDim const &) const; @@ -35,23 +35,23 @@ struct ShardParallelDim { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ShardParallelDim const &) const; +struct hash<::FlexFlow::ShardParallelDim> { + size_t operator()(::FlexFlow::ShardParallelDim const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ShardParallelDim from_json(json const &); - static void to_json(json &, FlexFlow::ShardParallelDim const &); +struct adl_serializer<::FlexFlow::ShardParallelDim> { + static ::FlexFlow::ShardParallelDim from_json(json const &); + static void to_json(json &, ::FlexFlow::ShardParallelDim const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ShardParallelDim> { + static Gen<::FlexFlow::ShardParallelDim> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/tensor_dims.dtg.h b/lib/op-attrs/include/op-attrs/tensor_dims.dtg.h index a8e46a4626..1d50442831 100644 --- a/lib/op-attrs/include/op-attrs/tensor_dims.dtg.h +++ b/lib/op-attrs/include/op-attrs/tensor_dims.dtg.h @@ -21,7 +21,7 @@ namespace FlexFlow { struct TensorDims { TensorDims() = delete; - TensorDims(::FlexFlow::FFOrdered const &ff_ordered); + explicit TensorDims(::FlexFlow::FFOrdered const &ff_ordered); bool operator==(TensorDims const &) const; bool operator!=(TensorDims const &) const; @@ -35,23 +35,23 @@ struct TensorDims { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::TensorDims const &) const; +struct hash<::FlexFlow::TensorDims> { + size_t operator()(::FlexFlow::TensorDims const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::TensorDims from_json(json const &); - static void 
to_json(json &, FlexFlow::TensorDims const &); +struct adl_serializer<::FlexFlow::TensorDims> { + static ::FlexFlow::TensorDims from_json(json const &); + static void to_json(json &, ::FlexFlow::TensorDims const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::TensorDims> { + static Gen<::FlexFlow::TensorDims> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/include/op-attrs/tensor_shape.dtg.h b/lib/op-attrs/include/op-attrs/tensor_shape.dtg.h index f36d5d1306..17a1d88994 100644 --- a/lib/op-attrs/include/op-attrs/tensor_shape.dtg.h +++ b/lib/op-attrs/include/op-attrs/tensor_shape.dtg.h @@ -22,8 +22,8 @@ namespace FlexFlow { struct TensorShape { TensorShape() = delete; - TensorShape(::FlexFlow::TensorDims const &dims, - ::FlexFlow::DataType const &data_type); + explicit TensorShape(::FlexFlow::TensorDims const &dims, + ::FlexFlow::DataType const &data_type); bool operator==(TensorShape const &) const; bool operator!=(TensorShape const &) const; @@ -38,23 +38,23 @@ struct TensorShape { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::TensorShape const &) const; +struct hash<::FlexFlow::TensorShape> { + size_t operator()(::FlexFlow::TensorShape const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::TensorShape from_json(json const &); - static void to_json(json &, FlexFlow::TensorShape const &); +struct adl_serializer<::FlexFlow::TensorShape> { + static ::FlexFlow::TensorShape from_json(json const &); + static void to_json(json &, ::FlexFlow::TensorShape const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::TensorShape> { + static Gen<::FlexFlow::TensorShape> arbitrary(); }; } // namespace rc diff --git a/lib/op-attrs/src/datatype.cc b/lib/op-attrs/src/datatype.cc deleted file mode 100644 index 
20e55a641f..0000000000 --- a/lib/op-attrs/src/datatype.cc +++ /dev/null @@ -1,24 +0,0 @@ -#include "op-attrs/datatype.h" - -namespace FlexFlow { - -size_t size_of_datatype(DataType data_type) { - switch (data_type) { - case DataType::BOOL: - return sizeof(bool); - case DataType::INT32: - return sizeof(int32_t); - case DataType::INT64: - return sizeof(int64_t); - case DataType::HALF: - return sizeof(float) / 2; - case DataType::FLOAT: - return sizeof(float); - case DataType::DOUBLE: - return sizeof(double); - default: - throw mk_runtime_error("Unknown data type {}", data_type); - } -} - -} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/datatype.cc b/lib/op-attrs/src/op-attrs/datatype.cc new file mode 100644 index 0000000000..06d99db702 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/datatype.cc @@ -0,0 +1,51 @@ +#include "op-attrs/datatype.h" + +namespace FlexFlow { + +size_t size_of_datatype(DataType data_type) { + switch (data_type) { + case DataType::BOOL: + return sizeof(bool); + case DataType::INT32: + return sizeof(int32_t); + case DataType::INT64: + return sizeof(int64_t); + case DataType::HALF: + return sizeof(float) / 2; + case DataType::FLOAT: + return sizeof(float); + case DataType::DOUBLE: + return sizeof(double); + default: + throw mk_runtime_error("Unknown DataType {}", data_type); + } +} + +bool can_strictly_promote_datatype_from_to(DataType src, DataType dst) { + std::unordered_set allowed; + switch (src) { + case DataType::BOOL: + allowed = { + DataType::INT32, DataType::INT64, DataType::FLOAT, DataType::DOUBLE}; + break; + case DataType::INT32: + allowed = {DataType::INT64}; + break; + case DataType::INT64: + break; + case DataType::HALF: + allowed = {DataType::FLOAT, DataType::DOUBLE}; + break; + case DataType::FLOAT: + allowed = {DataType::DOUBLE}; + break; + case DataType::DOUBLE: + break; + default: + throw mk_runtime_error(fmt::format("Unknown DataType {}", src)); + } + + return contains(allowed, dst); +} + +} // namespace FlexFlow 
diff --git a/lib/op-attrs/src/op-attrs/ff_dim.dtg.cc b/lib/op-attrs/src/op-attrs/ff_dim.dtg.cc index 8b22dfd18d..8cebeaeed0 100644 --- a/lib/op-attrs/src/op-attrs/ff_dim.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ff_dim.dtg.cc @@ -34,7 +34,8 @@ bool ff_dim_t::operator>=(ff_dim_t const &other) const { } // namespace FlexFlow namespace std { -size_t hash::operator()(FlexFlow::ff_dim_t const &x) const { +size_t + hash::operator()(::FlexFlow::ff_dim_t const &x) const { size_t result = 0; result ^= std::hash{}(x.value) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -43,12 +44,12 @@ size_t hash::operator()(FlexFlow::ff_dim_t const &x) const { } // namespace std namespace nlohmann { -FlexFlow::ff_dim_t - adl_serializer::from_json(json const &j) { - return {j.at("value").template get()}; +::FlexFlow::ff_dim_t + adl_serializer<::FlexFlow::ff_dim_t>::from_json(json const &j) { + return ::FlexFlow::ff_dim_t{j.at("value").template get()}; } -void adl_serializer::to_json(json &j, - FlexFlow::ff_dim_t const &v) { +void adl_serializer<::FlexFlow::ff_dim_t>::to_json( + json &j, ::FlexFlow::ff_dim_t const &v) { j["__type"] = "ff_dim_t"; j["value"] = v.value; } diff --git a/lib/op-attrs/src/op-attrs/l1_regularizer_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/l1_regularizer_attrs.dtg.cc index ed06df2c78..747108c386 100644 --- a/lib/op-attrs/src/op-attrs/l1_regularizer_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/l1_regularizer_attrs.dtg.cc @@ -35,7 +35,7 @@ bool L1RegularizerAttrs::operator>=(L1RegularizerAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::L1RegularizerAttrs const &x) const { + ::FlexFlow::L1RegularizerAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.lambda) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -44,21 +44,22 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::L1RegularizerAttrs - adl_serializer::from_json(json const &j) { - return {j.at("lambda").template get()}; 
+::FlexFlow::L1RegularizerAttrs + adl_serializer<::FlexFlow::L1RegularizerAttrs>::from_json(json const &j) { + return ::FlexFlow::L1RegularizerAttrs{j.at("lambda").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::L1RegularizerAttrs const &v) { +void adl_serializer<::FlexFlow::L1RegularizerAttrs>::to_json( + json &j, ::FlexFlow::L1RegularizerAttrs const &v) { j["__type"] = "L1RegularizerAttrs"; j["lambda"] = v.lambda; } } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::L1RegularizerAttrs> + Arbitrary<::FlexFlow::L1RegularizerAttrs>::arbitrary() { + return gen::construct<::FlexFlow::L1RegularizerAttrs>( + gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/l2_regularizer_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/l2_regularizer_attrs.dtg.cc index f0f3f34ee5..877f1703ca 100644 --- a/lib/op-attrs/src/op-attrs/l2_regularizer_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/l2_regularizer_attrs.dtg.cc @@ -35,7 +35,7 @@ bool L2RegularizerAttrs::operator>=(L2RegularizerAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::L2RegularizerAttrs const &x) const { + ::FlexFlow::L2RegularizerAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.lambda) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -44,21 +44,22 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::L2RegularizerAttrs - adl_serializer::from_json(json const &j) { - return {j.at("lambda").template get()}; +::FlexFlow::L2RegularizerAttrs + adl_serializer<::FlexFlow::L2RegularizerAttrs>::from_json(json const &j) { + return ::FlexFlow::L2RegularizerAttrs{j.at("lambda").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::L2RegularizerAttrs const &v) { +void adl_serializer<::FlexFlow::L2RegularizerAttrs>::to_json( + json &j, ::FlexFlow::L2RegularizerAttrs const &v) { j["__type"] = "L2RegularizerAttrs"; j["lambda"] = 
v.lambda; } } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::L2RegularizerAttrs> + Arbitrary<::FlexFlow::L2RegularizerAttrs>::arbitrary() { + return gen::construct<::FlexFlow::L2RegularizerAttrs>( + gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/attention.cc b/lib/op-attrs/src/op-attrs/ops/attention.cc index 14ab2b9b00..3e4095eca8 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention.cc @@ -114,7 +114,7 @@ tl::expected // W^O in "Attention Is All You Need" top of page 5, with num_heads factored // out - size_t outWeightSize = parsed.value_size * attrs.embed_dim; + size_t outWeightSize = attrs.vdim * attrs.embed_dim; return TensorShape{ TensorDims{FFOrdered{ @@ -126,6 +126,50 @@ tl::expected }; } +tl::expected + get_input_bias_shape(MultiHeadAttentionAttrs const &attrs, + TensorShape const &input_q, + TensorShape const &input_k, + TensorShape const &input_v) { + MultiHeadAttentionInputs parsed = ({ + tl::expected parse_result = + parse_attention_input_shape(input_q, input_k, input_v); + if (!parse_result.has_value()) { + return tl::unexpected(parse_result.error()); + } + parse_result.value(); + }); + + return TensorShape{ + TensorDims{FFOrdered{ + size_t_from_int(attrs.kdim + attrs.kdim + attrs.vdim), + }}, + parsed.datatype, + }; +} + +tl::expected + get_output_bias_shape(MultiHeadAttentionAttrs const &attrs, + TensorShape const &input_q, + TensorShape const &input_k, + TensorShape const &input_v) { + MultiHeadAttentionInputs parsed = ({ + tl::expected parse_result = + parse_attention_input_shape(input_q, input_k, input_v); + if (!parse_result.has_value()) { + return tl::unexpected(parse_result.error()); + } + parse_result.value(); + }); + + return TensorShape{ + TensorDims{FFOrdered{ + size_t_from_int(attrs.embed_dim), + }}, + parsed.datatype, + }; +} + tl::expected 
get_weights_shape(MultiHeadAttentionAttrs const &attrs, ParallelTensorShape const &input_q, @@ -158,6 +202,78 @@ tl::expected FFOrdered{joined_dim_degree, head_dim_degree}); } +tl::expected + get_input_bias_shape(MultiHeadAttentionAttrs const &attrs, + ParallelTensorShape const &input_q, + ParallelTensorShape const &input_k, + ParallelTensorShape const &input_v) { + MultiHeadAttentionParallelInputs parsed = ({ + tl::expected parse_result = + parse_attention_parallel_input_shape(input_q, input_k, input_v); + if (!parse_result.has_value()) { + return tl::unexpected(parse_result.error()); + } + + parse_result.value(); + }); + + TensorShape unpar_shape = ({ + tl::expected result_unpar = + get_input_bias_shape(attrs, + get_reduced_shape(input_q), + get_reduced_shape(input_k), + get_reduced_shape(input_v)); + if (!result_unpar.has_value()) { + return tl::unexpected(result_unpar.error()); + } + + result_unpar.value(); + }); + + SumDegree sum_degree = SumDegree{1}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{ + parsed.batch_dim.degree * parsed.discard_copy_degree.value}; + FFOrdered shard_degrees = FFOrdered{1}; + return lift_to_parallel_with_degrees( + unpar_shape, sum_degree, discard_copy_degree, shard_degrees); +} + +tl::expected + get_output_bias_shape(MultiHeadAttentionAttrs const &attrs, + ParallelTensorShape const &input_q, + ParallelTensorShape const &input_k, + ParallelTensorShape const &input_v) { + MultiHeadAttentionParallelInputs parsed = ({ + tl::expected parse_result = + parse_attention_parallel_input_shape(input_q, input_k, input_v); + if (!parse_result.has_value()) { + return tl::unexpected(parse_result.error()); + } + + parse_result.value(); + }); + + TensorShape unpar_shape = ({ + tl::expected result_unpar = + get_output_bias_shape(attrs, + get_reduced_shape(input_q), + get_reduced_shape(input_k), + get_reduced_shape(input_v)); + if (!result_unpar.has_value()) { + return tl::unexpected(result_unpar.error()); + } + + result_unpar.value(); + 
}); + + SumDegree sum_degree = SumDegree{1}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{ + parsed.batch_dim.degree * parsed.discard_copy_degree.value}; + FFOrdered shard_degrees = FFOrdered{1}; + return lift_to_parallel_with_degrees( + unpar_shape, sum_degree, discard_copy_degree, shard_degrees); +} + tl::expected get_output_shape(MultiHeadAttentionAttrs const &attrs, ParallelTensorShape const &input_q, diff --git a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_inputs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_inputs.dtg.cc index 26d3138eb4..a5a66b1a77 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_inputs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_inputs.dtg.cc @@ -112,7 +112,7 @@ bool MultiHeadAttentionInputs::operator>=( namespace std { size_t hash::operator()( - FlexFlow::MultiHeadAttentionInputs const &x) const { + ::FlexFlow::MultiHeadAttentionInputs const &x) const { size_t result = 0; result ^= std::hash{}(x.batch_size) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -131,18 +131,19 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::MultiHeadAttentionInputs - adl_serializer::from_json( +::FlexFlow::MultiHeadAttentionInputs + adl_serializer<::FlexFlow::MultiHeadAttentionInputs>::from_json( json const &j) { - return {j.at("batch_size").template get(), - j.at("sequence_length").template get(), - j.at("query_size").template get(), - j.at("key_size").template get(), - j.at("value_size").template get(), - j.at("datatype").template get<::FlexFlow::DataType>()}; + return ::FlexFlow::MultiHeadAttentionInputs{ + j.at("batch_size").template get(), + j.at("sequence_length").template get(), + j.at("query_size").template get(), + j.at("key_size").template get(), + j.at("value_size").template get(), + j.at("datatype").template get<::FlexFlow::DataType>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::MultiHeadAttentionInputs 
const &v) { +void adl_serializer<::FlexFlow::MultiHeadAttentionInputs>::to_json( + json &j, ::FlexFlow::MultiHeadAttentionInputs const &v) { j["__type"] = "MultiHeadAttentionInputs"; j["batch_size"] = v.batch_size; j["sequence_length"] = v.sequence_length; @@ -154,9 +155,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::MultiHeadAttentionInputs> + Arbitrary<::FlexFlow::MultiHeadAttentionInputs>::arbitrary() { + return gen::construct<::FlexFlow::MultiHeadAttentionInputs>( gen::arbitrary(), gen::arbitrary(), gen::arbitrary(), diff --git a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc index 2cd5b7ec00..b5ddeaac30 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc @@ -122,7 +122,7 @@ tl::expected query_dim, key_dim, value_dim, - discard_copy_q, + DiscardCopyDegree{discard_copy_q}, input_q.data_type, }; diff --git a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.cc index 94784d83cc..be4507677b 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.cc @@ -128,7 +128,7 @@ bool MultiHeadAttentionParallelInputs::operator>=( namespace std { size_t hash::operator()( - FlexFlow::MultiHeadAttentionParallelInputs const &x) const { + ::FlexFlow::MultiHeadAttentionParallelInputs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ShardParallelDim>{}(x.batch_dim) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -149,10 +149,10 @@ size_t hash::operator()( } // namespace std namespace nlohmann { 
-FlexFlow::MultiHeadAttentionParallelInputs - adl_serializer::from_json( +::FlexFlow::MultiHeadAttentionParallelInputs + adl_serializer<::FlexFlow::MultiHeadAttentionParallelInputs>::from_json( json const &j) { - return { + return ::FlexFlow::MultiHeadAttentionParallelInputs{ j.at("batch_dim").template get<::FlexFlow::ShardParallelDim>(), j.at("sequence_dim").template get<::FlexFlow::ShardParallelDim>(), j.at("query_dim").template get<::FlexFlow::ShardParallelDim>(), @@ -161,8 +161,8 @@ FlexFlow::MultiHeadAttentionParallelInputs j.at("discard_copy_degree").template get<::FlexFlow::DiscardCopyDegree>(), j.at("datatype").template get<::FlexFlow::DataType>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::MultiHeadAttentionParallelInputs const &v) { +void adl_serializer<::FlexFlow::MultiHeadAttentionParallelInputs>::to_json( + json &j, ::FlexFlow::MultiHeadAttentionParallelInputs const &v) { j["__type"] = "MultiHeadAttentionParallelInputs"; j["batch_dim"] = v.batch_dim; j["sequence_dim"] = v.sequence_dim; @@ -175,9 +175,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::MultiHeadAttentionParallelInputs> + Arbitrary<::FlexFlow::MultiHeadAttentionParallelInputs>::arbitrary() { + return gen::construct<::FlexFlow::MultiHeadAttentionParallelInputs>( gen::arbitrary<::FlexFlow::ShardParallelDim>(), gen::arbitrary<::FlexFlow::ShardParallelDim>(), gen::arbitrary<::FlexFlow::ShardParallelDim>(), diff --git a/lib/op-attrs/src/op-attrs/ops/attention_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/attention_attrs.dtg.cc index ad0c094969..a5fbcd6cf6 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention_attrs.dtg.cc @@ -135,7 +135,7 @@ bool MultiHeadAttentionAttrs::operator>=( namespace std { size_t hash::operator()( - FlexFlow::MultiHeadAttentionAttrs const &x) const { + ::FlexFlow::MultiHeadAttentionAttrs const &x) const { 
size_t result = 0; result ^= std::hash{}(x.embed_dim) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -158,20 +158,21 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::MultiHeadAttentionAttrs - adl_serializer::from_json( +::FlexFlow::MultiHeadAttentionAttrs + adl_serializer<::FlexFlow::MultiHeadAttentionAttrs>::from_json( json const &j) { - return {j.at("embed_dim").template get(), - j.at("num_heads").template get(), - j.at("kdim").template get(), - j.at("vdim").template get(), - j.at("dropout").template get(), - j.at("bias").template get(), - j.at("add_bias_kv").template get(), - j.at("add_zero_attn").template get()}; + return ::FlexFlow::MultiHeadAttentionAttrs{ + j.at("embed_dim").template get(), + j.at("num_heads").template get(), + j.at("kdim").template get(), + j.at("vdim").template get(), + j.at("dropout").template get(), + j.at("bias").template get(), + j.at("add_bias_kv").template get(), + j.at("add_zero_attn").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::MultiHeadAttentionAttrs const &v) { +void adl_serializer<::FlexFlow::MultiHeadAttentionAttrs>::to_json( + json &j, ::FlexFlow::MultiHeadAttentionAttrs const &v) { j["__type"] = "MultiHeadAttentionAttrs"; j["embed_dim"] = v.embed_dim; j["num_heads"] = v.num_heads; @@ -185,9 +186,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::MultiHeadAttentionAttrs> + Arbitrary<::FlexFlow::MultiHeadAttentionAttrs>::arbitrary() { + return gen::construct<::FlexFlow::MultiHeadAttentionAttrs>( gen::arbitrary(), gen::arbitrary(), gen::arbitrary(), diff --git a/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc b/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc index cbda4ea533..f9836bd3ed 100644 --- a/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc +++ b/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc @@ -163,8 +163,8 @@ tl::expected output_p, }, ReplicaParallelDimSet{ - 
output_sum_degree, - output_discard_copy_degree, + SumDegree{output_sum_degree}, + DiscardCopyDegree{output_discard_copy_degree}, }, }, input_lhs.data_type, diff --git a/lib/op-attrs/src/op-attrs/ops/batch_matmul.dtg.cc b/lib/op-attrs/src/op-attrs/ops/batch_matmul.dtg.cc index f178d40696..2395bf5691 100644 --- a/lib/op-attrs/src/op-attrs/ops/batch_matmul.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/batch_matmul.dtg.cc @@ -43,7 +43,7 @@ bool BatchMatmulAttrs::operator>=(BatchMatmulAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::BatchMatmulAttrs const &x) const { + ::FlexFlow::BatchMatmulAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.a_seq_length_dim) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -54,13 +54,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::BatchMatmulAttrs - adl_serializer::from_json(json const &j) { - return {j.at("a_seq_length_dim").template get(), - j.at("b_seq_length_dim").template get()}; +::FlexFlow::BatchMatmulAttrs + adl_serializer<::FlexFlow::BatchMatmulAttrs>::from_json(json const &j) { + return ::FlexFlow::BatchMatmulAttrs{ + j.at("a_seq_length_dim").template get(), + j.at("b_seq_length_dim").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::BatchMatmulAttrs const &v) { +void adl_serializer<::FlexFlow::BatchMatmulAttrs>::to_json( + json &j, ::FlexFlow::BatchMatmulAttrs const &v) { j["__type"] = "BatchMatmulAttrs"; j["a_seq_length_dim"] = v.a_seq_length_dim; j["b_seq_length_dim"] = v.b_seq_length_dim; @@ -68,10 +69,10 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary(), - gen::arbitrary()); +Gen<::FlexFlow::BatchMatmulAttrs> + Arbitrary<::FlexFlow::BatchMatmulAttrs>::arbitrary() { + return gen::construct<::FlexFlow::BatchMatmulAttrs>(gen::arbitrary(), + gen::arbitrary()); } } // namespace rc diff --git 
a/lib/op-attrs/src/op-attrs/ops/batch_norm_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/batch_norm_attrs.dtg.cc index cb8dcadae1..13f20a82a5 100644 --- a/lib/op-attrs/src/op-attrs/ops/batch_norm_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/batch_norm_attrs.dtg.cc @@ -35,7 +35,7 @@ bool BatchNormAttrs::operator>=(BatchNormAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::BatchNormAttrs const &x) const { + ::FlexFlow::BatchNormAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.relu) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -44,20 +44,21 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::BatchNormAttrs - adl_serializer::from_json(json const &j) { - return {j.at("relu").template get()}; +::FlexFlow::BatchNormAttrs + adl_serializer<::FlexFlow::BatchNormAttrs>::from_json(json const &j) { + return ::FlexFlow::BatchNormAttrs{j.at("relu").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::BatchNormAttrs const &v) { +void adl_serializer<::FlexFlow::BatchNormAttrs>::to_json( + json &j, ::FlexFlow::BatchNormAttrs const &v) { j["__type"] = "BatchNormAttrs"; j["relu"] = v.relu; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::BatchNormAttrs> + Arbitrary<::FlexFlow::BatchNormAttrs>::arbitrary() { + return gen::construct<::FlexFlow::BatchNormAttrs>(gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/broadcast.dtg.cc b/lib/op-attrs/src/op-attrs/ops/broadcast.dtg.cc index ec08bd6a1d..85fff2518c 100644 --- a/lib/op-attrs/src/op-attrs/ops/broadcast.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/broadcast.dtg.cc @@ -38,7 +38,7 @@ bool BroadcastAttrs::operator>=(BroadcastAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::BroadcastAttrs const &x) const { + ::FlexFlow::BroadcastAttrs const &x) const { size_t result = 0; result ^= 
std::hash<::FlexFlow::stack_vector>{}( x.target_dims) + @@ -48,21 +48,23 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::BroadcastAttrs - adl_serializer::from_json(json const &j) { - return {j.at("target_dims") - .template get<::FlexFlow::stack_vector>()}; +::FlexFlow::BroadcastAttrs + adl_serializer<::FlexFlow::BroadcastAttrs>::from_json(json const &j) { + return ::FlexFlow::BroadcastAttrs{ + j.at("target_dims") + .template get<::FlexFlow::stack_vector>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::BroadcastAttrs const &v) { +void adl_serializer<::FlexFlow::BroadcastAttrs>::to_json( + json &j, ::FlexFlow::BroadcastAttrs const &v) { j["__type"] = "BroadcastAttrs"; j["target_dims"] = v.target_dims; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::BroadcastAttrs> + Arbitrary<::FlexFlow::BroadcastAttrs>::arbitrary() { + return gen::construct<::FlexFlow::BroadcastAttrs>( gen::arbitrary<::FlexFlow::stack_vector>()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/cast.cc b/lib/op-attrs/src/op-attrs/ops/cast.cc index e4ab178a7e..444409ffcb 100644 --- a/lib/op-attrs/src/op-attrs/ops/cast.cc +++ b/lib/op-attrs/src/op-attrs/ops/cast.cc @@ -2,6 +2,37 @@ namespace FlexFlow { +tl::expected + get_output_shape(CastAttrs const &attrs, TensorShape const &input) { + + if (!can_strictly_promote_datatype_from_to(input.data_type, attrs.dtype)) { + return tl::unexpected(fmt::format( + "Cast cannot strictly promote input datatype {} to output datatype {}", + input.data_type, + attrs.dtype)); + } + + TensorShape output = input; + output.data_type = attrs.dtype; + return output; +} + +tl::expected + get_output_shape(CastAttrs const &attrs, ParallelTensorShape const &input) { + + if (!can_strictly_promote_datatype_from_to(input.data_type, attrs.dtype)) { + return tl::unexpected(fmt::format( + "Cast cannot strictly promote input datatype {} to output datatype {}", + 
input.data_type, + attrs.dtype)); + } + + ParallelTensorShape output = input; + output.data_type = attrs.dtype; + + return output; +} + /* bool CastAttrs::is_valid(ParallelTensorShape const &input) const { */ /* bool valid = input.is_valid(); */ /* valid &= (input.at(input.num_dims() - 1).degree == 1); */ diff --git a/lib/op-attrs/src/op-attrs/ops/cast_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/cast_attrs.dtg.cc index 28367f3449..423fc2e046 100644 --- a/lib/op-attrs/src/op-attrs/ops/cast_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/cast_attrs.dtg.cc @@ -35,8 +35,8 @@ bool CastAttrs::operator>=(CastAttrs const &other) const { } // namespace FlexFlow namespace std { -size_t - hash::operator()(FlexFlow::CastAttrs const &x) const { +size_t hash::operator()( + ::FlexFlow::CastAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.dtype) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -45,20 +45,20 @@ size_t } // namespace std namespace nlohmann { -FlexFlow::CastAttrs - adl_serializer::from_json(json const &j) { - return {j.at("dtype").template get()}; +::FlexFlow::CastAttrs + adl_serializer<::FlexFlow::CastAttrs>::from_json(json const &j) { + return ::FlexFlow::CastAttrs{j.at("dtype").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::CastAttrs const &v) { +void adl_serializer<::FlexFlow::CastAttrs>::to_json( + json &j, ::FlexFlow::CastAttrs const &v) { j["__type"] = "CastAttrs"; j["dtype"] = v.dtype; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::CastAttrs> Arbitrary<::FlexFlow::CastAttrs>::arbitrary() { + return gen::construct<::FlexFlow::CastAttrs>(gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/combine_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/combine_attrs.dtg.cc index 516d3b0318..198da728bf 100644 --- a/lib/op-attrs/src/op-attrs/ops/combine_attrs.dtg.cc +++ 
b/lib/op-attrs/src/op-attrs/ops/combine_attrs.dtg.cc @@ -45,7 +45,7 @@ bool CombineAttrs::operator>=(CombineAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::CombineAttrs const &x) const { + ::FlexFlow::CombineAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ff_dim_t>{}(x.combine_dim) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -56,13 +56,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::CombineAttrs - adl_serializer::from_json(json const &j) { - return {j.at("combine_dim").template get<::FlexFlow::ff_dim_t>(), - j.at("combine_degree").template get()}; +::FlexFlow::CombineAttrs + adl_serializer<::FlexFlow::CombineAttrs>::from_json(json const &j) { + return ::FlexFlow::CombineAttrs{ + j.at("combine_dim").template get<::FlexFlow::ff_dim_t>(), + j.at("combine_degree").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::CombineAttrs const &v) { +void adl_serializer<::FlexFlow::CombineAttrs>::to_json( + json &j, ::FlexFlow::CombineAttrs const &v) { j["__type"] = "CombineAttrs"; j["combine_dim"] = v.combine_dim; j["combine_degree"] = v.combine_degree; @@ -70,8 +71,8 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::CombineAttrs> Arbitrary<::FlexFlow::CombineAttrs>::arbitrary() { + return gen::construct<::FlexFlow::CombineAttrs>( gen::arbitrary<::FlexFlow::ff_dim_t>(), gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/concat_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/concat_attrs.dtg.cc index 20db25d485..2bbd9ba50e 100644 --- a/lib/op-attrs/src/op-attrs/ops/concat_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/concat_attrs.dtg.cc @@ -45,7 +45,7 @@ bool ConcatAttrs::operator>=(ConcatAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ConcatAttrs const &x) const { + ::FlexFlow::ConcatAttrs const &x) const { 
size_t result = 0; result ^= std::hash<::FlexFlow::ff_dim_t>{}(x.axis) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -56,13 +56,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ConcatAttrs - adl_serializer::from_json(json const &j) { - return {j.at("axis").template get<::FlexFlow::ff_dim_t>(), - j.at("num_inputs").template get()}; +::FlexFlow::ConcatAttrs + adl_serializer<::FlexFlow::ConcatAttrs>::from_json(json const &j) { + return ::FlexFlow::ConcatAttrs{ + j.at("axis").template get<::FlexFlow::ff_dim_t>(), + j.at("num_inputs").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ConcatAttrs const &v) { +void adl_serializer<::FlexFlow::ConcatAttrs>::to_json( + json &j, ::FlexFlow::ConcatAttrs const &v) { j["__type"] = "ConcatAttrs"; j["axis"] = v.axis; j["num_inputs"] = v.num_inputs; @@ -70,8 +71,8 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ConcatAttrs> Arbitrary<::FlexFlow::ConcatAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ConcatAttrs>( gen::arbitrary<::FlexFlow::ff_dim_t>(), gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/conv_2d.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d.cc index c9ec467af4..03ae18a1d9 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d.cc +++ b/lib/op-attrs/src/op-attrs/ops/conv_2d.cc @@ -57,120 +57,75 @@ TensorShape get_output_shape(Conv2DAttrs const &attrs, input.datatype}; } -ParallelTensorShape - get_kernel_shape(Conv2DAttrs const &attrs, - ParallelTensorShape const &raw_input_shape) { +ParallelTensorShape get_kernel_shape(Conv2DAttrs const &attrs, + ParallelTensorShape const &input) { assert(attrs.groups == 1); // TODO(@lockshaw): currently not supported - Conv2DParallelInputShape input = parse_parallel_input_shape(raw_input_shape); - - ShardParallelDim output_channels_dim = {size_t_from_int(attrs.out_channels), - 
input.discard_copy_reduction_degree}; - ShardParallelDim input_channels_dim = { - size_t_from_int(input.channel_dim.size), input.channel_dim.degree}; - ShardParallelDim kernel_height_dim = {size_t_from_int(attrs.kernel_h), 1}; - ShardParallelDim kernel_width_dim = {size_t_from_int(attrs.kernel_w), 1}; - - int sum_degree = 1; - int discard_copy_degree = input.height_dim.degree * input.width_dim.degree * - input.sum_reduction_degree; - - ParallelTensorShape result = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - output_channels_dim, - input_channels_dim, - kernel_height_dim, - kernel_width_dim, - }, - ReplicaParallelDimSet{ - sum_degree, - discard_copy_degree, - }, - }, - input.datatype, - }; - assert(total_parallel_degree(result.dims) == - total_parallel_degree(raw_input_shape.dims)); + Conv2DParallelInputShape parsed = parse_parallel_input_shape(input); + + TensorShape unpar = get_kernel_shape(attrs, get_reduced_shape(input)); + + assert(parsed.height_dim.degree == 1); + assert(parsed.width_dim.degree == 1); - return result; + SumDegree sum_degree = SumDegree{1}; + DiscardCopyDegree discard_copy_degree = + DiscardCopyDegree{parsed.sample_dim.degree * parsed.sum_reduction_degree}; + FFOrdered shard_degrees = { + parsed.discard_copy_reduction_degree, + parsed.channel_dim.degree, + 1, + 1, + }; + + return lift_to_parallel_with_degrees( + unpar, sum_degree, discard_copy_degree, shard_degrees); } ParallelTensorShape get_bias_shape(Conv2DAttrs const &attrs, - ParallelTensorShape const &raw_input_shape) { + ParallelTensorShape const &input) { assert(attrs.groups == 1); // TODO(@lockshaw): currently not supported - Conv2DParallelInputShape input = parse_parallel_input_shape(raw_input_shape); - - ShardParallelDim output_channels_dim = {size_t_from_int(attrs.out_channels), - input.discard_copy_reduction_degree}; - - int sum_degree = 1; - int discard_copy_degree = input.height_dim.degree * input.width_dim.degree * - input.sum_reduction_degree * - 
input.channel_dim.degree; - - ParallelTensorShape result = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - output_channels_dim, - }, - ReplicaParallelDimSet{ - sum_degree, - discard_copy_degree, - }, - }, - input.datatype, - }; - assert(total_parallel_degree(result.dims) == - total_parallel_degree(raw_input_shape.dims)); + Conv2DParallelInputShape parsed = parse_parallel_input_shape(input); - return result; + TensorShape unpar = get_bias_shape(attrs, get_reduced_shape(input)); + + SumDegree sum_degree = + SumDegree{parsed.sum_reduction_degree * parsed.channel_dim.degree}; + DiscardCopyDegree discard_copy_degree = + DiscardCopyDegree{parsed.height_dim.degree * parsed.width_dim.degree * + parsed.sample_dim.degree}; + FFOrdered shard_degrees = { + parsed.discard_copy_reduction_degree, + }; + + return lift_to_parallel_with_degrees( + unpar, sum_degree, discard_copy_degree, shard_degrees); } -ParallelTensorShape - get_output_shape(Conv2DAttrs const &attrs, - ParallelTensorShape const &raw_input_shape) { +ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, + ParallelTensorShape const &input) { assert(attrs.groups == 1); // TODO(@lockshaw): currently not supported - Conv2DParallelInputShape input = parse_parallel_input_shape(raw_input_shape); - - TensorShape unpar_output_shape = - get_output_shape(attrs, get_reduced_shape(raw_input_shape)); - - size_t num_samples = dim_at_idx(unpar_output_shape, ff_dim_t{0}); - size_t num_channels = dim_at_idx(unpar_output_shape, ff_dim_t{1}); - size_t height = dim_at_idx(unpar_output_shape, ff_dim_t{2}); - size_t width = dim_at_idx(unpar_output_shape, ff_dim_t{3}); - - ShardParallelDim sample_dim = {num_samples, input.sample_dim.degree}; - ShardParallelDim channel_dim = {num_channels, - input.discard_copy_reduction_degree}; - ShardParallelDim height_dim = {height, input.height_dim.degree}; - ShardParallelDim width_dim = {width, input.width_dim.degree}; - - int sum_degree = input.channel_dim.degree * 
input.sum_reduction_degree; - int discard_copy_degree = 1; - - ParallelTensorShape result = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - sample_dim, - channel_dim, - height_dim, - width_dim, - }, - ReplicaParallelDimSet{ - sum_degree, - discard_copy_degree, - }, - }, - input.datatype, - }; - assert(total_parallel_degree(result.dims) == - total_parallel_degree(raw_input_shape.dims)); + Conv2DParallelInputShape parsed = parse_parallel_input_shape(input); + + TensorShape unpar = get_output_shape(attrs, get_reduced_shape(input)); + + assert(parsed.height_dim.degree == 1); + assert(parsed.width_dim.degree == 1); + + SumDegree sum_degree = + SumDegree{parsed.sum_reduction_degree * parsed.channel_dim.degree}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1}; + FFOrdered shard_degrees = { + parsed.sample_dim.degree, + parsed.discard_copy_reduction_degree, + 1, + 1, + }; - return result; + return lift_to_parallel_with_degrees( + unpar, sum_degree, discard_copy_degree, shard_degrees); } } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.dtg.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.dtg.cc index 74df30e2d7..90df5ae1a3 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.dtg.cc @@ -91,7 +91,7 @@ bool Conv2DInputShape::operator>=(Conv2DInputShape const &other) const { namespace std { size_t hash::operator()( - FlexFlow::Conv2DInputShape const &x) const { + ::FlexFlow::Conv2DInputShape const &x) const { size_t result = 0; result ^= std::hash{}(x.num_samples) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -108,16 +108,17 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::Conv2DInputShape - adl_serializer::from_json(json const &j) { - return {j.at("num_samples").template get(), - j.at("num_channels").template get(), - j.at("height").template get(), - j.at("width").template 
get(), - j.at("datatype").template get<::FlexFlow::DataType>()}; +::FlexFlow::Conv2DInputShape + adl_serializer<::FlexFlow::Conv2DInputShape>::from_json(json const &j) { + return ::FlexFlow::Conv2DInputShape{ + j.at("num_samples").template get(), + j.at("num_channels").template get(), + j.at("height").template get(), + j.at("width").template get(), + j.at("datatype").template get<::FlexFlow::DataType>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::Conv2DInputShape const &v) { +void adl_serializer<::FlexFlow::Conv2DInputShape>::to_json( + json &j, ::FlexFlow::Conv2DInputShape const &v) { j["__type"] = "Conv2DInputShape"; j["num_samples"] = v.num_samples; j["num_channels"] = v.num_channels; @@ -128,9 +129,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::Conv2DInputShape> + Arbitrary<::FlexFlow::Conv2DInputShape>::arbitrary() { + return gen::construct<::FlexFlow::Conv2DInputShape>( gen::arbitrary(), gen::arbitrary(), gen::arbitrary(), diff --git a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.cc index 32ac4547f1..98f69d14c9 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.cc +++ b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.cc @@ -12,7 +12,7 @@ Conv2DParallelInputShape ShardParallelDim height_dim = shard_dim_at_idx(input, ff_dim_t{2}); ShardParallelDim width_dim = shard_dim_at_idx(input, ff_dim_t{3}); - return Conv2DParallelInputShape{ + Conv2DParallelInputShape parsed = Conv2DParallelInputShape{ sample_dim, channel_dim, height_dim, @@ -21,6 +21,11 @@ Conv2DParallelInputShape get_discard_copy_degree(input), input.data_type, }; + + assert(parsed.height_dim.degree == 1); + assert(parsed.width_dim.degree == 1); + + return parsed; } } // namespace FlexFlow diff --git 
a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.dtg.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.dtg.cc index df854c2b8f..efb73dba1b 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.dtg.cc @@ -132,7 +132,7 @@ bool Conv2DParallelInputShape::operator>=( namespace std { size_t hash::operator()( - FlexFlow::Conv2DParallelInputShape const &x) const { + ::FlexFlow::Conv2DParallelInputShape const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ShardParallelDim>{}(x.sample_dim) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -153,19 +153,20 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::Conv2DParallelInputShape - adl_serializer::from_json( +::FlexFlow::Conv2DParallelInputShape + adl_serializer<::FlexFlow::Conv2DParallelInputShape>::from_json( json const &j) { - return {j.at("sample_dim").template get<::FlexFlow::ShardParallelDim>(), - j.at("channel_dim").template get<::FlexFlow::ShardParallelDim>(), - j.at("height_dim").template get<::FlexFlow::ShardParallelDim>(), - j.at("width_dim").template get<::FlexFlow::ShardParallelDim>(), - j.at("sum_reduction_degree").template get(), - j.at("discard_copy_reduction_degree").template get(), - j.at("datatype").template get<::FlexFlow::DataType>()}; + return ::FlexFlow::Conv2DParallelInputShape{ + j.at("sample_dim").template get<::FlexFlow::ShardParallelDim>(), + j.at("channel_dim").template get<::FlexFlow::ShardParallelDim>(), + j.at("height_dim").template get<::FlexFlow::ShardParallelDim>(), + j.at("width_dim").template get<::FlexFlow::ShardParallelDim>(), + j.at("sum_reduction_degree").template get(), + j.at("discard_copy_reduction_degree").template get(), + j.at("datatype").template get<::FlexFlow::DataType>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::Conv2DParallelInputShape const &v) { +void 
adl_serializer<::FlexFlow::Conv2DParallelInputShape>::to_json( + json &j, ::FlexFlow::Conv2DParallelInputShape const &v) { j["__type"] = "Conv2DParallelInputShape"; j["sample_dim"] = v.sample_dim; j["channel_dim"] = v.channel_dim; @@ -178,9 +179,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::Conv2DParallelInputShape> + Arbitrary<::FlexFlow::Conv2DParallelInputShape>::arbitrary() { + return gen::construct<::FlexFlow::Conv2DParallelInputShape>( gen::arbitrary<::FlexFlow::ShardParallelDim>(), gen::arbitrary<::FlexFlow::ShardParallelDim>(), gen::arbitrary<::FlexFlow::ShardParallelDim>(), diff --git a/lib/op-attrs/src/op-attrs/ops/conv_2d_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d_attrs.dtg.cc index 238b349cbe..696fe08a6f 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/conv_2d_attrs.dtg.cc @@ -160,7 +160,7 @@ bool Conv2DAttrs::operator>=(Conv2DAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::Conv2DAttrs const &x) const { + ::FlexFlow::Conv2DAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.out_channels) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -187,9 +187,9 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::Conv2DAttrs - adl_serializer::from_json(json const &j) { - return { +::FlexFlow::Conv2DAttrs + adl_serializer<::FlexFlow::Conv2DAttrs>::from_json(json const &j) { + return ::FlexFlow::Conv2DAttrs{ j.at("out_channels").template get(), j.at("kernel_h").template get(), j.at("kernel_w").template get(), @@ -201,8 +201,8 @@ FlexFlow::Conv2DAttrs j.at("activation").template get>(), j.at("use_bias").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::Conv2DAttrs const &v) { +void adl_serializer<::FlexFlow::Conv2DAttrs>::to_json( + json &j, ::FlexFlow::Conv2DAttrs const &v) { j["__type"] = "Conv2DAttrs"; 
j["out_channels"] = v.out_channels; j["kernel_h"] = v.kernel_h; @@ -218,8 +218,8 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::Conv2DAttrs> Arbitrary<::FlexFlow::Conv2DAttrs>::arbitrary() { + return gen::construct<::FlexFlow::Conv2DAttrs>( gen::arbitrary(), gen::arbitrary(), gen::arbitrary(), diff --git a/lib/op-attrs/src/op-attrs/ops/dropout_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/dropout_attrs.dtg.cc index 284443a0e4..15f6ad8bb1 100644 --- a/lib/op-attrs/src/op-attrs/ops/dropout_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/dropout_attrs.dtg.cc @@ -36,7 +36,7 @@ bool DropoutAttrs::operator>=(DropoutAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::DropoutAttrs const &x) const { + ::FlexFlow::DropoutAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.rate) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -47,13 +47,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::DropoutAttrs - adl_serializer::from_json(json const &j) { - return {j.at("rate").template get(), - j.at("seed").template get()}; +::FlexFlow::DropoutAttrs + adl_serializer<::FlexFlow::DropoutAttrs>::from_json(json const &j) { + return ::FlexFlow::DropoutAttrs{ + j.at("rate").template get(), + j.at("seed").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::DropoutAttrs const &v) { +void adl_serializer<::FlexFlow::DropoutAttrs>::to_json( + json &j, ::FlexFlow::DropoutAttrs const &v) { j["__type"] = "DropoutAttrs"; j["rate"] = v.rate; j["seed"] = v.seed; @@ -61,8 +62,8 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::DropoutAttrs> Arbitrary<::FlexFlow::DropoutAttrs>::arbitrary() { + return gen::construct<::FlexFlow::DropoutAttrs>( gen::arbitrary(), gen::arbitrary()); } } // namespace rc diff --git 
a/lib/op-attrs/src/op-attrs/ops/element_binary_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/element_binary_attrs.dtg.cc index a0e555cb12..568371c4fe 100644 --- a/lib/op-attrs/src/op-attrs/ops/element_binary_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/element_binary_attrs.dtg.cc @@ -85,7 +85,7 @@ bool ElementBinaryAttrs::operator>=(ElementBinaryAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ElementBinaryAttrs const &x) const { + ::FlexFlow::ElementBinaryAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::OperatorType>{}(x.type) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -100,15 +100,16 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ElementBinaryAttrs - adl_serializer::from_json(json const &j) { - return {j.at("type").template get<::FlexFlow::OperatorType>(), - j.at("compute_type").template get<::FlexFlow::DataType>(), - j.at("should_broadcast_lhs").template get(), - j.at("should_broadcast_rhs").template get()}; +::FlexFlow::ElementBinaryAttrs + adl_serializer<::FlexFlow::ElementBinaryAttrs>::from_json(json const &j) { + return ::FlexFlow::ElementBinaryAttrs{ + j.at("type").template get<::FlexFlow::OperatorType>(), + j.at("compute_type").template get<::FlexFlow::DataType>(), + j.at("should_broadcast_lhs").template get(), + j.at("should_broadcast_rhs").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ElementBinaryAttrs const &v) { +void adl_serializer<::FlexFlow::ElementBinaryAttrs>::to_json( + json &j, ::FlexFlow::ElementBinaryAttrs const &v) { j["__type"] = "ElementBinaryAttrs"; j["type"] = v.type; j["compute_type"] = v.compute_type; @@ -118,9 +119,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ElementBinaryAttrs> + Arbitrary<::FlexFlow::ElementBinaryAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ElementBinaryAttrs>( 
gen::arbitrary<::FlexFlow::OperatorType>(), gen::arbitrary<::FlexFlow::DataType>(), gen::arbitrary(), diff --git a/lib/op-attrs/src/op-attrs/ops/element_unary_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/element_unary_attrs.dtg.cc index 9a7052dd5c..4c246906eb 100644 --- a/lib/op-attrs/src/op-attrs/ops/element_unary_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/element_unary_attrs.dtg.cc @@ -45,7 +45,7 @@ bool ElementUnaryAttrs::operator>=(ElementUnaryAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ElementUnaryAttrs const &x) const { + ::FlexFlow::ElementUnaryAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::OperatorType>{}(x.op_type) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -56,13 +56,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ElementUnaryAttrs - adl_serializer::from_json(json const &j) { - return {j.at("op_type").template get<::FlexFlow::OperatorType>(), - j.at("scalar").template get>()}; +::FlexFlow::ElementUnaryAttrs + adl_serializer<::FlexFlow::ElementUnaryAttrs>::from_json(json const &j) { + return ::FlexFlow::ElementUnaryAttrs{ + j.at("op_type").template get<::FlexFlow::OperatorType>(), + j.at("scalar").template get>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ElementUnaryAttrs const &v) { +void adl_serializer<::FlexFlow::ElementUnaryAttrs>::to_json( + json &j, ::FlexFlow::ElementUnaryAttrs const &v) { j["__type"] = "ElementUnaryAttrs"; j["op_type"] = v.op_type; j["scalar"] = v.scalar; @@ -70,9 +71,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ElementUnaryAttrs> + Arbitrary<::FlexFlow::ElementUnaryAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ElementUnaryAttrs>( gen::arbitrary<::FlexFlow::OperatorType>(), gen::arbitrary>()); } diff --git a/lib/op-attrs/src/op-attrs/ops/embedding.cc b/lib/op-attrs/src/op-attrs/ops/embedding.cc 
index 9e9ad3a194..be7b91c24f 100644 --- a/lib/op-attrs/src/op-attrs/ops/embedding.cc +++ b/lib/op-attrs/src/op-attrs/ops/embedding.cc @@ -71,8 +71,9 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = shard_dim_at_idx(input, ff_dim_t{-1}).degree; - DiscardCopyDegree discard_copy_degree = 1; + SumDegree sum_degree = + SumDegree{shard_dim_at_idx(input, ff_dim_t{-1}).degree}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1}; FFOrdered shard_degrees = transform(input.dims.shard_dims, [](ShardParallelDim const &d) { return d.degree; }); @@ -94,7 +95,7 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = 1; + SumDegree sum_degree = SumDegree{1}; DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{product( transform(ff_ordered_shard_dims(input.dims), [](ShardParallelDim const &d) -> int { return d.degree; }))}; diff --git a/lib/op-attrs/src/op-attrs/ops/embedding_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/embedding_attrs.dtg.cc index b4d4657e08..8f5778d794 100644 --- a/lib/op-attrs/src/op-attrs/ops/embedding_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/embedding_attrs.dtg.cc @@ -80,7 +80,7 @@ bool EmbeddingAttrs::operator>=(EmbeddingAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::EmbeddingAttrs const &x) const { + ::FlexFlow::EmbeddingAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.num_entries) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -95,15 +95,16 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::EmbeddingAttrs - adl_serializer::from_json(json const &j) { - return {j.at("num_entries").template get(), - j.at("out_channels").template get(), - j.at("aggr").template get>(), - j.at("data_type").template get<::FlexFlow::DataType>()}; +::FlexFlow::EmbeddingAttrs + adl_serializer<::FlexFlow::EmbeddingAttrs>::from_json(json const &j) { + return ::FlexFlow::EmbeddingAttrs{ + j.at("num_entries").template get(), + 
j.at("out_channels").template get(), + j.at("aggr").template get>(), + j.at("data_type").template get<::FlexFlow::DataType>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::EmbeddingAttrs const &v) { +void adl_serializer<::FlexFlow::EmbeddingAttrs>::to_json( + json &j, ::FlexFlow::EmbeddingAttrs const &v) { j["__type"] = "EmbeddingAttrs"; j["num_entries"] = v.num_entries; j["out_channels"] = v.out_channels; @@ -113,8 +114,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::EmbeddingAttrs> + Arbitrary<::FlexFlow::EmbeddingAttrs>::arbitrary() { + return gen::construct<::FlexFlow::EmbeddingAttrs>( gen::arbitrary(), gen::arbitrary(), gen::arbitrary>(), diff --git a/lib/op-attrs/src/op-attrs/ops/flat_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/flat_attrs.dtg.cc index ef34d97a89..ff2cdcace5 100644 --- a/lib/op-attrs/src/op-attrs/ops/flat_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/flat_attrs.dtg.cc @@ -33,27 +33,27 @@ bool FlatAttrs::operator>=(FlatAttrs const &other) const { } // namespace FlexFlow namespace std { -size_t - hash::operator()(FlexFlow::FlatAttrs const &x) const { +size_t hash::operator()( + ::FlexFlow::FlatAttrs const &x) const { size_t result = 0; return result; } } // namespace std namespace nlohmann { -FlexFlow::FlatAttrs - adl_serializer::from_json(json const &j) { - return {}; +::FlexFlow::FlatAttrs + adl_serializer<::FlexFlow::FlatAttrs>::from_json(json const &j) { + return ::FlexFlow::FlatAttrs{}; } -void adl_serializer::to_json( - json &j, FlexFlow::FlatAttrs const &v) { +void adl_serializer<::FlexFlow::FlatAttrs>::to_json( + json &j, ::FlexFlow::FlatAttrs const &v) { j["__type"] = "FlatAttrs"; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(); +Gen<::FlexFlow::FlatAttrs> Arbitrary<::FlexFlow::FlatAttrs>::arbitrary() { + return gen::construct<::FlexFlow::FlatAttrs>(); } } // namespace rc diff 
--git a/lib/op-attrs/src/op-attrs/ops/gather_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/gather_attrs.dtg.cc index 713c0f391e..a056d812ca 100644 --- a/lib/op-attrs/src/op-attrs/ops/gather_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/gather_attrs.dtg.cc @@ -37,7 +37,7 @@ bool GatherAttrs::operator>=(GatherAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::GatherAttrs const &x) const { + ::FlexFlow::GatherAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ff_dim_t>{}(x.dim) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -46,20 +46,21 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::GatherAttrs - adl_serializer::from_json(json const &j) { - return {j.at("dim").template get<::FlexFlow::ff_dim_t>()}; +::FlexFlow::GatherAttrs + adl_serializer<::FlexFlow::GatherAttrs>::from_json(json const &j) { + return ::FlexFlow::GatherAttrs{ + j.at("dim").template get<::FlexFlow::ff_dim_t>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::GatherAttrs const &v) { +void adl_serializer<::FlexFlow::GatherAttrs>::to_json( + json &j, ::FlexFlow::GatherAttrs const &v) { j["__type"] = "GatherAttrs"; j["dim"] = v.dim; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::GatherAttrs> Arbitrary<::FlexFlow::GatherAttrs>::arbitrary() { + return gen::construct<::FlexFlow::GatherAttrs>( gen::arbitrary<::FlexFlow::ff_dim_t>()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/input_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/input_attrs.dtg.cc index 35544402f7..b3b092bcfd 100644 --- a/lib/op-attrs/src/op-attrs/ops/input_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/input_attrs.dtg.cc @@ -34,26 +34,26 @@ bool InputAttrs::operator>=(InputAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::InputAttrs const &x) const { + ::FlexFlow::InputAttrs const &x) const { size_t result = 0; return result; } } // 
namespace std namespace nlohmann { -FlexFlow::InputAttrs - adl_serializer::from_json(json const &j) { - return {}; +::FlexFlow::InputAttrs + adl_serializer<::FlexFlow::InputAttrs>::from_json(json const &j) { + return ::FlexFlow::InputAttrs{}; } -void adl_serializer::to_json( - json &j, FlexFlow::InputAttrs const &v) { +void adl_serializer<::FlexFlow::InputAttrs>::to_json( + json &j, ::FlexFlow::InputAttrs const &v) { j["__type"] = "InputAttrs"; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(); +Gen<::FlexFlow::InputAttrs> Arbitrary<::FlexFlow::InputAttrs>::arbitrary() { + return gen::construct<::FlexFlow::InputAttrs>(); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/layer_norm_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/layer_norm_attrs.dtg.cc index 163f2e2f91..66db8e278a 100644 --- a/lib/op-attrs/src/op-attrs/ops/layer_norm_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/layer_norm_attrs.dtg.cc @@ -48,7 +48,7 @@ bool LayerNormAttrs::operator>=(LayerNormAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::LayerNormAttrs const &x) const { + ::FlexFlow::LayerNormAttrs const &x) const { size_t result = 0; result ^= std::hash< @@ -64,17 +64,17 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::LayerNormAttrs - adl_serializer::from_json(json const &j) { - return { +::FlexFlow::LayerNormAttrs + adl_serializer<::FlexFlow::LayerNormAttrs>::from_json(json const &j) { + return ::FlexFlow::LayerNormAttrs{ j.at("axes") .template get< ::FlexFlow::stack_vector<::FlexFlow::ff_dim_t, MAX_TENSOR_DIM>>(), j.at("elementwise_affine").template get(), j.at("eps").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::LayerNormAttrs const &v) { +void adl_serializer<::FlexFlow::LayerNormAttrs>::to_json( + json &j, ::FlexFlow::LayerNormAttrs const &v) { j["__type"] = "LayerNormAttrs"; j["axes"] = v.axes; j["elementwise_affine"] = v.elementwise_affine; @@ 
-83,8 +83,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::LayerNormAttrs> + Arbitrary<::FlexFlow::LayerNormAttrs>::arbitrary() { + return gen::construct<::FlexFlow::LayerNormAttrs>( gen::arbitrary< ::FlexFlow::stack_vector<::FlexFlow::ff_dim_t, MAX_TENSOR_DIM>>(), gen::arbitrary(), diff --git a/lib/op-attrs/src/op-attrs/ops/linear.cc b/lib/op-attrs/src/op-attrs/ops/linear.cc index 8283673378..2bd0cea950 100644 --- a/lib/op-attrs/src/op-attrs/ops/linear.cc +++ b/lib/op-attrs/src/op-attrs/ops/linear.cc @@ -50,7 +50,7 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = 1; + SumDegree sum_degree = SumDegree{1}; DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{ get_sum_degree(input) * product( @@ -75,10 +75,10 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = - get_sum_degree(input) * shard_dim_at_idx(input, ff_dim_t{-1}).degree; - DiscardCopyDegree discard_copy_degree = product( - slice(ff_ordered_shard_degrees(input), std::nullopt, ff_dim_t{-1})); + SumDegree sum_degree = SumDegree{ + get_sum_degree(input) * shard_dim_at_idx(input, ff_dim_t{-1}).degree}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{product( + slice(ff_ordered_shard_degrees(input), std::nullopt, ff_dim_t{-1}))}; FFOrdered shard_degrees = FFOrdered{get_discard_copy_degree(input)}; return lift_to_parallel_with_degrees( @@ -97,9 +97,9 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = - get_sum_degree(input) * shard_dim_at_idx(input, ff_dim_t{-1}).degree; - DiscardCopyDegree discard_copy_degree = 1; + SumDegree sum_degree = SumDegree{ + get_sum_degree(input) * shard_dim_at_idx(input, ff_dim_t{-1}).degree}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1}; FFOrdered shard_degrees = ff_ordered_shard_degrees(input); shard_degrees.at(ff_dim_t{-1}) = get_discard_copy_degree(input); diff --git 
a/lib/op-attrs/src/op-attrs/ops/linear_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/linear_attrs.dtg.cc index f3359da219..3099a6c7e4 100644 --- a/lib/op-attrs/src/op-attrs/ops/linear_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/linear_attrs.dtg.cc @@ -94,7 +94,7 @@ bool LinearAttrs::operator>=(LinearAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::LinearAttrs const &x) const { + ::FlexFlow::LinearAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.out_channels) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -112,9 +112,9 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::LinearAttrs - adl_serializer::from_json(json const &j) { - return { +::FlexFlow::LinearAttrs + adl_serializer<::FlexFlow::LinearAttrs>::from_json(json const &j) { + return ::FlexFlow::LinearAttrs{ j.at("out_channels").template get(), j.at("use_bias").template get(), j.at("data_type").template get<::FlexFlow::DataType>(), @@ -122,8 +122,8 @@ FlexFlow::LinearAttrs j.at("regularizer") .template get>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::LinearAttrs const &v) { +void adl_serializer<::FlexFlow::LinearAttrs>::to_json( + json &j, ::FlexFlow::LinearAttrs const &v) { j["__type"] = "LinearAttrs"; j["out_channels"] = v.out_channels; j["use_bias"] = v.use_bias; @@ -134,8 +134,8 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::LinearAttrs> Arbitrary<::FlexFlow::LinearAttrs>::arbitrary() { + return gen::construct<::FlexFlow::LinearAttrs>( gen::arbitrary(), gen::arbitrary(), gen::arbitrary<::FlexFlow::DataType>(), diff --git a/lib/op-attrs/src/op-attrs/ops/noop_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/noop_attrs.dtg.cc index 3ef3a0119b..9600011c06 100644 --- a/lib/op-attrs/src/op-attrs/ops/noop_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/noop_attrs.dtg.cc @@ -33,27 +33,27 @@ bool NoopAttrs::operator>=(NoopAttrs 
const &other) const { } // namespace FlexFlow namespace std { -size_t - hash::operator()(FlexFlow::NoopAttrs const &x) const { +size_t hash::operator()( + ::FlexFlow::NoopAttrs const &x) const { size_t result = 0; return result; } } // namespace std namespace nlohmann { -FlexFlow::NoopAttrs - adl_serializer::from_json(json const &j) { - return {}; +::FlexFlow::NoopAttrs + adl_serializer<::FlexFlow::NoopAttrs>::from_json(json const &j) { + return ::FlexFlow::NoopAttrs{}; } -void adl_serializer::to_json( - json &j, FlexFlow::NoopAttrs const &v) { +void adl_serializer<::FlexFlow::NoopAttrs>::to_json( + json &j, ::FlexFlow::NoopAttrs const &v) { j["__type"] = "NoopAttrs"; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(); +Gen<::FlexFlow::NoopAttrs> Arbitrary<::FlexFlow::NoopAttrs>::arbitrary() { + return gen::construct<::FlexFlow::NoopAttrs>(); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/parallel_attention_inputs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/parallel_attention_inputs.dtg.cc index ac8da6d2d7..67a46ef5fb 100644 --- a/lib/op-attrs/src/op-attrs/ops/parallel_attention_inputs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/parallel_attention_inputs.dtg.cc @@ -32,7 +32,7 @@ bool ParallelMultiHeadAttentionInputs::operator!=( namespace std { size_t hash::operator()( - FlexFlow::ParallelMultiHeadAttentionInputs const &x) const { + ::FlexFlow::ParallelMultiHeadAttentionInputs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ParallelTensorShape>{}(x.query) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -45,15 +45,16 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ParallelMultiHeadAttentionInputs - adl_serializer::from_json( +::FlexFlow::ParallelMultiHeadAttentionInputs + adl_serializer<::FlexFlow::ParallelMultiHeadAttentionInputs>::from_json( json const &j) { - return {j.at("query").template get<::FlexFlow::ParallelTensorShape>(), - j.at("key").template 
get<::FlexFlow::ParallelTensorShape>(), - j.at("value").template get<::FlexFlow::ParallelTensorShape>()}; + return ::FlexFlow::ParallelMultiHeadAttentionInputs{ + j.at("query").template get<::FlexFlow::ParallelTensorShape>(), + j.at("key").template get<::FlexFlow::ParallelTensorShape>(), + j.at("value").template get<::FlexFlow::ParallelTensorShape>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ParallelMultiHeadAttentionInputs const &v) { +void adl_serializer<::FlexFlow::ParallelMultiHeadAttentionInputs>::to_json( + json &j, ::FlexFlow::ParallelMultiHeadAttentionInputs const &v) { j["__type"] = "ParallelMultiHeadAttentionInputs"; j["query"] = v.query; j["key"] = v.key; @@ -62,9 +63,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ParallelMultiHeadAttentionInputs> + Arbitrary<::FlexFlow::ParallelMultiHeadAttentionInputs>::arbitrary() { + return gen::construct<::FlexFlow::ParallelMultiHeadAttentionInputs>( gen::arbitrary<::FlexFlow::ParallelTensorShape>(), gen::arbitrary<::FlexFlow::ParallelTensorShape>(), gen::arbitrary<::FlexFlow::ParallelTensorShape>()); diff --git a/lib/op-attrs/src/op-attrs/ops/pool_2d_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/pool_2d_attrs.dtg.cc index 8c445d8b84..057b030a96 100644 --- a/lib/op-attrs/src/op-attrs/ops/pool_2d_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/pool_2d_attrs.dtg.cc @@ -131,7 +131,7 @@ bool Pool2DAttrs::operator>=(Pool2DAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::Pool2DAttrs const &x) const { + ::FlexFlow::Pool2DAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.kernel_h) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -154,19 +154,20 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::Pool2DAttrs - adl_serializer::from_json(json const &j) { - return {j.at("kernel_h").template get(), - j.at("kernel_w").template get(), - 
j.at("stride_h").template get(), - j.at("stride_w").template get(), - j.at("padding_h").template get(), - j.at("padding_w").template get(), - j.at("pool_type").template get<::FlexFlow::PoolOp>(), - j.at("activation").template get<::FlexFlow::Activation>()}; +::FlexFlow::Pool2DAttrs + adl_serializer<::FlexFlow::Pool2DAttrs>::from_json(json const &j) { + return ::FlexFlow::Pool2DAttrs{ + j.at("kernel_h").template get(), + j.at("kernel_w").template get(), + j.at("stride_h").template get(), + j.at("stride_w").template get(), + j.at("padding_h").template get(), + j.at("padding_w").template get(), + j.at("pool_type").template get<::FlexFlow::PoolOp>(), + j.at("activation").template get<::FlexFlow::Activation>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::Pool2DAttrs const &v) { +void adl_serializer<::FlexFlow::Pool2DAttrs>::to_json( + json &j, ::FlexFlow::Pool2DAttrs const &v) { j["__type"] = "Pool2DAttrs"; j["kernel_h"] = v.kernel_h; j["kernel_w"] = v.kernel_w; @@ -180,8 +181,8 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::Pool2DAttrs> Arbitrary<::FlexFlow::Pool2DAttrs>::arbitrary() { + return gen::construct<::FlexFlow::Pool2DAttrs>( gen::arbitrary(), gen::arbitrary(), gen::arbitrary(), diff --git a/lib/op-attrs/src/op-attrs/ops/reduce_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/reduce_attrs.dtg.cc index 2aa9546956..c365819440 100644 --- a/lib/op-attrs/src/op-attrs/ops/reduce_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/reduce_attrs.dtg.cc @@ -49,7 +49,7 @@ bool ReduceAttrs::operator>=(ReduceAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ReduceAttrs const &x) const { + ::FlexFlow::ReduceAttrs const &x) const { size_t result = 0; result ^= std::hash< @@ -65,17 +65,17 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ReduceAttrs - adl_serializer::from_json(json const &j) { - return { 
+::FlexFlow::ReduceAttrs + adl_serializer<::FlexFlow::ReduceAttrs>::from_json(json const &j) { + return ::FlexFlow::ReduceAttrs{ j.at("axes") .template get< ::FlexFlow::stack_vector<::FlexFlow::ff_dim_t, MAX_TENSOR_DIM>>(), j.at("op_type").template get<::FlexFlow::OperatorType>(), j.at("keepdims").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ReduceAttrs const &v) { +void adl_serializer<::FlexFlow::ReduceAttrs>::to_json( + json &j, ::FlexFlow::ReduceAttrs const &v) { j["__type"] = "ReduceAttrs"; j["axes"] = v.axes; j["op_type"] = v.op_type; @@ -84,8 +84,8 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ReduceAttrs> Arbitrary<::FlexFlow::ReduceAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ReduceAttrs>( gen::arbitrary< ::FlexFlow::stack_vector<::FlexFlow::ff_dim_t, MAX_TENSOR_DIM>>(), gen::arbitrary<::FlexFlow::OperatorType>(), diff --git a/lib/op-attrs/src/op-attrs/ops/reduction_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/reduction_attrs.dtg.cc index 2f1550bb66..b861676f2b 100644 --- a/lib/op-attrs/src/op-attrs/ops/reduction_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/reduction_attrs.dtg.cc @@ -36,7 +36,7 @@ bool ReductionAttrs::operator>=(ReductionAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ReductionAttrs const &x) const { + ::FlexFlow::ReductionAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.reduction_degree) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -45,20 +45,22 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ReductionAttrs - adl_serializer::from_json(json const &j) { - return {j.at("reduction_degree").template get()}; +::FlexFlow::ReductionAttrs + adl_serializer<::FlexFlow::ReductionAttrs>::from_json(json const &j) { + return ::FlexFlow::ReductionAttrs{ + j.at("reduction_degree").template get()}; } -void adl_serializer::to_json( - 
json &j, FlexFlow::ReductionAttrs const &v) { +void adl_serializer<::FlexFlow::ReductionAttrs>::to_json( + json &j, ::FlexFlow::ReductionAttrs const &v) { j["__type"] = "ReductionAttrs"; j["reduction_degree"] = v.reduction_degree; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::ReductionAttrs> + Arbitrary<::FlexFlow::ReductionAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ReductionAttrs>(gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/repartition_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/repartition_attrs.dtg.cc index 6270298c87..110e16c36a 100644 --- a/lib/op-attrs/src/op-attrs/ops/repartition_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/repartition_attrs.dtg.cc @@ -46,7 +46,7 @@ bool RepartitionAttrs::operator>=(RepartitionAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::RepartitionAttrs const &x) const { + ::FlexFlow::RepartitionAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ff_dim_t>{}(x.repartition_dim) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -57,13 +57,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::RepartitionAttrs - adl_serializer::from_json(json const &j) { - return {j.at("repartition_dim").template get<::FlexFlow::ff_dim_t>(), - j.at("repartition_degree").template get()}; +::FlexFlow::RepartitionAttrs + adl_serializer<::FlexFlow::RepartitionAttrs>::from_json(json const &j) { + return ::FlexFlow::RepartitionAttrs{ + j.at("repartition_dim").template get<::FlexFlow::ff_dim_t>(), + j.at("repartition_degree").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::RepartitionAttrs const &v) { +void adl_serializer<::FlexFlow::RepartitionAttrs>::to_json( + json &j, ::FlexFlow::RepartitionAttrs const &v) { j["__type"] = "RepartitionAttrs"; j["repartition_dim"] = v.repartition_dim; j["repartition_degree"] = 
v.repartition_degree; @@ -71,9 +72,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::RepartitionAttrs> + Arbitrary<::FlexFlow::RepartitionAttrs>::arbitrary() { + return gen::construct<::FlexFlow::RepartitionAttrs>( gen::arbitrary<::FlexFlow::ff_dim_t>(), gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/replicate_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/replicate_attrs.dtg.cc index 930c5beaf4..bdac2d8c81 100644 --- a/lib/op-attrs/src/op-attrs/ops/replicate_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/replicate_attrs.dtg.cc @@ -36,7 +36,7 @@ bool ReplicateAttrs::operator>=(ReplicateAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ReplicateAttrs const &x) const { + ::FlexFlow::ReplicateAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.replicate_degree) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -45,20 +45,22 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ReplicateAttrs - adl_serializer::from_json(json const &j) { - return {j.at("replicate_degree").template get()}; +::FlexFlow::ReplicateAttrs + adl_serializer<::FlexFlow::ReplicateAttrs>::from_json(json const &j) { + return ::FlexFlow::ReplicateAttrs{ + j.at("replicate_degree").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ReplicateAttrs const &v) { +void adl_serializer<::FlexFlow::ReplicateAttrs>::to_json( + json &j, ::FlexFlow::ReplicateAttrs const &v) { j["__type"] = "ReplicateAttrs"; j["replicate_degree"] = v.replicate_degree; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::ReplicateAttrs> + Arbitrary<::FlexFlow::ReplicateAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ReplicateAttrs>(gen::arbitrary()); } } // namespace rc diff --git 
a/lib/op-attrs/src/op-attrs/ops/reshape_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/reshape_attrs.dtg.cc index b1fb350b88..de18a192ff 100644 --- a/lib/op-attrs/src/op-attrs/ops/reshape_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/reshape_attrs.dtg.cc @@ -37,7 +37,7 @@ bool ReshapeAttrs::operator>=(ReshapeAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ReshapeAttrs const &x) const { + ::FlexFlow::ReshapeAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::TensorShape>{}(x.shape) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -46,20 +46,21 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ReshapeAttrs - adl_serializer::from_json(json const &j) { - return {j.at("shape").template get<::FlexFlow::TensorShape>()}; +::FlexFlow::ReshapeAttrs + adl_serializer<::FlexFlow::ReshapeAttrs>::from_json(json const &j) { + return ::FlexFlow::ReshapeAttrs{ + j.at("shape").template get<::FlexFlow::TensorShape>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ReshapeAttrs const &v) { +void adl_serializer<::FlexFlow::ReshapeAttrs>::to_json( + json &j, ::FlexFlow::ReshapeAttrs const &v) { j["__type"] = "ReshapeAttrs"; j["shape"] = v.shape; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ReshapeAttrs> Arbitrary<::FlexFlow::ReshapeAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ReshapeAttrs>( gen::arbitrary<::FlexFlow::TensorShape>()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/reverse_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/reverse_attrs.dtg.cc index 9ac9abeb82..9e8079d666 100644 --- a/lib/op-attrs/src/op-attrs/ops/reverse_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/reverse_attrs.dtg.cc @@ -37,7 +37,7 @@ bool ReverseAttrs::operator>=(ReverseAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ReverseAttrs const &x) const { + ::FlexFlow::ReverseAttrs const &x) 
const { size_t result = 0; result ^= std::hash<::FlexFlow::ff_dim_t>{}(x.axis) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -46,20 +46,21 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ReverseAttrs - adl_serializer::from_json(json const &j) { - return {j.at("axis").template get<::FlexFlow::ff_dim_t>()}; +::FlexFlow::ReverseAttrs + adl_serializer<::FlexFlow::ReverseAttrs>::from_json(json const &j) { + return ::FlexFlow::ReverseAttrs{ + j.at("axis").template get<::FlexFlow::ff_dim_t>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ReverseAttrs const &v) { +void adl_serializer<::FlexFlow::ReverseAttrs>::to_json( + json &j, ::FlexFlow::ReverseAttrs const &v) { j["__type"] = "ReverseAttrs"; j["axis"] = v.axis; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ReverseAttrs> Arbitrary<::FlexFlow::ReverseAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ReverseAttrs>( gen::arbitrary<::FlexFlow::ff_dim_t>()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/softmax_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/softmax_attrs.dtg.cc index 4941b7438a..1d4d396ef3 100644 --- a/lib/op-attrs/src/op-attrs/ops/softmax_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/softmax_attrs.dtg.cc @@ -37,7 +37,7 @@ bool SoftmaxAttrs::operator>=(SoftmaxAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::SoftmaxAttrs const &x) const { + ::FlexFlow::SoftmaxAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ff_dim_t>{}(x.dim) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -46,20 +46,21 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::SoftmaxAttrs - adl_serializer::from_json(json const &j) { - return {j.at("dim").template get<::FlexFlow::ff_dim_t>()}; +::FlexFlow::SoftmaxAttrs + adl_serializer<::FlexFlow::SoftmaxAttrs>::from_json(json const &j) { + return ::FlexFlow::SoftmaxAttrs{ + 
j.at("dim").template get<::FlexFlow::ff_dim_t>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::SoftmaxAttrs const &v) { +void adl_serializer<::FlexFlow::SoftmaxAttrs>::to_json( + json &j, ::FlexFlow::SoftmaxAttrs const &v) { j["__type"] = "SoftmaxAttrs"; j["dim"] = v.dim; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::SoftmaxAttrs> Arbitrary<::FlexFlow::SoftmaxAttrs>::arbitrary() { + return gen::construct<::FlexFlow::SoftmaxAttrs>( gen::arbitrary<::FlexFlow::ff_dim_t>()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/split_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/split_attrs.dtg.cc index c6f7e75dbf..bdae47681e 100644 --- a/lib/op-attrs/src/op-attrs/ops/split_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/split_attrs.dtg.cc @@ -47,7 +47,7 @@ bool SplitAttrs::operator>=(SplitAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::SplitAttrs const &x) const { + ::FlexFlow::SplitAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::stack_vector>{}(x.splits) + @@ -59,14 +59,15 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::SplitAttrs - adl_serializer::from_json(json const &j) { - return {j.at("splits") - .template get<::FlexFlow::stack_vector>(), - j.at("axis").template get<::FlexFlow::ff_dim_t>()}; +::FlexFlow::SplitAttrs + adl_serializer<::FlexFlow::SplitAttrs>::from_json(json const &j) { + return ::FlexFlow::SplitAttrs{ + j.at("splits") + .template get<::FlexFlow::stack_vector>(), + j.at("axis").template get<::FlexFlow::ff_dim_t>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::SplitAttrs const &v) { +void adl_serializer<::FlexFlow::SplitAttrs>::to_json( + json &j, ::FlexFlow::SplitAttrs const &v) { j["__type"] = "SplitAttrs"; j["splits"] = v.splits; j["axis"] = v.axis; @@ -74,8 +75,8 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { 
- return gen::construct( +Gen<::FlexFlow::SplitAttrs> Arbitrary<::FlexFlow::SplitAttrs>::arbitrary() { + return gen::construct<::FlexFlow::SplitAttrs>( gen::arbitrary<::FlexFlow::stack_vector>(), gen::arbitrary<::FlexFlow::ff_dim_t>()); } diff --git a/lib/op-attrs/src/op-attrs/ops/topk_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/topk_attrs.dtg.cc index 55ead7d858..9723c063a5 100644 --- a/lib/op-attrs/src/op-attrs/ops/topk_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/topk_attrs.dtg.cc @@ -34,8 +34,8 @@ bool TopKAttrs::operator>=(TopKAttrs const &other) const { } // namespace FlexFlow namespace std { -size_t - hash::operator()(FlexFlow::TopKAttrs const &x) const { +size_t hash::operator()( + ::FlexFlow::TopKAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.k) + 0x9e3779b9 + (result << 6) + (result >> 2); result ^= @@ -45,12 +45,13 @@ size_t } // namespace std namespace nlohmann { -FlexFlow::TopKAttrs - adl_serializer::from_json(json const &j) { - return {j.at("k").template get(), j.at("sorted").template get()}; +::FlexFlow::TopKAttrs + adl_serializer<::FlexFlow::TopKAttrs>::from_json(json const &j) { + return ::FlexFlow::TopKAttrs{j.at("k").template get(), + j.at("sorted").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::TopKAttrs const &v) { +void adl_serializer<::FlexFlow::TopKAttrs>::to_json( + json &j, ::FlexFlow::TopKAttrs const &v) { j["__type"] = "TopKAttrs"; j["k"] = v.k; j["sorted"] = v.sorted; @@ -58,9 +59,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary(), - gen::arbitrary()); +Gen<::FlexFlow::TopKAttrs> Arbitrary<::FlexFlow::TopKAttrs>::arbitrary() { + return gen::construct<::FlexFlow::TopKAttrs>(gen::arbitrary(), + gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/transpose_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/transpose_attrs.dtg.cc index 0a774b992e..23e78beb7a 100644 --- 
a/lib/op-attrs/src/op-attrs/ops/transpose_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/transpose_attrs.dtg.cc @@ -40,7 +40,7 @@ bool TransposeAttrs::operator>=(TransposeAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::TransposeAttrs const &x) const { + ::FlexFlow::TransposeAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::FFOrdered<::FlexFlow::ff_dim_t>>{}(x.perm) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -49,21 +49,22 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::TransposeAttrs - adl_serializer::from_json(json const &j) { - return { +::FlexFlow::TransposeAttrs + adl_serializer<::FlexFlow::TransposeAttrs>::from_json(json const &j) { + return ::FlexFlow::TransposeAttrs{ j.at("perm").template get<::FlexFlow::FFOrdered<::FlexFlow::ff_dim_t>>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::TransposeAttrs const &v) { +void adl_serializer<::FlexFlow::TransposeAttrs>::to_json( + json &j, ::FlexFlow::TransposeAttrs const &v) { j["__type"] = "TransposeAttrs"; j["perm"] = v.perm; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::TransposeAttrs> + Arbitrary<::FlexFlow::TransposeAttrs>::arbitrary() { + return gen::construct<::FlexFlow::TransposeAttrs>( gen::arbitrary<::FlexFlow::FFOrdered<::FlexFlow::ff_dim_t>>()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/ops/weight_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/ops/weight_attrs.dtg.cc index a288161da2..03ad9f469c 100644 --- a/lib/op-attrs/src/op-attrs/ops/weight_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/ops/weight_attrs.dtg.cc @@ -34,26 +34,26 @@ bool WeightAttrs::operator>=(WeightAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::WeightAttrs const &x) const { + ::FlexFlow::WeightAttrs const &x) const { size_t result = 0; return result; } } // namespace std namespace nlohmann { -FlexFlow::WeightAttrs - 
adl_serializer::from_json(json const &j) { - return {}; +::FlexFlow::WeightAttrs + adl_serializer<::FlexFlow::WeightAttrs>::from_json(json const &j) { + return ::FlexFlow::WeightAttrs{}; } -void adl_serializer::to_json( - json &j, FlexFlow::WeightAttrs const &v) { +void adl_serializer<::FlexFlow::WeightAttrs>::to_json( + json &j, ::FlexFlow::WeightAttrs const &v) { j["__type"] = "WeightAttrs"; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(); +Gen<::FlexFlow::WeightAttrs> Arbitrary<::FlexFlow::WeightAttrs>::arbitrary() { + return gen::construct<::FlexFlow::WeightAttrs>(); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_dims.dtg.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_dims.dtg.cc index 40be73cb9f..3cad12b4fa 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_dims.dtg.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_dims.dtg.cc @@ -50,7 +50,7 @@ bool ParallelTensorDims::operator>=(ParallelTensorDims const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ParallelTensorDims const &x) const { + ::FlexFlow::ParallelTensorDims const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::FFOrdered<::FlexFlow::ShardParallelDim>>{}( x.shard_dims) + @@ -62,15 +62,15 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ParallelTensorDims - adl_serializer::from_json(json const &j) { - return { +::FlexFlow::ParallelTensorDims + adl_serializer<::FlexFlow::ParallelTensorDims>::from_json(json const &j) { + return ::FlexFlow::ParallelTensorDims{ j.at("shard_dims") .template get<::FlexFlow::FFOrdered<::FlexFlow::ShardParallelDim>>(), j.at("replica_dims").template get<::FlexFlow::ReplicaParallelDimSet>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ParallelTensorDims const &v) { +void adl_serializer<::FlexFlow::ParallelTensorDims>::to_json( + json &j, ::FlexFlow::ParallelTensorDims const &v) { j["__type"] = "ParallelTensorDims"; 
j["shard_dims"] = v.shard_dims; j["replica_dims"] = v.replica_dims; @@ -78,9 +78,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ParallelTensorDims> + Arbitrary<::FlexFlow::ParallelTensorDims>::arbitrary() { + return gen::construct<::FlexFlow::ParallelTensorDims>( gen::arbitrary<::FlexFlow::FFOrdered<::FlexFlow::ShardParallelDim>>(), gen::arbitrary<::FlexFlow::ReplicaParallelDimSet>()); } diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc index 516cbe191f..e2ba10c7bb 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc @@ -58,7 +58,7 @@ std::optional } ParallelTensorShape lift_to_parallel(TensorShape const &s) { - return {lift_to_parallel(s.dims), s.data_type}; + return ParallelTensorShape{lift_to_parallel(s.dims), s.data_type}; } ParallelTensorShape diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.dtg.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.dtg.cc index 1fe82ce108..3a509de7f0 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.dtg.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.dtg.cc @@ -46,7 +46,7 @@ bool ParallelTensorShape::operator>=(ParallelTensorShape const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ParallelTensorShape const &x) const { + ::FlexFlow::ParallelTensorShape const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ParallelTensorDims>{}(x.dims) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -57,13 +57,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ParallelTensorShape - adl_serializer::from_json(json const &j) { - return {j.at("dims").template get<::FlexFlow::ParallelTensorDims>(), - j.at("data_type").template get<::FlexFlow::DataType>()}; +::FlexFlow::ParallelTensorShape + 
adl_serializer<::FlexFlow::ParallelTensorShape>::from_json(json const &j) { + return ::FlexFlow::ParallelTensorShape{ + j.at("dims").template get<::FlexFlow::ParallelTensorDims>(), + j.at("data_type").template get<::FlexFlow::DataType>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ParallelTensorShape const &v) { +void adl_serializer<::FlexFlow::ParallelTensorShape>::to_json( + json &j, ::FlexFlow::ParallelTensorShape const &v) { j["__type"] = "ParallelTensorShape"; j["dims"] = v.dims; j["data_type"] = v.data_type; @@ -71,9 +72,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ParallelTensorShape> + Arbitrary<::FlexFlow::ParallelTensorShape>::arbitrary() { + return gen::construct<::FlexFlow::ParallelTensorShape>( gen::arbitrary<::FlexFlow::ParallelTensorDims>(), gen::arbitrary<::FlexFlow::DataType>()); } diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_shape/discard_copy_degree.dtg.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_shape/discard_copy_degree.dtg.cc index 4547a5df9b..cdea7bb484 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_shape/discard_copy_degree.dtg.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_shape/discard_copy_degree.dtg.cc @@ -35,7 +35,7 @@ bool DiscardCopyDegree::operator>=(DiscardCopyDegree const &other) const { namespace std { size_t hash::operator()( - FlexFlow::DiscardCopyDegree const &x) const { + ::FlexFlow::DiscardCopyDegree const &x) const { size_t result = 0; result ^= std::hash{}(x.value) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -44,21 +44,21 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::DiscardCopyDegree - adl_serializer::from_json(json const &j) { - return {j.at("value").template get()}; +::FlexFlow::DiscardCopyDegree + adl_serializer<::FlexFlow::DiscardCopyDegree>::from_json(json const &j) { + return ::FlexFlow::DiscardCopyDegree{j.at("value").template get()}; } 
-void adl_serializer::to_json( - json &j, FlexFlow::DiscardCopyDegree const &v) { +void adl_serializer<::FlexFlow::DiscardCopyDegree>::to_json( + json &j, ::FlexFlow::DiscardCopyDegree const &v) { j["__type"] = "DiscardCopyDegree"; j["value"] = v.value; } } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::DiscardCopyDegree> + Arbitrary<::FlexFlow::DiscardCopyDegree>::arbitrary() { + return gen::construct<::FlexFlow::DiscardCopyDegree>(gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_shape/sum_degree.dtg.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_shape/sum_degree.dtg.cc index cf159a1ea7..9dbc095f84 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_shape/sum_degree.dtg.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_shape/sum_degree.dtg.cc @@ -34,8 +34,8 @@ bool SumDegree::operator>=(SumDegree const &other) const { } // namespace FlexFlow namespace std { -size_t - hash::operator()(FlexFlow::SumDegree const &x) const { +size_t hash::operator()( + ::FlexFlow::SumDegree const &x) const { size_t result = 0; result ^= std::hash{}(x.value) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -44,20 +44,20 @@ size_t } // namespace std namespace nlohmann { -FlexFlow::SumDegree - adl_serializer::from_json(json const &j) { - return {j.at("value").template get()}; +::FlexFlow::SumDegree + adl_serializer<::FlexFlow::SumDegree>::from_json(json const &j) { + return ::FlexFlow::SumDegree{j.at("value").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::SumDegree const &v) { +void adl_serializer<::FlexFlow::SumDegree>::to_json( + json &j, ::FlexFlow::SumDegree const &v) { j["__type"] = "SumDegree"; j["value"] = v.value; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::SumDegree> Arbitrary<::FlexFlow::SumDegree>::arbitrary() { + return 
gen::construct<::FlexFlow::SumDegree>(gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/pcg_operator_attrs.dtg.cc b/lib/op-attrs/src/op-attrs/pcg_operator_attrs.dtg.cc index 56031677c8..ada1178ac7 100644 --- a/lib/op-attrs/src/op-attrs/pcg_operator_attrs.dtg.cc +++ b/lib/op-attrs/src/op-attrs/pcg_operator_attrs.dtg.cc @@ -3,7 +3,7 @@ // lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml /* proj-data { - "generated_from": "9149c47c2055195f15966ae7a3f619ff" + "generated_from": "72d324ec59ca0c5a390458ea20e79338" } */ @@ -70,6 +70,8 @@ PCGOperatorAttrs::PCGOperatorAttrs(::FlexFlow::TopKAttrs const &v) : raw_variant(v) {} PCGOperatorAttrs::PCGOperatorAttrs(::FlexFlow::TransposeAttrs const &v) : raw_variant(v) {} +PCGOperatorAttrs::PCGOperatorAttrs(::FlexFlow::WeightAttrs const &v) + : raw_variant(v) {} bool PCGOperatorAttrs::operator==(PCGOperatorAttrs const &other) const { return this->raw_variant == other.raw_variant; } @@ -119,7 +121,8 @@ size_t hash<::FlexFlow::PCGOperatorAttrs>::operator()( ::FlexFlow::SplitAttrs, ::FlexFlow::SoftmaxAttrs, ::FlexFlow::TopKAttrs, - ::FlexFlow::TransposeAttrs>>{}(x.raw_variant); + ::FlexFlow::TransposeAttrs, + ::FlexFlow::WeightAttrs>>{}(x.raw_variant); } } // namespace std namespace nlohmann { @@ -210,6 +213,9 @@ ::FlexFlow::PCGOperatorAttrs } else if (key == "transpose") { return ::FlexFlow::PCGOperatorAttrs{ j.at("value").template get<::FlexFlow::TransposeAttrs>()}; + } else if (key == "weight") { + return ::FlexFlow::PCGOperatorAttrs{ + j.at("value").template get<::FlexFlow::WeightAttrs>()}; } else { throw std::runtime_error(fmt::format("Unknown type key {}", key)); } @@ -358,6 +364,11 @@ void adl_serializer<::FlexFlow::PCGOperatorAttrs>::to_json( j["value"] = x.get<::FlexFlow::TransposeAttrs>(); break; } + case 28: { + j["type"] = "weight"; + j["value"] = x.get<::FlexFlow::WeightAttrs>(); + break; + } default: { throw std::runtime_error( fmt::format("Unknown index {} for type 
PCGOperatorAttrs", x.index())); @@ -423,7 +434,9 @@ Gen<::FlexFlow::PCGOperatorAttrs> gen::construct<::FlexFlow::PCGOperatorAttrs>( gen::arbitrary<::FlexFlow::TopKAttrs>()), gen::construct<::FlexFlow::PCGOperatorAttrs>( - gen::arbitrary<::FlexFlow::TransposeAttrs>())); + gen::arbitrary<::FlexFlow::TransposeAttrs>()), + gen::construct<::FlexFlow::PCGOperatorAttrs>( + gen::arbitrary<::FlexFlow::WeightAttrs>())); } } // namespace rc namespace FlexFlow { @@ -566,6 +579,11 @@ std::string format_as(::FlexFlow::PCGOperatorAttrs const &x) { << x.get<::FlexFlow::TransposeAttrs>() << ">"; break; } + case 28: { + oss << ""; + break; + } default: { throw std::runtime_error( fmt::format("Unknown index {} for type PCGOperatorAttrs", x.index())); diff --git a/lib/op-attrs/src/op-attrs/replica_parallel_dim.dtg.cc b/lib/op-attrs/src/op-attrs/replica_parallel_dim.dtg.cc index a1256ad79a..ed45115c77 100644 --- a/lib/op-attrs/src/op-attrs/replica_parallel_dim.dtg.cc +++ b/lib/op-attrs/src/op-attrs/replica_parallel_dim.dtg.cc @@ -44,7 +44,7 @@ bool ReplicaParallelDim::operator>=(ReplicaParallelDim const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ReplicaParallelDim const &x) const { + ::FlexFlow::ReplicaParallelDim const &x) const { size_t result = 0; result ^= std::hash{}(x.degree) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -55,13 +55,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ReplicaParallelDim - adl_serializer::from_json(json const &j) { - return {j.at("degree").template get(), - j.at("replica_type").template get<::FlexFlow::ReplicaType>()}; +::FlexFlow::ReplicaParallelDim + adl_serializer<::FlexFlow::ReplicaParallelDim>::from_json(json const &j) { + return ::FlexFlow::ReplicaParallelDim{ + j.at("degree").template get(), + j.at("replica_type").template get<::FlexFlow::ReplicaType>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ReplicaParallelDim const &v) { +void 
adl_serializer<::FlexFlow::ReplicaParallelDim>::to_json( + json &j, ::FlexFlow::ReplicaParallelDim const &v) { j["__type"] = "ReplicaParallelDim"; j["degree"] = v.degree; j["replica_type"] = v.replica_type; @@ -69,9 +70,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ReplicaParallelDim> + Arbitrary<::FlexFlow::ReplicaParallelDim>::arbitrary() { + return gen::construct<::FlexFlow::ReplicaParallelDim>( gen::arbitrary(), gen::arbitrary<::FlexFlow::ReplicaType>()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.cc b/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.cc index 7ef228e97e..20c88c77dc 100644 --- a/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.cc +++ b/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.cc @@ -4,7 +4,7 @@ namespace FlexFlow { ReplicaParallelDimSet empty_replica_parallel_dim_set() { - return ReplicaParallelDimSet{1, 1}; + return ReplicaParallelDimSet{SumDegree{1}, DiscardCopyDegree{1}}; } int get_order_of_replica_type(ReplicaParallelDimSet const &s, diff --git a/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.dtg.cc b/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.dtg.cc index f8782be01b..1d11006523 100644 --- a/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.dtg.cc +++ b/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.dtg.cc @@ -52,7 +52,7 @@ bool ReplicaParallelDimSet::operator>=( namespace std { size_t hash::operator()( - FlexFlow::ReplicaParallelDimSet const &x) const { + ::FlexFlow::ReplicaParallelDimSet const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::SumDegree>{}(x.sum_degree) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -63,14 +63,16 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ReplicaParallelDimSet - adl_serializer::from_json(json const &j) { - return {j.at("sum_degree").template get<::FlexFlow::SumDegree>(), - 
j.at("discard_copy_degree") - .template get<::FlexFlow::DiscardCopyDegree>()}; +::FlexFlow::ReplicaParallelDimSet + adl_serializer<::FlexFlow::ReplicaParallelDimSet>::from_json( + json const &j) { + return ::FlexFlow::ReplicaParallelDimSet{ + j.at("sum_degree").template get<::FlexFlow::SumDegree>(), + j.at("discard_copy_degree") + .template get<::FlexFlow::DiscardCopyDegree>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ReplicaParallelDimSet const &v) { +void adl_serializer<::FlexFlow::ReplicaParallelDimSet>::to_json( + json &j, ::FlexFlow::ReplicaParallelDimSet const &v) { j["__type"] = "ReplicaParallelDimSet"; j["sum_degree"] = v.sum_degree; j["discard_copy_degree"] = v.discard_copy_degree; @@ -78,9 +80,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::ReplicaParallelDimSet> + Arbitrary<::FlexFlow::ReplicaParallelDimSet>::arbitrary() { + return gen::construct<::FlexFlow::ReplicaParallelDimSet>( gen::arbitrary<::FlexFlow::SumDegree>(), gen::arbitrary<::FlexFlow::DiscardCopyDegree>()); } diff --git a/lib/op-attrs/src/op-attrs/shard_parallel_dim.dtg.cc b/lib/op-attrs/src/op-attrs/shard_parallel_dim.dtg.cc index 9566eb486b..fba9e1b8f7 100644 --- a/lib/op-attrs/src/op-attrs/shard_parallel_dim.dtg.cc +++ b/lib/op-attrs/src/op-attrs/shard_parallel_dim.dtg.cc @@ -42,7 +42,7 @@ bool ShardParallelDim::operator>=(ShardParallelDim const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ShardParallelDim const &x) const { + ::FlexFlow::ShardParallelDim const &x) const { size_t result = 0; result ^= std::hash{}(x.size) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -53,13 +53,13 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ShardParallelDim - adl_serializer::from_json(json const &j) { - return {j.at("size").template get(), - j.at("degree").template get()}; +::FlexFlow::ShardParallelDim + 
adl_serializer<::FlexFlow::ShardParallelDim>::from_json(json const &j) { + return ::FlexFlow::ShardParallelDim{j.at("size").template get(), + j.at("degree").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ShardParallelDim const &v) { +void adl_serializer<::FlexFlow::ShardParallelDim>::to_json( + json &j, ::FlexFlow::ShardParallelDim const &v) { j["__type"] = "ShardParallelDim"; j["size"] = v.size; j["degree"] = v.degree; @@ -67,10 +67,10 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary(), - gen::arbitrary()); +Gen<::FlexFlow::ShardParallelDim> + Arbitrary<::FlexFlow::ShardParallelDim>::arbitrary() { + return gen::construct<::FlexFlow::ShardParallelDim>(gen::arbitrary(), + gen::arbitrary()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/tensor_dims.cc b/lib/op-attrs/src/op-attrs/tensor_dims.cc index ed40f509d9..de9c3d4adb 100644 --- a/lib/op-attrs/src/op-attrs/tensor_dims.cc +++ b/lib/op-attrs/src/op-attrs/tensor_dims.cc @@ -26,7 +26,8 @@ size_t &dim_at_idx(TensorDims &dims, ff_dim_t idx) { ParallelTensorDims lift_to_parallel(TensorDims const &dims) { std::vector shard_degrees(num_dims(dims), 1); // 1 repeated num_dims(dims) times - return lift_to_parallel_with_degrees(dims, 1, 1, shard_degrees); + return lift_to_parallel_with_degrees( + dims, SumDegree{1}, DiscardCopyDegree{1}, shard_degrees); } ParallelTensorDims diff --git a/lib/op-attrs/src/op-attrs/tensor_dims.dtg.cc b/lib/op-attrs/src/op-attrs/tensor_dims.dtg.cc index 909be323ac..ab78d44805 100644 --- a/lib/op-attrs/src/op-attrs/tensor_dims.dtg.cc +++ b/lib/op-attrs/src/op-attrs/tensor_dims.dtg.cc @@ -37,7 +37,7 @@ bool TensorDims::operator>=(TensorDims const &other) const { namespace std { size_t hash::operator()( - FlexFlow::TensorDims const &x) const { + ::FlexFlow::TensorDims const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::FFOrdered>{}(x.ff_ordered) + 
0x9e3779b9 + (result << 6) + (result >> 2); @@ -46,20 +46,21 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::TensorDims - adl_serializer::from_json(json const &j) { - return {j.at("ff_ordered").template get<::FlexFlow::FFOrdered>()}; +::FlexFlow::TensorDims + adl_serializer<::FlexFlow::TensorDims>::from_json(json const &j) { + return ::FlexFlow::TensorDims{ + j.at("ff_ordered").template get<::FlexFlow::FFOrdered>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::TensorDims const &v) { +void adl_serializer<::FlexFlow::TensorDims>::to_json( + json &j, ::FlexFlow::TensorDims const &v) { j["__type"] = "TensorDims"; j["ff_ordered"] = v.ff_ordered; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::TensorDims> Arbitrary<::FlexFlow::TensorDims>::arbitrary() { + return gen::construct<::FlexFlow::TensorDims>( gen::arbitrary<::FlexFlow::FFOrdered>()); } } // namespace rc diff --git a/lib/op-attrs/src/op-attrs/tensor_shape.dtg.cc b/lib/op-attrs/src/op-attrs/tensor_shape.dtg.cc index 92b31930fa..0c725dc443 100644 --- a/lib/op-attrs/src/op-attrs/tensor_shape.dtg.cc +++ b/lib/op-attrs/src/op-attrs/tensor_shape.dtg.cc @@ -45,7 +45,7 @@ bool TensorShape::operator>=(TensorShape const &other) const { namespace std { size_t hash::operator()( - FlexFlow::TensorShape const &x) const { + ::FlexFlow::TensorShape const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::TensorDims>{}(x.dims) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -56,13 +56,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::TensorShape - adl_serializer::from_json(json const &j) { - return {j.at("dims").template get<::FlexFlow::TensorDims>(), - j.at("data_type").template get<::FlexFlow::DataType>()}; +::FlexFlow::TensorShape + adl_serializer<::FlexFlow::TensorShape>::from_json(json const &j) { + return ::FlexFlow::TensorShape{ + j.at("dims").template 
get<::FlexFlow::TensorDims>(), + j.at("data_type").template get<::FlexFlow::DataType>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::TensorShape const &v) { +void adl_serializer<::FlexFlow::TensorShape>::to_json( + json &j, ::FlexFlow::TensorShape const &v) { j["__type"] = "TensorShape"; j["dims"] = v.dims; j["data_type"] = v.data_type; @@ -70,8 +71,8 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::TensorShape> Arbitrary<::FlexFlow::TensorShape>::arbitrary() { + return gen::construct<::FlexFlow::TensorShape>( gen::arbitrary<::FlexFlow::TensorDims>(), gen::arbitrary<::FlexFlow::DataType>()); } diff --git a/lib/op-attrs/test/src/datatype.cc b/lib/op-attrs/test/src/datatype.cc new file mode 100644 index 0000000000..cc7e496c60 --- /dev/null +++ b/lib/op-attrs/test/src/datatype.cc @@ -0,0 +1,29 @@ +#include "op-attrs/datatype.h" +#include "test/utils/doctest.h" +#include "test/utils/rapidcheck.h" + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("can_promote_datatype_from_to(DataType, DataType)") { + CHECK( + can_strictly_promote_datatype_from_to(DataType::BOOL, DataType::INT32)); + CHECK(can_strictly_promote_datatype_from_to(DataType::INT32, + DataType::INT64)); + CHECK(can_strictly_promote_datatype_from_to(DataType::FLOAT, + DataType::DOUBLE)); + + RC_SUBCASE("is strict", [](DataType d) { + RC_ASSERT(!can_strictly_promote_datatype_from_to(d, d)); + }); + + RC_SUBCASE("is asymmetric", [](DataType l, DataType r) { + RC_PRE(can_strictly_promote_datatype_from_to(l, r)); + RC_ASSERT(!can_strictly_promote_datatype_from_to(r, l)); + }); + + RC_SUBCASE("is transitive", [](DataType d1, DataType d2, DataType d3) { + RC_PRE(can_strictly_promote_datatype_from_to(d1, d2)); + RC_PRE(can_strictly_promote_datatype_from_to(d2, d3)); + RC_ASSERT(can_strictly_promote_datatype_from_to(d1, d3)); + }); + } +} diff --git a/lib/op-attrs/test/src/ops/attention.cc 
b/lib/op-attrs/test/src/ops/attention.cc new file mode 100644 index 0000000000..ade219a6a9 --- /dev/null +++ b/lib/op-attrs/test/src/ops/attention.cc @@ -0,0 +1,315 @@ +#include "op-attrs/ops/attention.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "test/utils/doctest.h" +#include "utils/integer_conversions.h" + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_output_shape(MultiHeadAttentionAttrs, TensorShape, " + "TensorShape, TensorShape)") { + int embed_dim = 32; + int num_heads = 10; + + /* Parameter meanings match those at + * https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html + */ + MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ + /*embed_dim=*/embed_dim, + /*num_heads=*/num_heads, + /*kdim=*/embed_dim, + /*vdim=*/embed_dim, + /*dropout=*/0.0, + /*bias=*/true, + /*add_bias_kv=*/false, + /*add_zero_attn=*/false, + }; + + size_t batch_size = 40; + size_t seq_len = 48; + size_t feature_size = 36; + + TensorShape input_q = TensorShape{ + TensorDims{ + FFOrdered{ + batch_size, + seq_len, + feature_size, + }, + }, + DataType::FLOAT, + }; + + TensorShape input_k = TensorShape{ + TensorDims{ + FFOrdered{ + batch_size, + seq_len, + feature_size, + }, + }, + DataType::FLOAT, + }; + + TensorShape input_v = TensorShape{ + TensorDims{ + FFOrdered{ + batch_size, + seq_len, + feature_size, + }, + }, + DataType::FLOAT, + }; + + TensorShape output = TensorShape{ + TensorDims{ + FFOrdered{ + batch_size, + seq_len, + size_t_from_int(attrs.embed_dim), + }, + }, + DataType::FLOAT, + }; + + TensorShape weights = TensorShape{ + TensorDims{ + FFOrdered{ + (feature_size * embed_dim) * 3 + (embed_dim * embed_dim), + size_t_from_int(num_heads), + }, + }, + DataType::FLOAT, + }; + + TensorShape input_bias = TensorShape{ + TensorDims{ + FFOrdered{ + size_t_from_int(embed_dim * 3), + }, + }, + DataType::FLOAT, + }; + + TensorShape output_bias = TensorShape{ + TensorDims{ + FFOrdered{ + size_t_from_int(embed_dim), + }, + }, + DataType::FLOAT, + }; 
+ + SUBCASE("get_output_shape") { + tl::expected result = + get_output_shape(attrs, input_q, input_k, input_v); + + tl::expected correct = output; + CHECK(result == correct); + } + + SUBCASE("get_weights_shape") { + tl::expected result = + get_weights_shape(attrs, input_q, input_k, input_v); + + tl::expected correct = weights; + CHECK(result == correct); + } + + SUBCASE("get_input_bias_shape") { + tl::expected result = + get_input_bias_shape(attrs, input_q, input_k, input_v); + tl::expected correct = input_bias; + CHECK(result == correct); + } + + SUBCASE("get_output_bias_shape") { + tl::expected result = + get_output_bias_shape(attrs, input_q, input_k, input_v); + tl::expected correct = output_bias; + CHECK(result == correct); + } + + SUBCASE("parallel shape inference") { + auto make_q = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_batch, + int o_seq_len, + int o_q) { + return lift_to_parallel_with_degrees( + input_q, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_q}); + }; + + auto make_k = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_batch, + int o_seq_len, + int o_k) { + return lift_to_parallel_with_degrees( + input_k, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_k}); + }; + + auto make_v = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_batch, + int o_seq_len, + int o_v) { + return lift_to_parallel_with_degrees( + input_v, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_v}); + }; + + auto make_o = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_batch, + int o_seq_len, + int o_o) { + return lift_to_parallel_with_degrees( + output, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_o}); + }; + + auto make_w = + [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o_e, int o_h) { + return lift_to_parallel_with_degrees( + weights, o_sum, o_eq, FFOrdered{o_e, o_h}); + }; + + auto make_input_bias = + [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o_in_proj_channel) { + return lift_to_parallel_with_degrees( + input_bias, o_sum, o_eq, 
FFOrdered{o_in_proj_channel}); + }; + + auto make_output_bias = + [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o_out_proj_channel) { + return lift_to_parallel_with_degrees( + output_bias, o_sum, o_eq, FFOrdered{o_out_proj_channel}); + }; + + SUBCASE("data parallelism") { + int o_b = 4; + ParallelTensorShape q = + make_q(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); + ParallelTensorShape k = + make_k(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); + ParallelTensorShape v = + make_v(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); + + SUBCASE("get_output_shape") { + tl::expected result = + get_output_shape(attrs, q, k, v); + tl::expected correct = + make_o(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_weights_shape") { + tl::expected result = + get_weights_shape(attrs, q, k, v); + tl::expected correct = + make_w(SumDegree{1}, DiscardCopyDegree{o_b}, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_input_bias_shape") { + tl::expected result = + get_input_bias_shape(attrs, q, k, v); + tl::expected correct = + make_input_bias(SumDegree{1}, DiscardCopyDegree{o_b}, 1); + CHECK(result == correct); + } + + SUBCASE("get_output_bias_shape") { + tl::expected result = + get_output_bias_shape(attrs, q, k, v); + tl::expected correct = + make_output_bias(SumDegree{1}, DiscardCopyDegree{o_b}, 1); + CHECK(result == correct); + } + } + + SUBCASE("attention head parallelism") { + int o_h = 2; + ParallelTensorShape q = + make_q(SumDegree{1}, DiscardCopyDegree{o_h}, 1, 1, 1); + ParallelTensorShape k = + make_k(SumDegree{1}, DiscardCopyDegree{o_h}, 1, 1, 1); + ParallelTensorShape v = + make_v(SumDegree{1}, DiscardCopyDegree{o_h}, 1, 1, 1); + + SUBCASE("get_output_shape") { + tl::expected result = + get_output_shape(attrs, q, k, v); + tl::expected correct = + make_o(SumDegree{o_h}, DiscardCopyDegree{1}, 1, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_weight_shape") { + tl::expected result = + 
get_weights_shape(attrs, q, k, v); + tl::expected correct = + make_w(SumDegree{1}, DiscardCopyDegree{1}, 1, o_h); + CHECK(result == correct); + } + + SUBCASE("get_input_bias_shape") { + tl::expected result = + get_input_bias_shape(attrs, q, k, v); + tl::expected correct = + make_input_bias(SumDegree{1}, DiscardCopyDegree{o_h}, 1); + CHECK(result == correct); + } + + SUBCASE("get_output_bias_shape") { + tl::expected result = + get_output_bias_shape(attrs, q, k, v); + tl::expected correct = + make_output_bias(SumDegree{1}, DiscardCopyDegree{o_h}, 1); + CHECK(result == correct); + } + } + + SUBCASE("combined data & attention head parallelism") { + int o_b = 4; + int o_h = 2; + ParallelTensorShape q = + make_q(SumDegree{1}, DiscardCopyDegree{o_h}, o_b, 1, 1); + ParallelTensorShape k = + make_k(SumDegree{1}, DiscardCopyDegree{o_h}, o_b, 1, 1); + ParallelTensorShape v = + make_v(SumDegree{1}, DiscardCopyDegree{o_h}, o_b, 1, 1); + + SUBCASE("get_output_shape") { + tl::expected result = + get_output_shape(attrs, q, k, v); + tl::expected correct = + make_o(SumDegree{o_h}, DiscardCopyDegree{1}, o_b, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_weights_shape") { + tl::expected result = + get_weights_shape(attrs, q, k, v); + tl::expected correct = + make_w(SumDegree{1}, DiscardCopyDegree{o_b}, 1, o_h); + CHECK(result == correct); + } + + SUBCASE("get_input_bias_shape") { + tl::expected result = + get_input_bias_shape(attrs, q, k, v); + tl::expected correct = + make_input_bias(SumDegree{1}, DiscardCopyDegree{o_b * o_h}, 1); + CHECK(result == correct); + } + + SUBCASE("get_output_bias_shape") { + tl::expected result = + get_output_bias_shape(attrs, q, k, v); + tl::expected correct = + make_output_bias(SumDegree{1}, DiscardCopyDegree{o_b * o_h}, 1); + CHECK(result == correct); + } + } + } + } +} diff --git a/lib/op-attrs/test/src/test_batch_matmul.cc b/lib/op-attrs/test/src/ops/batch_matmul.cc similarity index 59% rename from lib/op-attrs/test/src/test_batch_matmul.cc 
rename to lib/op-attrs/test/src/ops/batch_matmul.cc index f48478be10..3ff02ccece 100644 --- a/lib/op-attrs/test/src/test_batch_matmul.cc +++ b/lib/op-attrs/test/src/ops/batch_matmul.cc @@ -8,13 +8,13 @@ TEST_SUITE(FF_TEST_SUITE) { size_t n = 8; size_t p = 10; - BatchMatmulAttrs attrs = { + BatchMatmulAttrs attrs = BatchMatmulAttrs{ /*a_seq_length_dim=*/0, // TODO figure out if these arguments are still // relevant /*b_seq_length_dim=*/0, }; - TensorShape input_lhs_shape = { + TensorShape input_lhs_shape = TensorShape{ TensorDims{ FFOrdered{ b, @@ -26,7 +26,7 @@ TEST_SUITE(FF_TEST_SUITE) { }; SUBCASE("valid") { - TensorShape input_rhs_shape = { + TensorShape input_rhs_shape = TensorShape{ TensorDims{ FFOrdered{ b, @@ -55,7 +55,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("mismatched b") { - TensorShape input_rhs_shape = { + TensorShape input_rhs_shape = TensorShape{ TensorDims{ FFOrdered{ b + 1, @@ -73,7 +73,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("mismatched m") { - TensorShape input_rhs_shape = { + TensorShape input_rhs_shape = TensorShape{ TensorDims{ FFOrdered{ b, @@ -102,13 +102,17 @@ TEST_SUITE(FF_TEST_SUITE) { int o_p = 7; int o_sum = 11; - BatchMatmulAttrs attrs = { + BatchMatmulAttrs attrs = BatchMatmulAttrs{ /*a_seq_length_dim=*/0, // TODO figure out if these arguments are still // relevant /*b_seq_length_dim=*/0, }; - auto make_lhs = [&](int o_sum, int o_eq, int o_b, int o_n, int o_m) { + auto make_lhs = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_b, + int o_n, + int o_m) { return ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -125,7 +129,11 @@ TEST_SUITE(FF_TEST_SUITE) { }; }; - auto make_rhs = [&](int o_sum, int o_eq, int o_b, int o_m, int o_p) { + auto make_rhs = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_b, + int o_m, + int o_p) { return ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -142,7 +150,11 @@ TEST_SUITE(FF_TEST_SUITE) { }; }; - auto make_output = [&](int o_sum, int o_eq, int o_b, int o_n, int o_p) { + 
auto make_output = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_b, + int o_n, + int o_p) { return ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -161,106 +173,121 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("data parallel") { tl::expected result = get_output_shape( - attrs, make_lhs(1, 1, o_b, 1, 1), make_rhs(1, 1, o_b, 1, 1)); + attrs, + make_lhs(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1), + make_rhs(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1)); tl::expected correct = - make_output(1, 1, o_b, 1, 1); + make_output(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); CHECK(result == correct); } SUBCASE("n parallel") { tl::expected result = get_output_shape( - attrs, make_lhs(1, 1, 1, o_n, 1), make_rhs(1, o_n, 1, 1, 1)); + attrs, + make_lhs(SumDegree{1}, DiscardCopyDegree{1}, 1, o_n, 1), + make_rhs(SumDegree{1}, DiscardCopyDegree{o_n}, 1, 1, 1)); tl::expected correct = - make_output(1, 1, 1, o_n, 1); + make_output(SumDegree{1}, DiscardCopyDegree{1}, 1, o_n, 1); CHECK(result == correct); } SUBCASE("p parallel") { tl::expected result = get_output_shape( - attrs, make_lhs(1, o_p, 1, 1, 1), make_rhs(1, 1, 1, 1, o_p)); + attrs, + make_lhs(SumDegree{1}, DiscardCopyDegree{o_p}, 1, 1, 1), + make_rhs(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, o_p)); tl::expected correct = - make_output(1, 1, 1, 1, o_p); + make_output(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, o_p); CHECK(result == correct); } SUBCASE("reduction parallel") { tl::expected result = get_output_shape( - attrs, make_lhs(1, 1, 1, 1, o_m), make_rhs(1, 1, 1, o_m, 1)); + attrs, + make_lhs(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, o_m), + make_rhs(SumDegree{1}, DiscardCopyDegree{1}, 1, o_m, 1)); tl::expected correct = - make_output(o_m, 1, 1, 1, 1); + make_output(SumDegree{o_m}, DiscardCopyDegree{1}, 1, 1, 1); CHECK(result == correct); } SUBCASE("propagate reduction lhs") { tl::expected result = get_output_shape( - attrs, make_lhs(o_sum, 1, 1, 1, 1), make_rhs(1, o_sum, 1, 1, 1)); + attrs, + 
make_lhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1), + make_rhs(SumDegree{1}, DiscardCopyDegree{o_sum}, 1, 1, 1)); tl::expected correct = - make_output(o_sum, 1, 1, 1, 1); + make_output(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1); CHECK(result == correct); } SUBCASE("propagate reduction rhs") { tl::expected result = get_output_shape( - attrs, make_lhs(1, o_sum, 1, 1, 1), make_rhs(o_sum, 1, 1, 1, 1)); + attrs, + make_lhs(SumDegree{1}, DiscardCopyDegree{o_sum}, 1, 1, 1), + make_rhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1)); tl::expected correct = - make_output(o_sum, 1, 1, 1, 1); + make_output(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1); CHECK(result == correct); } SUBCASE("reduction lhs & reduction rhs") { - tl::expected result = - get_output_shape(attrs, - make_lhs(o_sum, o_sum, 1, 1, 1), - make_rhs(o_sum, o_sum, 1, 1, 1)); + tl::expected result = get_output_shape( + attrs, + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1, 1, 1), + make_rhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1, 1, 1)); tl::expected correct = - make_output(o_sum * o_sum, 1, 1, 1, 1); + make_output(SumDegree{o_sum * o_sum}, DiscardCopyDegree{1}, 1, 1, 1); CHECK(result == correct); } SUBCASE("reduction lhs & rhs (invalid)") { tl::expected result = get_output_shape( - attrs, make_lhs(o_sum, 1, 1, 1, 1), make_rhs(o_sum, 1, 1, 1, 1)); + attrs, + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1), + make_rhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1)); CHECK_MESSAGE( !result.has_value(), "Unexpected successful value: ", result); } SUBCASE("reduction lhs & n") { - tl::expected result = - get_output_shape(attrs, - make_lhs(o_sum, 1, 1, o_n, 1), - make_rhs(1, o_sum * o_n, 1, 1, 1)); + tl::expected result = get_output_shape( + attrs, + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, o_n, 1), + make_rhs(SumDegree{1}, DiscardCopyDegree{o_sum * o_n}, 1, 1, 1)); tl::expected correct = - make_output(o_sum, 1, 1, o_n, 1); + make_output(SumDegree{o_sum}, 
DiscardCopyDegree{1}, 1, o_n, 1); CHECK(result == correct); } SUBCASE("reduction lhs & reduction rhs & n") { - tl::expected result = - get_output_shape(attrs, - make_lhs(o_sum, o_sum, 1, o_n, 1), - make_rhs(o_sum, o_sum * o_n, 1, 1, 1)); - tl::expected correct = - make_output(o_sum * o_sum, 1, 1, o_n, 1); + tl::expected result = get_output_shape( + attrs, + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1, o_n, 1), + make_rhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum * o_n}, 1, 1, 1)); + tl::expected correct = make_output( + SumDegree{o_sum * o_sum}, DiscardCopyDegree{1}, 1, o_n, 1); CHECK(result == correct); } SUBCASE("reduction lhs & reduction rhs & n & m") { - tl::expected result = - get_output_shape(attrs, - make_lhs(o_sum, o_sum, 1, o_n, o_m), - make_rhs(o_sum, o_sum * o_n, 1, o_m, 1)); - tl::expected correct = - make_output(o_sum * o_sum * o_m, 1, 1, o_n, 1); + tl::expected result = get_output_shape( + attrs, + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1, o_n, o_m), + make_rhs( + SumDegree{o_sum}, DiscardCopyDegree{o_sum * o_n}, 1, o_m, 1)); + tl::expected correct = make_output( + SumDegree{o_sum * o_sum * o_m}, DiscardCopyDegree{1}, 1, o_n, 1); CHECK(result == correct); } diff --git a/lib/op-attrs/test/src/ops/cast.cc b/lib/op-attrs/test/src/ops/cast.cc new file mode 100644 index 0000000000..086d25d042 --- /dev/null +++ b/lib/op-attrs/test/src/ops/cast.cc @@ -0,0 +1,62 @@ +#include "op-attrs/ops/cast.h" +#include "test/utils/doctest.h" + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Cast shape inference") { + DataType input_datatype = DataType::FLOAT; + DataType output_datatype = DataType::DOUBLE; + + CastAttrs attrs = CastAttrs{output_datatype}; + + size_t d1 = 12; + size_t d2 = 16; + TensorShape input = TensorShape{ + TensorDims{FFOrdered{d1, d2}}, + input_datatype, + }; + + TensorShape output = TensorShape{ + TensorDims{FFOrdered{d1, d2}}, + output_datatype, + }; + + SUBCASE("get_output_shape(CastAttrs, TensorShape)") { + tl::expected result 
= + get_output_shape(attrs, input); + tl::expected correct = output; + CHECK(result == correct); + } + + SUBCASE("get_output_shape(CastAttrs, ParallelTensorShape)") { + auto make_input = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_batch, + int o_features) { + return lift_to_parallel_with_degrees( + input, o_sum, o_eq, FFOrdered{o_batch, o_features}); + }; + + auto make_output = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_batch, + int o_outchannels) { + return lift_to_parallel_with_degrees( + output, o_sum, o_eq, FFOrdered{o_batch, o_outchannels}); + }; + + SumDegree sum_degree = SumDegree{2}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{3}; + int batch_degree = 4; + int feature_degree = 8; + ParallelTensorShape par_input = make_input( + sum_degree, discard_copy_degree, batch_degree, feature_degree); + + tl::expected result = + get_output_shape(attrs, par_input); + tl::expected correct = make_output( + sum_degree, discard_copy_degree, batch_degree, feature_degree); + + CHECK(result == correct); + } + } +} diff --git a/lib/op-attrs/test/src/ops/combine.cc b/lib/op-attrs/test/src/ops/combine.cc index a50b3b01de..ac18bbc798 100644 --- a/lib/op-attrs/test/src/ops/combine.cc +++ b/lib/op-attrs/test/src/ops/combine.cc @@ -4,7 +4,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Combine shape inference") { - ParallelTensorShape input = { + ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ ShardParallelDim{12, 2}, @@ -21,7 +21,7 @@ TEST_SUITE(FF_TEST_SUITE) { }; SUBCASE("valid") { - ff_dim_t dim = 2; + ff_dim_t dim = ff_dim_t{2}; int degree = 3; CombineAttrs attrs = CombineAttrs{ /*repartition_dim=*/dim, @@ -41,7 +41,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("invalid") { - ff_dim_t dim = 2; + ff_dim_t dim = ff_dim_t{2}; int degree = 4; CombineAttrs attrs = CombineAttrs{ /*repartition_dim=*/dim, diff --git a/lib/op-attrs/test/src/ops/conv_2d.cc b/lib/op-attrs/test/src/ops/conv_2d.cc new file mode 100644 index 
0000000000..6f5028cfeb --- /dev/null +++ b/lib/op-attrs/test/src/ops/conv_2d.cc @@ -0,0 +1,241 @@ +#include "op-attrs/ops/conv_2d.h" +#include "doctest/doctest.h" +#include "utils/integer_conversions.h" + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Conv2D shape inference") { + int out_channels = 4; + int kernel_h = 3; + int kernel_w = 2; + int stride_h = 2; + int stride_w = 2; + int padding_h = 1; + int padding_w = 1; + int groups = 1; + std::optional activation = std::nullopt; + bool use_bias = true; + + Conv2DAttrs attrs = Conv2DAttrs{ + /*out_channels=*/out_channels, + /*kernel_h=*/kernel_h, + /*kernel_w=*/kernel_w, + /*stride_h=*/stride_h, + /*stride_w=*/stride_w, + /*padding_h=*/padding_h, + /*padding_w=*/padding_w, + /*groups=*/groups, + /*activation=*/activation, + /*use_bias=*/true, + }; + + size_t num_samples = 7; + size_t input_channels = 6; + size_t input_height = 10; + size_t input_width = 15; + + TensorShape input = TensorShape{ + TensorDims{FFOrdered{ + num_samples, + input_channels, + input_height, + input_width, + }}, + DataType::FLOAT, + }; + + size_t output_height = 3; + size_t output_width = 6; + + TensorShape output = TensorShape{ + TensorDims{FFOrdered{ + num_samples, + size_t_from_int(out_channels), + output_height, + output_width, + }}, + DataType::FLOAT, + }; + + TensorShape kernel = TensorShape{ + TensorDims{FFOrdered{ + size_t_from_int(out_channels), + input_channels, + size_t_from_int(kernel_h), + size_t_from_int(kernel_w), + }}, + DataType::FLOAT, + }; + + TensorShape bias = TensorShape{ + TensorDims{FFOrdered{ + size_t_from_int(out_channels), + }}, + DataType::FLOAT, + }; + + SUBCASE("get_output_shape(Conv2DAttrs, TensorShape)") { + TensorShape result_output = get_output_shape(attrs, input); + TensorShape correct_output = output; + CHECK(result_output == correct_output); + } + + SUBCASE("get_kernel_shape(Conv2DAttrs, TensorShape)") { + TensorShape result_kernel = get_kernel_shape(attrs, input); + TensorShape correct_kernel = kernel; + 
CHECK(result_kernel == correct_kernel); + } + + SUBCASE("get_bias_shape(Conv2DAttrs, TensorShape)") { + TensorShape result_bias = get_bias_shape(attrs, input); + TensorShape correct_bias = bias; + CHECK(result_bias == correct_bias); + } + + auto make_input = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_n, + int o_c, + int o_h, + int o_w) { + return lift_to_parallel_with_degrees( + input, o_sum, o_eq, FFOrdered{o_n, o_c, o_h, o_w}); + }; + + auto make_output = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_n, + int o_c, + int o_h, + int o_w) { + return lift_to_parallel_with_degrees( + output, o_sum, o_eq, FFOrdered{o_n, o_c, o_h, o_w}); + }; + + auto make_kernel = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + int o_outchannels, + int o_inchannels, + int o_kernel_h, + int o_kernel_w) { + return lift_to_parallel_with_degrees( + kernel, + o_sum, + o_eq, + FFOrdered{o_outchannels, o_inchannels, o_kernel_h, o_kernel_w}); + }; + + auto make_bias = + [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o_outchannels) { + return lift_to_parallel_with_degrees( + bias, o_sum, o_eq, FFOrdered{o_outchannels}); + }; + + SUBCASE("data parallelism") { + int degree = 2; + ParallelTensorShape par_input = + make_input(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1, 1); + + SUBCASE("get_output_shape") { + ParallelTensorShape result = get_output_shape(attrs, par_input); + ParallelTensorShape correct = + make_output(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_kernel_shape") { + ParallelTensorShape result = get_kernel_shape(attrs, par_input); + ParallelTensorShape correct = + make_kernel(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_bias_shape") { + ParallelTensorShape result = get_bias_shape(attrs, par_input); + ParallelTensorShape correct = + make_bias(SumDegree{1}, DiscardCopyDegree{degree}, 1); + CHECK(result == correct); + } + } + + SUBCASE("input 
channel parallelism") { + int degree = 2; + ParallelTensorShape par_input = + make_input(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1, 1); + + SUBCASE("get_output_shape") { + ParallelTensorShape result = get_output_shape(attrs, par_input); + ParallelTensorShape correct = + make_output(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_kernel_shape") { + ParallelTensorShape result = get_kernel_shape(attrs, par_input); + ParallelTensorShape correct = + make_kernel(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_bias_shape") { + ParallelTensorShape result = get_bias_shape(attrs, par_input); + ParallelTensorShape correct = + make_bias(SumDegree{degree}, DiscardCopyDegree{1}, 1); + CHECK(result == correct); + } + } + + SUBCASE("output channel parallelism") { + int degree = 2; + ParallelTensorShape par_input = + make_input(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1, 1); + + SUBCASE("get_output_shape") { + ParallelTensorShape result = get_output_shape(attrs, par_input); + ParallelTensorShape correct = + make_output(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_kernel_shape") { + ParallelTensorShape result = get_kernel_shape(attrs, par_input); + ParallelTensorShape correct = + make_kernel(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_bias_shape") { + ParallelTensorShape result = get_bias_shape(attrs, par_input); + ParallelTensorShape correct = + make_bias(SumDegree{1}, DiscardCopyDegree{1}, degree); + CHECK(result == correct); + } + } + + SUBCASE("propagating sum degree") { + int degree = 2; + ParallelTensorShape par_input = + make_input(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1, 1); + + SUBCASE("get_output_shape") { + ParallelTensorShape result = get_output_shape(attrs, par_input); + ParallelTensorShape correct = + 
make_output(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_kernel_shape") { + ParallelTensorShape result = get_kernel_shape(attrs, par_input); + ParallelTensorShape correct = + make_kernel(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1, 1); + CHECK(result == correct); + } + + SUBCASE("get_bias_shape") { + ParallelTensorShape result = get_bias_shape(attrs, par_input); + ParallelTensorShape correct = + make_bias(SumDegree{degree}, DiscardCopyDegree{1}, 1); + CHECK(result == correct); + } + } + } +} diff --git a/lib/op-attrs/test/src/test_element_binary.cc b/lib/op-attrs/test/src/ops/element_binary.cc similarity index 82% rename from lib/op-attrs/test/src/test_element_binary.cc rename to lib/op-attrs/test/src/ops/element_binary.cc index b1aedbf6b5..0ed695eb89 100644 --- a/lib/op-attrs/test/src/test_element_binary.cc +++ b/lib/op-attrs/test/src/ops/element_binary.cc @@ -108,12 +108,14 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("data parallelism") { int degree = 4; - ParallelTensorShape input_lhs = make_lhs(1, 1, degree, 1, 1); - ParallelTensorShape input_rhs = make_rhs(1, 1, degree, 1, 1); + ParallelTensorShape input_lhs = + make_lhs(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1); + ParallelTensorShape input_rhs = + make_rhs(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1); tl::expected result = get_output_shape(attrs, input_lhs, input_rhs); tl::expected correct = - make_output(1, 1, degree, 1, 1); + make_output(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1); CHECK(result == correct); } @@ -121,12 +123,14 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("reduction parallelism") { int degree = 4; - ParallelTensorShape input_lhs = make_lhs(SumDegree{degree}, 1, 1, 1, 1); - ParallelTensorShape input_rhs = make_rhs(SumDegree{degree}, 1, 1, 1, 1); + ParallelTensorShape input_lhs = + make_lhs(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1); + ParallelTensorShape input_rhs = + make_rhs(SumDegree{degree}, 
DiscardCopyDegree{1}, 1, 1, 1); tl::expected result = get_output_shape(attrs, input_lhs, input_rhs); tl::expected correct = - make_output(SumDegree{degree}, 1, 1, 1, 1); + make_output(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1); CHECK(result == correct); } @@ -135,9 +139,9 @@ TEST_SUITE(FF_TEST_SUITE) { int degree = 4; ParallelTensorShape input_lhs = - make_lhs(1, DiscardCopyDegree{degree}, 1, 1, 1); + make_lhs(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1); ParallelTensorShape input_rhs = - make_rhs(1, DiscardCopyDegree{degree}, 1, 1, 1); + make_rhs(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1); tl::expected result = get_output_shape(attrs, input_lhs, input_rhs); @@ -149,8 +153,10 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("invalid mismatched parallelism degrees") { int degree = 4; - ParallelTensorShape input_lhs = make_lhs(1, 1, 1, degree, 1); - ParallelTensorShape input_rhs = make_rhs(1, 1, 1, 1, degree); + ParallelTensorShape input_lhs = + make_lhs(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1); + ParallelTensorShape input_rhs = + make_rhs(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, degree); tl::expected result = get_output_shape(attrs, input_lhs, input_rhs); diff --git a/lib/op-attrs/test/src/test_element_unary.cc b/lib/op-attrs/test/src/ops/element_unary.cc similarity index 84% rename from lib/op-attrs/test/src/test_element_unary.cc rename to lib/op-attrs/test/src/ops/element_unary.cc index b479a7e3cd..4239782d55 100644 --- a/lib/op-attrs/test/src/test_element_unary.cc +++ b/lib/op-attrs/test/src/ops/element_unary.cc @@ -40,7 +40,8 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("partition i.e., sharding parallelism") { int degree1 = 4; int degree2 = 8; - ParallelTensorShape par_input = make_i(1, 1, degree1, 1, degree2); + ParallelTensorShape par_input = + make_i(SumDegree{1}, DiscardCopyDegree{1}, degree1, 1, degree2); tl::expected result = get_output_shape(attrs, par_input); @@ -52,8 +53,8 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("sum degree > 1") { int 
degree = 2; - tl::expected result = - get_output_shape(attrs, make_i(SumDegree{degree}, 1, 1, 1, 1)); + tl::expected result = get_output_shape( + attrs, make_i(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1)); CHECK_MESSAGE(!result.has_value(), "Unexpected successful result: ", @@ -64,7 +65,7 @@ TEST_SUITE(FF_TEST_SUITE) { int degree = 2; tl::expected result = get_output_shape( - attrs, make_i(1, DiscardCopyDegree{degree}, 1, 1, 1)); + attrs, make_i(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1)); CHECK_MESSAGE(!result.has_value(), "Unexpected successful result: ", diff --git a/lib/op-attrs/test/src/test_embedding.cc b/lib/op-attrs/test/src/ops/embedding.cc similarity index 99% rename from lib/op-attrs/test/src/test_embedding.cc rename to lib/op-attrs/test/src/ops/embedding.cc index 7bce6bd4d9..9180f7055d 100644 --- a/lib/op-attrs/test/src/test_embedding.cc +++ b/lib/op-attrs/test/src/ops/embedding.cc @@ -17,7 +17,7 @@ TEST_SUITE(FF_TEST_SUITE) { size_t batch_size = 48; size_t features_dim = 56; - TensorShape input = { + TensorShape input = TensorShape{ TensorDims{FFOrdered{ batch_size, features_dim, diff --git a/lib/op-attrs/test/src/ops/reduction.cc b/lib/op-attrs/test/src/ops/reduction.cc index 6f73951e00..59ed5bb5ee 100644 --- a/lib/op-attrs/test/src/ops/reduction.cc +++ b/lib/op-attrs/test/src/ops/reduction.cc @@ -4,7 +4,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Reduction shape inference") { - ParallelTensorShape input = { + ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ ShardParallelDim{12, 2}, diff --git a/lib/op-attrs/test/src/ops/repartition.cc b/lib/op-attrs/test/src/ops/repartition.cc index 3b3ae92b4c..af28a6d471 100644 --- a/lib/op-attrs/test/src/ops/repartition.cc +++ b/lib/op-attrs/test/src/ops/repartition.cc @@ -3,14 +3,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Repartition shape inference") { - ff_dim_t dim = 2; + ff_dim_t dim = ff_dim_t{2}; int degree = 4; RepartitionAttrs attrs = RepartitionAttrs{ 
/*repartition_dim=*/dim, /*repartition_degree=*/degree, }; - ParallelTensorShape input = { + ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ ShardParallelDim{12, 2}, diff --git a/lib/op-attrs/test/src/ops/replicate.cc b/lib/op-attrs/test/src/ops/replicate.cc index b326038388..a0ec40cc14 100644 --- a/lib/op-attrs/test/src/ops/replicate.cc +++ b/lib/op-attrs/test/src/ops/replicate.cc @@ -7,7 +7,7 @@ TEST_SUITE(FF_TEST_SUITE) { /*replicate_degree=*/4, }; - ParallelTensorShape input = { + ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ ShardParallelDim{10, 2}, @@ -26,7 +26,7 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape result = get_output_shape(attrs, input); ParallelTensorShape correct_output = input; - correct_output.dims.replica_dims.discard_copy_degree = 8; + correct_output.dims.replica_dims.discard_copy_degree = DiscardCopyDegree{8}; CHECK(result == correct_output); } diff --git a/lib/op-attrs/test/src/test_attention.cc b/lib/op-attrs/test/src/test_attention.cc deleted file mode 100644 index 74ae4565ca..0000000000 --- a/lib/op-attrs/test/src/test_attention.cc +++ /dev/null @@ -1,272 +0,0 @@ -#include "op-attrs/ops/attention.h" -#include "op-attrs/parallel_tensor_shape.h" -#include "test/utils/doctest.h" -#include "utils/integer_conversions.h" - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_output_shape(MultiHeadAttentionAttrs, TensorShape, " - "TensorShape, TensorShape)") { - int embed_dim = 32; - - /* Parameter meanings match those at - * https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html - */ - MultiHeadAttentionAttrs attrs = { - /*embed_dim=*/embed_dim, - /*num_heads=*/10, - /*kdim=*/embed_dim, - /*vdim=*/embed_dim, - /*dropout=*/0.0, - /*bias=*/true, - /*add_bias_kv=*/false, - /*add_zero_attn=*/false, - }; - - size_t batch_size = 40; - size_t seq_len = 48; - - TensorShape input_q = { - TensorDims{FFOrdered{ - batch_size, - seq_len, - size_t_from_int(attrs.embed_dim), 
- }}, - DataType::FLOAT, - }; - - TensorShape input_k = { - TensorDims{ - FFOrdered{ - batch_size, - seq_len, - size_t_from_int(attrs.kdim), - }, - }, - DataType::FLOAT, - }; - - TensorShape input_v = { - TensorDims{ - FFOrdered{ - batch_size, - seq_len, - size_t_from_int(attrs.vdim), - }, - }, - DataType::FLOAT, - }; - - SUBCASE("get_output_shape") { - tl::expected result = - get_output_shape(attrs, input_q, input_k, input_v); - - tl::expected correct = TensorShape{ - TensorDims{FFOrdered{ - batch_size, - seq_len, - size_t_from_int(attrs.embed_dim), - }}, - DataType::FLOAT, - }; - - CHECK(result == correct); - } - - SUBCASE("get_weights_shape") { - tl::expected result = - get_weights_shape(attrs, input_q, input_k, input_v); - - int qProjPerHeadWeightSize = - attrs.kdim * dim_at_idx(input_q, ff_dim_t{-1}); - int kProjPerHeadWeightSize = - attrs.kdim * dim_at_idx(input_k, ff_dim_t{-1}); - int vProjPerHeadWeightSize = - attrs.vdim * dim_at_idx(input_v, ff_dim_t{-1}); - int oProjPerHeadWeightSize = attrs.embed_dim * attrs.vdim; - int perHeadWeightSize = qProjPerHeadWeightSize + kProjPerHeadWeightSize + - vProjPerHeadWeightSize + oProjPerHeadWeightSize; - - tl::expected correct = TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(perHeadWeightSize), - size_t_from_int(attrs.num_heads), - }}, - DataType::FLOAT, - }; - - CHECK(result == correct); - } - } - - TEST_CASE("parallel shape inference for MultiHeadAttentionAttrs") { - int embed_dim = 32; - - /* Parameter meanings can be found at - * https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html - */ - MultiHeadAttentionAttrs attrs = { - /*embed_dim=*/embed_dim, - /*num_heads=*/10, - /*kdim=*/embed_dim, - /*vdim=*/embed_dim, - /*dropout=*/0.0, - /*bias=*/true, - /*add_bias_kv=*/false, - /*add_zero_attn=*/false, - }; - - size_t batchsize = 40; - size_t seq_len = 48; - size_t q_size = 56; - size_t k_size = 64; - size_t v_size = 72; - - TensorShape unpar_q_shape = TensorShape{ - TensorDims{ - 
FFOrdered{ - batchsize, - seq_len, - q_size, - }, - }, - DataType::FLOAT, - }; - - TensorShape unpar_k_shape = TensorShape{ - TensorDims{ - FFOrdered{ - batchsize, - seq_len, - k_size, - }, - }, - DataType::FLOAT, - }; - - TensorShape unpar_v_shape = TensorShape{ - TensorDims{ - FFOrdered{ - batchsize, - seq_len, - v_size, - }, - }, - DataType::FLOAT, - }; - - tl::expected result_unpar_o_shape = - get_output_shape(attrs, unpar_q_shape, unpar_k_shape, unpar_v_shape); - REQUIRE(result_unpar_o_shape.has_value()); - TensorShape unpar_o_shape = result_unpar_o_shape.value(); - - tl::expected result_unpar_w_shape = - get_weights_shape(attrs, unpar_q_shape, unpar_k_shape, unpar_v_shape); - REQUIRE(result_unpar_o_shape.has_value()); - TensorShape unpar_w_shape = result_unpar_w_shape.value(); - - auto make_q = [&](SumDegree o_sum, - DiscardCopyDegree o_eq, - int o_batch, - int o_seq_len, - int o_q) { - return lift_to_parallel_with_degrees( - unpar_q_shape, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_q}); - }; - - auto make_k = [&](int o_sum, - int o_eq, - int o_batch, - int o_seq_len, - int o_k) { - return lift_to_parallel_with_degrees( - unpar_k_shape, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_k}); - }; - - auto make_v = [&](int o_sum, - int o_eq, - int o_batch, - int o_seq_len, - int o_v) { - return lift_to_parallel_with_degrees( - unpar_v_shape, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_v}); - }; - - auto make_o = [&](int o_sum, - int o_eq, - int o_batch, - int o_seq_len, - int o_o) { - return lift_to_parallel_with_degrees( - unpar_o_shape, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_o}); - }; - - auto make_w = [&](int o_sum, int o_eq, int o_e, int o_h) { - return lift_to_parallel_with_degrees( - unpar_w_shape, o_sum, o_eq, FFOrdered{o_e, o_h}); - }; - - SUBCASE("data parallelism") { - int o_b = 4; - ParallelTensorShape q = make_q(1, 1, o_b, 1, 1); - ParallelTensorShape k = make_k(1, 1, o_b, 1, 1); - ParallelTensorShape v = make_v(1, 1, o_b, 1, 1); - - 
tl::expected result_o = - get_output_shape(attrs, q, k, v); - tl::expected correct_o = - make_o(1, 1, o_b, 1, 1); - - CHECK(result_o == correct_o); - - tl::expected result_w = - get_weights_shape(attrs, q, k, v); - tl::expected correct_w = - make_w(1, o_b, 1, 1); - - CHECK(result_w == correct_w); - } - - SUBCASE("attention head parallelism") { - int o_h = 2; - ParallelTensorShape q = make_q(1, o_h, 1, 1, 1); - ParallelTensorShape k = make_k(1, o_h, 1, 1, 1); - ParallelTensorShape v = make_v(1, o_h, 1, 1, 1); - - tl::expected result_o = - get_output_shape(attrs, q, k, v); - tl::expected correct_o = - make_o(o_h, 1, 1, 1, 1); - - CHECK(result_o == correct_o); - - tl::expected result_w = - get_weights_shape(attrs, q, k, v); - tl::expected correct_w = - make_w(1, 1, 1, o_h); - - CHECK(result_w == correct_w); - } - - SUBCASE("combined data & attention head parallelism") { - int o_b = 4; - int o_h = 2; - ParallelTensorShape q = make_q(1, o_h, o_b, 1, 1); - ParallelTensorShape k = make_k(1, o_h, o_b, 1, 1); - ParallelTensorShape v = make_v(1, o_h, o_b, 1, 1); - - tl::expected result_o = - get_output_shape(attrs, q, k, v); - tl::expected correct_o = - make_o(o_h, 1, o_b, 1, 1); - - CHECK(result_o == correct_o); - - tl::expected result_w = - get_weights_shape(attrs, q, k, v); - tl::expected correct_w = - make_w(1, o_b, 1, o_h); - - CHECK(result_w == correct_w); - } - } -} diff --git a/lib/op-attrs/test/src/test_conv_2d.cc b/lib/op-attrs/test/src/test_conv_2d.cc deleted file mode 100644 index b16a26a7b1..0000000000 --- a/lib/op-attrs/test/src/test_conv_2d.cc +++ /dev/null @@ -1,62 +0,0 @@ -#include "doctest/doctest.h" -#include "op-attrs/ops/conv_2d.h" - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_output_shape(Conv2DAttrs, TensorShape)") { - int out_channels = 4; - int kernel_h = 3; - int kernel_w = 2; - int stride_h = 2; - int stride_w = 2; - int padding_h = 1; - int padding_w = 1; - int groups = 1; - std::optional activation = std::nullopt; - bool use_bias = true; - - 
Conv2DAttrs attrs = { - /*out_channels=*/out_channels, - /*kernel_h=*/kernel_h, - /*kernel_w=*/kernel_w, - /*stride_h=*/stride_h, - /*stride_w=*/stride_w, - /*padding_h=*/padding_h, - /*padding_w=*/padding_w, - /*groups=*/groups, - /*activation=*/activation, - /*use_bias=*/true, - }; - - size_t num_samples = 7; - size_t input_channels = 6; - size_t input_height = 10; - size_t input_width = 15; - - TensorShape input_shape = { - TensorDims{FFOrdered{ - num_samples, - input_channels, - input_height, - input_width, - }}, - DataType::FLOAT, - }; - - TensorShape result = get_output_shape(attrs, input_shape); - - size_t correct_output_height = 3; - size_t correct_output_width = 6; - - TensorShape correct_output_shape = { - TensorDims{FFOrdered{ - num_samples, - static_cast(out_channels), - correct_output_height, - correct_output_width, - }}, - DataType::FLOAT, - }; - - CHECK(result == correct_output_shape); - } -} diff --git a/lib/op-attrs/test/src/test_dim_ordered.cc b/lib/op-attrs/test/src/test_dim_ordered.cc index 17f4bae05f..ac05767800 100644 --- a/lib/op-attrs/test/src/test_dim_ordered.cc +++ b/lib/op-attrs/test/src/test_dim_ordered.cc @@ -1,13 +1,17 @@ #include "doctest/doctest.h" #include "op-attrs/dim_ordered.h" -#include +#include "test/utils/rapidcheck.h" using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE_TEMPLATE("RC", T, int, double, char) { - CHECK(rc::check("generate", - [](FFOrdered ff_dim, DimOrdered dim) {})); + TEST_CASE_TEMPLATE( + "Arbitrary> with T=", T, int, double, char) { + RC_SUBCASE([](DimOrdered) {}); + } + + TEST_CASE_TEMPLATE("Arbitrary> with T=", T, int, double, char) { + RC_SUBCASE([](FFOrdered) {}); } } diff --git a/lib/op-attrs/test/src/test_regularizer_attrs.cc b/lib/op-attrs/test/src/test_regularizer_attrs.cc index 198c3add38..35851463bb 100644 --- a/lib/op-attrs/test/src/test_regularizer_attrs.cc +++ b/lib/op-attrs/test/src/test_regularizer_attrs.cc @@ -1,14 +1,11 @@ -#include "doctest/doctest.h" #include 
"op-attrs/regularizer_attrs.dtg.h" -#include - -using namespace FlexFlow; +#include "test/utils/doctest.h" +#include "test/utils/rapidcheck.h" TEST_SUITE(FF_TEST_SUITE) { - - TEST_CASE("RC") { - CHECK(rc::check("valid variant", [](RegularizerAttrs reg) { - return reg.has() || reg.has(); - })); + TEST_CASE("Arbitrary") { + RC_SUBCASE([](RegularizerAttrs reg) { + RC_ASSERT(reg.has() || reg.has()); + }); } } diff --git a/lib/pcg/include/pcg/computation_graph.dtg.h b/lib/pcg/include/pcg/computation_graph.dtg.h index 217b940ce6..028d9ecfab 100644 --- a/lib/pcg/include/pcg/computation_graph.dtg.h +++ b/lib/pcg/include/pcg/computation_graph.dtg.h @@ -3,21 +3,21 @@ // lib/pcg/include/pcg/computation_graph.struct.toml /* proj-data { - "generated_from": "8f1f0e13d75065944f7fe307e12fe280" + "generated_from": "bf8996bea2e022265a372d692c2db8ed" } */ #ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_COMPUTATION_GRAPH_DTG_H #define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_COMPUTATION_GRAPH_DTG_H -#include "pcg/dataflow_graph.h" +#include "pcg/dataflow_graph/dataflow_graph.h" #include "pcg/layer_attrs.dtg.h" #include "pcg/tensor_attrs.dtg.h" namespace FlexFlow { struct ComputationGraph { ComputationGraph() = delete; - ComputationGraph( + explicit ComputationGraph( ::FlexFlow::DataflowGraph<::FlexFlow::LayerAttrs, ::FlexFlow::TensorAttrs> const &raw_graph); diff --git a/lib/pcg/include/pcg/computation_graph.struct.toml b/lib/pcg/include/pcg/computation_graph.struct.toml index a270cb8fbe..39c68b8e4f 100644 --- a/lib/pcg/include/pcg/computation_graph.struct.toml +++ b/lib/pcg/include/pcg/computation_graph.struct.toml @@ -5,7 +5,7 @@ features = [ ] includes = [ "pcg/layer_attrs.dtg.h", "pcg/tensor_attrs.dtg.h", - "pcg/dataflow_graph.h", + "pcg/dataflow_graph/dataflow_graph.h", ] [[fields]] diff --git a/lib/pcg/include/pcg/computation_graph/layer_added_result.dtg.h b/lib/pcg/include/pcg/computation_graph/layer_added_result.dtg.h index 4fd78f2d44..5013871cc8 100644 --- 
a/lib/pcg/include/pcg/computation_graph/layer_added_result.dtg.h +++ b/lib/pcg/include/pcg/computation_graph/layer_added_result.dtg.h @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/computation_graph/layer_added_result.struct.toml /* proj-data { - "generated_from": "15bf9d73ef934599c9b11807d86ae5d4" + "generated_from": "234b5c222ae4ce1da36194b4eb519145" } */ @@ -13,14 +13,16 @@ #include "fmt/format.h" #include "pcg/layer_guid_t.dtg.h" #include "pcg/tensor_guid_t.dtg.h" +#include "utils/fmt/vector.h" #include #include namespace FlexFlow { struct LayerAddedResult { LayerAddedResult() = delete; - LayerAddedResult(::FlexFlow::layer_guid_t const &layer, - std::vector<::FlexFlow::tensor_guid_t> const &outputs); + explicit LayerAddedResult( + ::FlexFlow::layer_guid_t const &layer, + std::vector<::FlexFlow::tensor_guid_t> const &outputs); bool operator==(LayerAddedResult const &) const; bool operator!=(LayerAddedResult const &) const; diff --git a/lib/pcg/include/pcg/computation_graph/layer_added_result.struct.toml b/lib/pcg/include/pcg/computation_graph/layer_added_result.struct.toml index b02e992ba1..d7b669fb3a 100644 --- a/lib/pcg/include/pcg/computation_graph/layer_added_result.struct.toml +++ b/lib/pcg/include/pcg/computation_graph/layer_added_result.struct.toml @@ -8,6 +8,7 @@ features = [ includes = [ "pcg/layer_guid_t.dtg.h", "pcg/tensor_guid_t.dtg.h", + "utils/fmt/vector.h" ] [[fields]] diff --git a/lib/pcg/include/pcg/computation_graph_builder.h b/lib/pcg/include/pcg/computation_graph_builder.h index 4bb04fc22a..0d2b5b272c 100644 --- a/lib/pcg/include/pcg/computation_graph_builder.h +++ b/lib/pcg/include/pcg/computation_graph_builder.h @@ -276,15 +276,6 @@ struct ComputationGraphBuilder { tensor_guid_t const &input, std::optional scalar, std::optional const &name = std::nullopt); - tensor_guid_t element_scalar_unary( - OperatorType, - tensor_guid_t const &input, - float scalar, - std::optional const &name = std::nullopt); - tensor_guid_t - element_unary(ElementUnaryAttrs 
const &, - tensor_guid_t const &input, - std::optional const &name = std::nullopt); public: ComputationGraph computation_graph; diff --git a/lib/pcg/include/pcg/cpu_id_t.dtg.h b/lib/pcg/include/pcg/cpu_id_t.dtg.h index a6c81e80b0..b5c5bdd22f 100644 --- a/lib/pcg/include/pcg/cpu_id_t.dtg.h +++ b/lib/pcg/include/pcg/cpu_id_t.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct cpu_id_t { cpu_id_t() = delete; - cpu_id_t(int const &cpu_index); + explicit cpu_id_t(int const &cpu_index); bool operator==(cpu_id_t const &) const; bool operator!=(cpu_id_t const &) const; @@ -34,23 +34,23 @@ struct cpu_id_t { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::cpu_id_t const &) const; +struct hash<::FlexFlow::cpu_id_t> { + size_t operator()(::FlexFlow::cpu_id_t const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::cpu_id_t from_json(json const &); - static void to_json(json &, FlexFlow::cpu_id_t const &); +struct adl_serializer<::FlexFlow::cpu_id_t> { + static ::FlexFlow::cpu_id_t from_json(json const &); + static void to_json(json &, ::FlexFlow::cpu_id_t const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::cpu_id_t> { + static Gen<::FlexFlow::cpu_id_t> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/dataflow_graph/algorithms.h b/lib/pcg/include/pcg/dataflow_graph/algorithms.h new file mode 100644 index 0000000000..7673bae41f --- /dev/null +++ b/lib/pcg/include/pcg/dataflow_graph/algorithms.h @@ -0,0 +1,37 @@ +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_DATAFLOW_GRAPH_ALGORITHMS_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_DATAFLOW_GRAPH_ALGORITHMS_H + +#include "pcg/dataflow_graph/dataflow_graph.h" + +namespace FlexFlow { + +template +std::vector + get_inputs(DataflowGraph const &g, Node const &n) { + std::vector> input_edges = + transform(as_vector(get_incoming_edges(g.get_raw_graph(), + 
std::unordered_set{n})), + [&](MultiDiEdge const &e) { + int idx = g.idx_for_port(e.dst_idx); + MultiDiOutput val = static_cast(e); + return std::make_pair(idx, val); + }); + + return vector_from_indexed_set(input_edges); +} + +template +std::vector + get_outputs(DataflowGraph const &g, Node const &n) { + return g.get_output_map().at(n); +} + +template +std::vector + topological_ordering(DataflowGraph const &g) { + return get_topological_ordering(g.get_raw_graph()); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/dataflow_graph.h b/lib/pcg/include/pcg/dataflow_graph/dataflow_graph.h similarity index 55% rename from lib/pcg/include/pcg/dataflow_graph.h rename to lib/pcg/include/pcg/dataflow_graph/dataflow_graph.h index f649c0444c..c0650bc9b4 100644 --- a/lib/pcg/include/pcg/dataflow_graph.h +++ b/lib/pcg/include/pcg/dataflow_graph/dataflow_graph.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_OPERATOR_GRAPH_DATAFLOW_GRAPH_H #define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_OPERATOR_GRAPH_DATAFLOW_GRAPH_H +#include "pcg/dataflow_graph/operator_added_result.dtg.h" #include "utils/containers/enumerate_vector.h" #include "utils/graph.h" @@ -13,24 +14,28 @@ struct DataflowGraph { : g(OutputLabelledMultiDiGraph::template create< UnorderedOutputLabelledMultiDiGraph>()) {} - std::vector + OperatorAddedResult add_operator(NodeLabel const &func, std::vector const &inputs, - std::vector const &outputs) { - Node n = this->g.add_node(func); + std::vector const &output_labels) { + Node node = this->g.add_node(func); for (auto const &[idx, input] : enumerate_vector(inputs)) { this->g.add_edge(MultiDiEdge{ - input.src, input.src_idx, n, this->make_port_for_idx(idx)}); + node, this->make_port_for_idx(idx), input.src, input.src_idx}); } - std::vector result; - for (auto const &[idx, label] : enumerate_vector(outputs)) { - MultiDiOutput output = MultiDiOutput{n, this->make_port_for_idx(idx)}; + std::vector outputs; + for (auto const &[idx, label] : 
enumerate_vector(output_labels)) { + MultiDiOutput output = MultiDiOutput{node, this->make_port_for_idx(idx)}; this->g.add_output(output, label); - result.push_back(output); + outputs.push_back(output); } + this->output_map[node] = outputs; - return result; + return OperatorAddedResult{ + node, + outputs, + }; } NodePort make_port_for_idx(int idx) { @@ -61,9 +66,17 @@ struct DataflowGraph { return this->g.at(o); } + std::unordered_map> const & + get_output_map() const { + return this->output_map; + } + private: OutputLabelledMultiDiGraph g; bidict port_mapping; + std::unordered_map> + output_map; // NOTE(@lockshaw): temporary workaround until not tracking + // outputs independent of edges in multidigraph is resolved }; template @@ -72,6 +85,21 @@ std::unordered_set return get_nodes(g.get_raw_graph()); } +template +std::vector + vector_from_indexed_set(std::vector> const &s) { + std::vector> result{s.size(), std::nullopt}; + for (auto const &[idx, value] : s) { + assert(idx < s.size() && idx >= 0); + assert(!result.at(idx).has_value()); + result.at(idx) = value; + } + return transform(result, [](std::optional const &v) { + assert(v.has_value()); + return v.value(); + }); +} + } // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/dataflow_graph/operator_added_result.dtg.h b/lib/pcg/include/pcg/dataflow_graph/operator_added_result.dtg.h new file mode 100644 index 0000000000..9e9803b8a0 --- /dev/null +++ b/lib/pcg/include/pcg/dataflow_graph/operator_added_result.dtg.h @@ -0,0 +1,43 @@ +// THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
+// If you would like to modify this datatype, instead modify +// lib/pcg/include/pcg/dataflow_graph/operator_added_result.struct.toml +/* proj-data +{ + "generated_from": "62224733c501773b41f1fc63a8677949" +} +*/ + +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_DATAFLOW_GRAPH_OPERATOR_ADDED_RESULT_DTG_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_DATAFLOW_GRAPH_OPERATOR_ADDED_RESULT_DTG_H + +#include "fmt/format.h" +#include "utils/fmt/vector.h" +#include "utils/graph.h" +#include +#include +#include + +namespace FlexFlow { +struct OperatorAddedResult { + OperatorAddedResult() = delete; + explicit OperatorAddedResult( + ::FlexFlow::Node const &node, + std::vector<::FlexFlow::MultiDiOutput> const &outputs); + + bool operator==(OperatorAddedResult const &) const; + bool operator!=(OperatorAddedResult const &) const; + bool operator<(OperatorAddedResult const &) const; + bool operator>(OperatorAddedResult const &) const; + bool operator<=(OperatorAddedResult const &) const; + bool operator>=(OperatorAddedResult const &) const; + ::FlexFlow::Node node; + std::vector<::FlexFlow::MultiDiOutput> outputs; +}; +} // namespace FlexFlow + +namespace FlexFlow { +std::string format_as(OperatorAddedResult const &); +std::ostream &operator<<(std::ostream &, OperatorAddedResult const &); +} // namespace FlexFlow + +#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_DATAFLOW_GRAPH_OPERATOR_ADDED_RESULT_DTG_H diff --git a/lib/pcg/include/pcg/dataflow_graph/operator_added_result.struct.toml b/lib/pcg/include/pcg/dataflow_graph/operator_added_result.struct.toml new file mode 100644 index 0000000000..3c9cb87e85 --- /dev/null +++ b/lib/pcg/include/pcg/dataflow_graph/operator_added_result.struct.toml @@ -0,0 +1,22 @@ +namespace = "FlexFlow" +name = "OperatorAddedResult" + +features = [ + "eq", + "ord", + "fmt", +] + +includes = [ + "", + "utils/graph.h", + "utils/fmt/vector.h", +] + +[[fields]] +name = "node" +type = "::FlexFlow::Node" + +[[fields]] +name = "outputs" +type = 
"std::vector<::FlexFlow::MultiDiOutput>" diff --git a/lib/pcg/include/pcg/file_format/v1/graphs.h b/lib/pcg/include/pcg/file_format/v1/graphs.h index dad73ce142..6090d60e1a 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs.h +++ b/lib/pcg/include/pcg/file_format/v1/graphs.h @@ -4,9 +4,9 @@ #include "pcg/computation_graph.dtg.h" #include "pcg/file_format/v1/graphs/v1_jsonable_graph.dtg.h" #include "pcg/layer_attrs.dtg.h" -#include "pcg/parallel_computation_graph.dtg.h" -#include "pcg/parallel_layer_attrs.dtg.h" -#include "pcg/parallel_tensor_attrs.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" +#include "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h" #include "pcg/tensor_attrs.dtg.h" #include "utils/json.h" diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_edge.dtg.h b/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_edge.dtg.h index e9238301d0..3243cca010 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_edge.dtg.h +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_edge.dtg.h @@ -19,10 +19,10 @@ namespace FlexFlow { struct V1GraphEdge { V1GraphEdge() = delete; - V1GraphEdge(size_t const &srcNode, - size_t const &srcIdx, - size_t const &dstNode, - size_t const &dstIdx); + explicit V1GraphEdge(size_t const &srcNode, + size_t const &srcIdx, + size_t const &dstNode, + size_t const &dstIdx); bool operator==(V1GraphEdge const &) const; bool operator!=(V1GraphEdge const &) const; @@ -39,16 +39,16 @@ struct V1GraphEdge { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::V1GraphEdge const &) const; +struct hash<::FlexFlow::V1GraphEdge> { + size_t operator()(::FlexFlow::V1GraphEdge const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::V1GraphEdge from_json(json const &); - static void to_json(json &, FlexFlow::V1GraphEdge const &); 
+struct adl_serializer<::FlexFlow::V1GraphEdge> { + static ::FlexFlow::V1GraphEdge from_json(json const &); + static void to_json(json &, ::FlexFlow::V1GraphEdge const &); }; } // namespace nlohmann diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_output.dtg.h b/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_output.dtg.h index 730282bdb9..eb9c013b36 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_output.dtg.h +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_output.dtg.h @@ -19,7 +19,7 @@ namespace FlexFlow { struct V1GraphOutput { V1GraphOutput() = delete; - V1GraphOutput(size_t const &srcNode, size_t const &srcIdx); + explicit V1GraphOutput(size_t const &srcNode, size_t const &srcIdx); bool operator==(V1GraphOutput const &) const; bool operator!=(V1GraphOutput const &) const; @@ -34,16 +34,16 @@ struct V1GraphOutput { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::V1GraphOutput const &) const; +struct hash<::FlexFlow::V1GraphOutput> { + size_t operator()(::FlexFlow::V1GraphOutput const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::V1GraphOutput from_json(json const &); - static void to_json(json &, FlexFlow::V1GraphOutput const &); +struct adl_serializer<::FlexFlow::V1GraphOutput> { + static ::FlexFlow::V1GraphOutput from_json(json const &); + static void to_json(json &, ::FlexFlow::V1GraphOutput const &); }; } // namespace nlohmann diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_jsonable_graph.dtg.h b/lib/pcg/include/pcg/file_format/v1/graphs/v1_jsonable_graph.dtg.h index f183a14a9e..c6ffb55e3b 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_jsonable_graph.dtg.h +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_jsonable_graph.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { template struct V1JsonableGraph { V1JsonableGraph() = delete; - V1JsonableGraph( + explicit V1JsonableGraph( std::unordered_map const 
&node_labels, std::unordered_map const &outputs, std::unordered_map const &output_labels, @@ -37,10 +37,10 @@ struct V1JsonableGraph { namespace nlohmann { template -struct adl_serializer> { - static FlexFlow::V1JsonableGraph from_json(json const &); +struct adl_serializer<::FlexFlow::V1JsonableGraph> { + static ::FlexFlow::V1JsonableGraph from_json(json const &); static void to_json(json &, - FlexFlow::V1JsonableGraph const &); + ::FlexFlow::V1JsonableGraph const &); }; } // namespace nlohmann @@ -65,10 +65,10 @@ V1JsonableGraph::V1JsonableGraph( namespace nlohmann { template -FlexFlow::V1JsonableGraph - adl_serializer>::from_json( +::FlexFlow::V1JsonableGraph + adl_serializer<::FlexFlow::V1JsonableGraph>::from_json( json const &j) { - return { + return ::FlexFlow::V1JsonableGraph{ j.at("node_labels").template get>(), j.at("outputs") .template get< @@ -77,8 +77,8 @@ FlexFlow::V1JsonableGraph j.at("graph").template get<::FlexFlow::V1MultiDiGraph>()}; } template -void adl_serializer>::to_json( - json &j, FlexFlow::V1JsonableGraph const &v) { +void adl_serializer<::FlexFlow::V1JsonableGraph>::to_json( + json &j, ::FlexFlow::V1JsonableGraph const &v) { j["__type"] = "V1JsonableGraph"; j["node_labels"] = v.node_labels; j["outputs"] = v.outputs; diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.dtg.h b/lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.dtg.h index 5d7edcf1d8..5b214d2b58 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.dtg.h +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.dtg.h @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.struct.toml /* proj-data { - "generated_from": "fb1033385645e54a19c9b44cef0be04b" + "generated_from": "582054edb983c3cc31d9273ce29421eb" } */ @@ -13,7 +13,8 @@ #include "fmt/format.h" #include "nlohmann/json.hpp" #include "pcg/file_format/v1/graphs/v1_graph_edge.dtg.h" -#include "utils/fmt.h" +#include "utils/fmt/unordered_set.h" +#include 
"utils/fmt/vector.h" #include #include #include @@ -21,9 +22,10 @@ namespace FlexFlow { struct V1MultiDiGraph { V1MultiDiGraph() = delete; - V1MultiDiGraph(std::vector const &nodes, - std::vector const &ports, - std::unordered_set<::FlexFlow::V1GraphEdge> const &edges); + explicit V1MultiDiGraph( + std::vector const &nodes, + std::vector const &ports, + std::unordered_set<::FlexFlow::V1GraphEdge> const &edges); std::vector nodes; std::vector ports; @@ -33,9 +35,9 @@ struct V1MultiDiGraph { namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::V1MultiDiGraph from_json(json const &); - static void to_json(json &, FlexFlow::V1MultiDiGraph const &); +struct adl_serializer<::FlexFlow::V1MultiDiGraph> { + static ::FlexFlow::V1MultiDiGraph from_json(json const &); + static void to_json(json &, ::FlexFlow::V1MultiDiGraph const &); }; } // namespace nlohmann diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.struct.toml b/lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.struct.toml index 9650f3bd43..20ca69eed4 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.struct.toml +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.struct.toml @@ -13,7 +13,8 @@ includes = [ "", "", "pcg/file_format/v1/graphs/v1_graph_edge.dtg.h", - "utils/fmt.h", + "utils/fmt/vector.h", + "utils/fmt/unordered_set.h", ] [[fields]] diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.dtg.h b/lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.dtg.h index 7e5554d44a..f1e9cb5a5c 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.dtg.h +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.dtg.h @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.struct.toml /* proj-data { - "generated_from": "5bfd7d8755cfd8cd9dbf57d5c367038e" + "generated_from": "fed215ca219af1bd375801eb2e33b473" } */ @@ -13,7 +13,8 @@ #include "fmt/format.h" #include 
"nlohmann/json.hpp" #include "pcg/file_format/v1/graphs/v1_graph_edge.dtg.h" -#include "utils/fmt.h" +#include "utils/fmt/unordered_set.h" +#include "utils/fmt/vector.h" #include #include #include @@ -21,8 +22,9 @@ namespace FlexFlow { struct V1OperatorGraph { V1OperatorGraph() = delete; - V1OperatorGraph(std::vector const &nodes, - std::unordered_set<::FlexFlow::V1GraphEdge> const &edges); + explicit V1OperatorGraph( + std::vector const &nodes, + std::unordered_set<::FlexFlow::V1GraphEdge> const &edges); std::vector nodes; std::unordered_set<::FlexFlow::V1GraphEdge> edges; @@ -31,9 +33,9 @@ struct V1OperatorGraph { namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::V1OperatorGraph from_json(json const &); - static void to_json(json &, FlexFlow::V1OperatorGraph const &); +struct adl_serializer<::FlexFlow::V1OperatorGraph> { + static ::FlexFlow::V1OperatorGraph from_json(json const &); + static void to_json(json &, ::FlexFlow::V1OperatorGraph const &); }; } // namespace nlohmann diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.struct.toml b/lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.struct.toml index 61dc45ae2e..2715ae176b 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.struct.toml +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.struct.toml @@ -13,7 +13,8 @@ includes = [ "", "", "pcg/file_format/v1/graphs/v1_graph_edge.dtg.h", - "utils/fmt.h", + "utils/fmt/unordered_set.h", + "utils/fmt/vector.h", ] [[fields]] diff --git a/lib/pcg/include/pcg/gpu_id_t.dtg.h b/lib/pcg/include/pcg/gpu_id_t.dtg.h index f0847848ca..e056b8e0e3 100644 --- a/lib/pcg/include/pcg/gpu_id_t.dtg.h +++ b/lib/pcg/include/pcg/gpu_id_t.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct gpu_id_t { gpu_id_t() = delete; - gpu_id_t(int const &gpu_index); + explicit gpu_id_t(int const &gpu_index); bool operator==(gpu_id_t const &) const; bool operator!=(gpu_id_t const &) const; @@ -34,23 +34,23 
@@ struct gpu_id_t { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::gpu_id_t const &) const; +struct hash<::FlexFlow::gpu_id_t> { + size_t operator()(::FlexFlow::gpu_id_t const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::gpu_id_t from_json(json const &); - static void to_json(json &, FlexFlow::gpu_id_t const &); +struct adl_serializer<::FlexFlow::gpu_id_t> { + static ::FlexFlow::gpu_id_t from_json(json const &); + static void to_json(json &, ::FlexFlow::gpu_id_t const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::gpu_id_t> { + static Gen<::FlexFlow::gpu_id_t> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/initializer_attrs.dtg.h b/lib/pcg/include/pcg/initializer_attrs.dtg.h index 7f5a470a90..3de94dcc86 100644 --- a/lib/pcg/include/pcg/initializer_attrs.dtg.h +++ b/lib/pcg/include/pcg/initializer_attrs.dtg.h @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/initializer_attrs.variant.toml /* proj-data { - "generated_from": "f66f3a89ea937e96a058d83ab52e2826" + "generated_from": "f4d932a4a7728ebfc28a23f2e6ca3201" } */ @@ -15,8 +15,9 @@ #include "pcg/initializers/constant_initializer_attrs.dtg.h" #include "pcg/initializers/glorot_uniform_attrs.dtg.h" #include "pcg/initializers/norm_initializer_attrs.dtg.h" -#include "pcg/initializers/uniform_initializer_attrs.dtg.h" +#include "pcg/initializers/uniform_initializer_attrs.h" #include "pcg/initializers/zero_initializer_attrs.dtg.h" +#include "rapidcheck.h" #include #include #include @@ -161,6 +162,12 @@ struct adl_serializer<::FlexFlow::InitializerAttrs> { static void to_json(json &, ::FlexFlow::InitializerAttrs const &); }; } // namespace nlohmann +namespace rc { +template <> +struct Arbitrary<::FlexFlow::InitializerAttrs> { + static Gen<::FlexFlow::InitializerAttrs> arbitrary(); +}; +} // namespace rc namespace FlexFlow { std::string 
format_as(::FlexFlow::InitializerAttrs const &); std::ostream &operator<<(std::ostream &, ::FlexFlow::InitializerAttrs const &); diff --git a/lib/pcg/include/pcg/initializer_attrs.variant.toml b/lib/pcg/include/pcg/initializer_attrs.variant.toml index 14a5cfdcac..1ea9ce05a6 100644 --- a/lib/pcg/include/pcg/initializer_attrs.variant.toml +++ b/lib/pcg/include/pcg/initializer_attrs.variant.toml @@ -6,12 +6,13 @@ features = [ "hash", "json", "fmt", + "rapidcheck", ] includes = [ "pcg/initializers/glorot_uniform_attrs.dtg.h", "pcg/initializers/zero_initializer_attrs.dtg.h", - "pcg/initializers/uniform_initializer_attrs.dtg.h", + "pcg/initializers/uniform_initializer_attrs.h", "pcg/initializers/norm_initializer_attrs.dtg.h", "pcg/initializers/constant_initializer_attrs.dtg.h", ] diff --git a/lib/pcg/include/pcg/initializers/constant_initializer_attrs.dtg.h b/lib/pcg/include/pcg/initializers/constant_initializer_attrs.dtg.h index 1eb9eb8834..18876046b2 100644 --- a/lib/pcg/include/pcg/initializers/constant_initializer_attrs.dtg.h +++ b/lib/pcg/include/pcg/initializers/constant_initializer_attrs.dtg.h @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/initializers/constant_initializer_attrs.struct.toml /* proj-data { - "generated_from": "0162b9c49fe6cbfc65410c6fa8dec427" + "generated_from": "4ffc8ccd7dfdb7674556487433ea9913" } */ @@ -13,6 +13,7 @@ #include "fmt/format.h" #include "nlohmann/json.hpp" #include "op-attrs/datatype.h" +#include "rapidcheck.h" #include "utils/json.h" #include #include @@ -21,7 +22,7 @@ namespace FlexFlow { struct ConstantInitializerAttrs { ConstantInitializerAttrs() = delete; - ConstantInitializerAttrs(::FlexFlow::DataTypeValue const &value); + explicit ConstantInitializerAttrs(::FlexFlow::DataTypeValue const &value); bool operator==(ConstantInitializerAttrs const &) const; bool operator!=(ConstantInitializerAttrs const &) const; @@ -35,19 +36,26 @@ struct ConstantInitializerAttrs { namespace std { template <> -struct hash { - size_t 
operator()(FlexFlow::ConstantInitializerAttrs const &) const; +struct hash<::FlexFlow::ConstantInitializerAttrs> { + size_t operator()(::FlexFlow::ConstantInitializerAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ConstantInitializerAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ConstantInitializerAttrs const &); +struct adl_serializer<::FlexFlow::ConstantInitializerAttrs> { + static ::FlexFlow::ConstantInitializerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ConstantInitializerAttrs const &); }; } // namespace nlohmann +namespace rc { +template <> +struct Arbitrary<::FlexFlow::ConstantInitializerAttrs> { + static Gen<::FlexFlow::ConstantInitializerAttrs> arbitrary(); +}; +} // namespace rc + namespace FlexFlow { std::string format_as(ConstantInitializerAttrs const &); std::ostream &operator<<(std::ostream &, ConstantInitializerAttrs const &); diff --git a/lib/pcg/include/pcg/initializers/constant_initializer_attrs.struct.toml b/lib/pcg/include/pcg/initializers/constant_initializer_attrs.struct.toml index 3a80559d7b..511ec057fa 100644 --- a/lib/pcg/include/pcg/initializers/constant_initializer_attrs.struct.toml +++ b/lib/pcg/include/pcg/initializers/constant_initializer_attrs.struct.toml @@ -5,7 +5,7 @@ features = [ "ord", "hash", "json", - # "rapidcheck", + "rapidcheck", "fmt", ] diff --git a/lib/pcg/include/pcg/initializers/glorot_uniform_attrs.dtg.h b/lib/pcg/include/pcg/initializers/glorot_uniform_attrs.dtg.h index 04851fb333..e6fe29a048 100644 --- a/lib/pcg/include/pcg/initializers/glorot_uniform_attrs.dtg.h +++ b/lib/pcg/include/pcg/initializers/glorot_uniform_attrs.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct GlorotUniformAttrs { GlorotUniformAttrs() = delete; - GlorotUniformAttrs(int const &seed); + explicit GlorotUniformAttrs(int const &seed); bool operator==(GlorotUniformAttrs const &) const; bool operator!=(GlorotUniformAttrs 
const &) const; @@ -34,23 +34,23 @@ struct GlorotUniformAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::GlorotUniformAttrs const &) const; +struct hash<::FlexFlow::GlorotUniformAttrs> { + size_t operator()(::FlexFlow::GlorotUniformAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::GlorotUniformAttrs from_json(json const &); - static void to_json(json &, FlexFlow::GlorotUniformAttrs const &); +struct adl_serializer<::FlexFlow::GlorotUniformAttrs> { + static ::FlexFlow::GlorotUniformAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::GlorotUniformAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::GlorotUniformAttrs> { + static Gen<::FlexFlow::GlorotUniformAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/initializers/norm_initializer_attrs.dtg.h b/lib/pcg/include/pcg/initializers/norm_initializer_attrs.dtg.h index e1d3e59ed7..602a877c30 100644 --- a/lib/pcg/include/pcg/initializers/norm_initializer_attrs.dtg.h +++ b/lib/pcg/include/pcg/initializers/norm_initializer_attrs.dtg.h @@ -20,7 +20,9 @@ namespace FlexFlow { struct NormInitializerAttrs { NormInitializerAttrs() = delete; - NormInitializerAttrs(int const &seed, float const &mean, float const &stddev); + explicit NormInitializerAttrs(int const &seed, + float const &mean, + float const &stddev); bool operator==(NormInitializerAttrs const &) const; bool operator!=(NormInitializerAttrs const &) const; @@ -36,23 +38,23 @@ struct NormInitializerAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::NormInitializerAttrs const &) const; +struct hash<::FlexFlow::NormInitializerAttrs> { + size_t operator()(::FlexFlow::NormInitializerAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static 
FlexFlow::NormInitializerAttrs from_json(json const &); - static void to_json(json &, FlexFlow::NormInitializerAttrs const &); +struct adl_serializer<::FlexFlow::NormInitializerAttrs> { + static ::FlexFlow::NormInitializerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::NormInitializerAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::NormInitializerAttrs> { + static Gen<::FlexFlow::NormInitializerAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.dtg.h b/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.dtg.h index 1f4deada06..2ff17a9e54 100644 --- a/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.dtg.h +++ b/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.dtg.h @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/initializers/uniform_initializer_attrs.struct.toml /* proj-data { - "generated_from": "f887e1db5d5dc710793ec5fa99bb7cd4" + "generated_from": "dd9cbe65dc4495b031aef40d353db928" } */ @@ -19,9 +19,9 @@ namespace FlexFlow { struct UniformInitializerAttrs { UniformInitializerAttrs() = delete; - UniformInitializerAttrs(int const &seed, - float const &min_val, - float const &max_val); + explicit UniformInitializerAttrs(int const &seed, + float const &min_val, + float const &max_val); bool operator==(UniformInitializerAttrs const &) const; bool operator!=(UniformInitializerAttrs const &) const; @@ -37,16 +37,16 @@ struct UniformInitializerAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::UniformInitializerAttrs const &) const; +struct hash<::FlexFlow::UniformInitializerAttrs> { + size_t operator()(::FlexFlow::UniformInitializerAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::UniformInitializerAttrs from_json(json const &); - static void to_json(json &, 
FlexFlow::UniformInitializerAttrs const &); +struct adl_serializer<::FlexFlow::UniformInitializerAttrs> { + static ::FlexFlow::UniformInitializerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::UniformInitializerAttrs const &); }; } // namespace nlohmann diff --git a/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.h b/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.h new file mode 100644 index 0000000000..43623e51d3 --- /dev/null +++ b/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_INITIALIZERS_UNIFORM_INITIALIZER_ATTRS_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_INITIALIZERS_UNIFORM_INITIALIZER_ATTRS_H + +#include "pcg/initializers/uniform_initializer_attrs.dtg.h" +#include + +namespace rc { + +template <> +struct Arbitrary<::FlexFlow::UniformInitializerAttrs> { + static Gen<::FlexFlow::UniformInitializerAttrs> arbitrary(); +}; + +} // namespace rc + +#endif diff --git a/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.struct.toml b/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.struct.toml index 11a6597c0a..8ee67b9d4b 100644 --- a/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.struct.toml +++ b/lib/pcg/include/pcg/initializers/uniform_initializer_attrs.struct.toml @@ -5,7 +5,6 @@ features = [ "ord", "hash", "json", - # "rapidcheck", "fmt", ] diff --git a/lib/pcg/include/pcg/initializers/zero_initializer_attrs.dtg.h b/lib/pcg/include/pcg/initializers/zero_initializer_attrs.dtg.h index f3086ea087..7a4a8ccd1f 100644 --- a/lib/pcg/include/pcg/initializers/zero_initializer_attrs.dtg.h +++ b/lib/pcg/include/pcg/initializers/zero_initializer_attrs.dtg.h @@ -30,23 +30,23 @@ struct ZeroInitializerAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ZeroInitializerAttrs const &) const; +struct hash<::FlexFlow::ZeroInitializerAttrs> { + size_t operator()(::FlexFlow::ZeroInitializerAttrs const &) const; 
}; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ZeroInitializerAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ZeroInitializerAttrs const &); +struct adl_serializer<::FlexFlow::ZeroInitializerAttrs> { + static ::FlexFlow::ZeroInitializerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ZeroInitializerAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::ZeroInitializerAttrs> { + static Gen<::FlexFlow::ZeroInitializerAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/layer_attrs.dtg.h b/lib/pcg/include/pcg/layer_attrs.dtg.h index 6afa1757dc..d856bc1401 100644 --- a/lib/pcg/include/pcg/layer_attrs.dtg.h +++ b/lib/pcg/include/pcg/layer_attrs.dtg.h @@ -23,8 +23,9 @@ namespace FlexFlow { struct LayerAttrs { LayerAttrs() = delete; - LayerAttrs(::FlexFlow::ComputationGraphOpAttrs const &attrs, - std::optional<::FlexFlow::stack_string> const &name); + explicit LayerAttrs( + ::FlexFlow::ComputationGraphOpAttrs const &attrs, + std::optional<::FlexFlow::stack_string> const &name); bool operator==(LayerAttrs const &) const; bool operator!=(LayerAttrs const &) const; @@ -39,16 +40,16 @@ struct LayerAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::LayerAttrs const &) const; +struct hash<::FlexFlow::LayerAttrs> { + size_t operator()(::FlexFlow::LayerAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::LayerAttrs from_json(json const &); - static void to_json(json &, FlexFlow::LayerAttrs const &); +struct adl_serializer<::FlexFlow::LayerAttrs> { + static ::FlexFlow::LayerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::LayerAttrs const &); }; } // namespace nlohmann diff --git a/lib/pcg/include/pcg/layer_guid_t.dtg.h 
b/lib/pcg/include/pcg/layer_guid_t.dtg.h index 4bbdd36fed..9b0e3338d9 100644 --- a/lib/pcg/include/pcg/layer_guid_t.dtg.h +++ b/lib/pcg/include/pcg/layer_guid_t.dtg.h @@ -19,7 +19,7 @@ namespace FlexFlow { struct layer_guid_t { layer_guid_t() = delete; - layer_guid_t(::FlexFlow::Node const &raw_node); + explicit layer_guid_t(::FlexFlow::Node const &raw_node); bool operator==(layer_guid_t const &) const; bool operator!=(layer_guid_t const &) const; @@ -33,8 +33,8 @@ struct layer_guid_t { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::layer_guid_t const &) const; +struct hash<::FlexFlow::layer_guid_t> { + size_t operator()(::FlexFlow::layer_guid_t const &) const; }; } // namespace std diff --git a/lib/pcg/include/pcg/machine_specification.dtg.h b/lib/pcg/include/pcg/machine_specification.dtg.h index cd6ffe6c0f..8b75a6dcb4 100644 --- a/lib/pcg/include/pcg/machine_specification.dtg.h +++ b/lib/pcg/include/pcg/machine_specification.dtg.h @@ -19,11 +19,11 @@ namespace FlexFlow { struct MachineSpecification { MachineSpecification() = delete; - MachineSpecification(int const &num_nodes, - int const &num_cpus_per_node, - int const &num_gpus_per_node, - float const &inter_node_bandwidth, - float const &intra_node_bandwidth); + explicit MachineSpecification(int const &num_nodes, + int const &num_cpus_per_node, + int const &num_gpus_per_node, + float const &inter_node_bandwidth, + float const &intra_node_bandwidth); bool operator==(MachineSpecification const &) const; bool operator!=(MachineSpecification const &) const; @@ -41,16 +41,16 @@ struct MachineSpecification { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::MachineSpecification const &) const; +struct hash<::FlexFlow::MachineSpecification> { + size_t operator()(::FlexFlow::MachineSpecification const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::MachineSpecification from_json(json const &); - static void 
to_json(json &, FlexFlow::MachineSpecification const &); +struct adl_serializer<::FlexFlow::MachineSpecification> { + static ::FlexFlow::MachineSpecification from_json(json const &); + static void to_json(json &, ::FlexFlow::MachineSpecification const &); }; } // namespace nlohmann diff --git a/lib/pcg/include/pcg/machine_view.dtg.h b/lib/pcg/include/pcg/machine_view.dtg.h index 2eae6e2c8b..2f058bacc5 100644 --- a/lib/pcg/include/pcg/machine_view.dtg.h +++ b/lib/pcg/include/pcg/machine_view.dtg.h @@ -21,8 +21,8 @@ namespace FlexFlow { struct MachineView { MachineView() = delete; - MachineView(::FlexFlow::device_id_t const &start, - ::FlexFlow::StridedRectangle const &rect); + explicit MachineView(::FlexFlow::device_id_t const &start, + ::FlexFlow::StridedRectangle const &rect); bool operator==(MachineView const &) const; bool operator!=(MachineView const &) const; @@ -37,16 +37,16 @@ struct MachineView { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::MachineView const &) const; +struct hash<::FlexFlow::MachineView> { + size_t operator()(::FlexFlow::MachineView const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::MachineView from_json(json const &); - static void to_json(json &, FlexFlow::MachineView const &); +struct adl_serializer<::FlexFlow::MachineView> { + static ::FlexFlow::MachineView from_json(json const &); + static void to_json(json &, ::FlexFlow::MachineView const &); }; } // namespace nlohmann diff --git a/lib/pcg/include/pcg/num_points_t.dtg.h b/lib/pcg/include/pcg/num_points_t.dtg.h index 3b8e0e0c6c..52c2af8e7f 100644 --- a/lib/pcg/include/pcg/num_points_t.dtg.h +++ b/lib/pcg/include/pcg/num_points_t.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct num_points_t { num_points_t() = delete; - num_points_t(int const &unwrapped); + explicit num_points_t(int const &unwrapped); bool operator==(num_points_t const &) const; bool operator!=(num_points_t const &) const; @@ 
-34,23 +34,23 @@ struct num_points_t { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::num_points_t const &) const; +struct hash<::FlexFlow::num_points_t> { + size_t operator()(::FlexFlow::num_points_t const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::num_points_t from_json(json const &); - static void to_json(json &, FlexFlow::num_points_t const &); +struct adl_serializer<::FlexFlow::num_points_t> { + static ::FlexFlow::num_points_t from_json(json const &); + static void to_json(json &, ::FlexFlow::num_points_t const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::num_points_t> { + static Gen<::FlexFlow::num_points_t> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/operator_graph/operator_graph_input.dtg.h b/lib/pcg/include/pcg/operator_graph/operator_graph_input.dtg.h index 13904f220d..f0bedc0f3d 100644 --- a/lib/pcg/include/pcg/operator_graph/operator_graph_input.dtg.h +++ b/lib/pcg/include/pcg/operator_graph/operator_graph_input.dtg.h @@ -19,7 +19,7 @@ namespace FlexFlow { struct OperatorGraphInput { OperatorGraphInput() = delete; - OperatorGraphInput(::FlexFlow::Node const &node, int const &idx); + explicit OperatorGraphInput(::FlexFlow::Node const &node, int const &idx); bool operator==(OperatorGraphInput const &) const; bool operator!=(OperatorGraphInput const &) const; @@ -34,8 +34,8 @@ struct OperatorGraphInput { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::OperatorGraphInput const &) const; +struct hash<::FlexFlow::OperatorGraphInput> { + size_t operator()(::FlexFlow::OperatorGraphInput const &) const; }; } // namespace std diff --git a/lib/pcg/include/pcg/operator_graph/operator_graph_output.dtg.h b/lib/pcg/include/pcg/operator_graph/operator_graph_output.dtg.h index 40bdc245b8..4a99eba273 100644 --- 
a/lib/pcg/include/pcg/operator_graph/operator_graph_output.dtg.h +++ b/lib/pcg/include/pcg/operator_graph/operator_graph_output.dtg.h @@ -19,7 +19,7 @@ namespace FlexFlow { struct OperatorGraphOutput { OperatorGraphOutput() = delete; - OperatorGraphOutput(::FlexFlow::Node const &node, int const &idx); + explicit OperatorGraphOutput(::FlexFlow::Node const &node, int const &idx); bool operator==(OperatorGraphOutput const &) const; bool operator!=(OperatorGraphOutput const &) const; @@ -34,8 +34,8 @@ struct OperatorGraphOutput { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::OperatorGraphOutput const &) const; +struct hash<::FlexFlow::OperatorGraphOutput> { + size_t operator()(::FlexFlow::OperatorGraphOutput const &) const; }; } // namespace std diff --git a/lib/pcg/include/pcg/operator_guid_t.dtg.h b/lib/pcg/include/pcg/operator_guid_t.dtg.h deleted file mode 100644 index bf08150e5e..0000000000 --- a/lib/pcg/include/pcg/operator_guid_t.dtg.h +++ /dev/null @@ -1,46 +0,0 @@ -// THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
-// If you would like to modify this datatype, instead modify -// lib/pcg/include/pcg/operator_guid_t.struct.toml -/* proj-data -{ - "generated_from": "348b5a610f4ff6f545884564ee9a1e6a" -} -*/ - -#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_OPERATOR_GUID_T_DTG_H -#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_OPERATOR_GUID_T_DTG_H - -#include "fmt/format.h" -#include "utils/graph.h" -#include -#include -#include - -namespace FlexFlow { -struct operator_guid_t { - operator_guid_t() = delete; - operator_guid_t(::FlexFlow::Node const &raw_graph_node); - - bool operator==(operator_guid_t const &) const; - bool operator!=(operator_guid_t const &) const; - bool operator<(operator_guid_t const &) const; - bool operator>(operator_guid_t const &) const; - bool operator<=(operator_guid_t const &) const; - bool operator>=(operator_guid_t const &) const; - ::FlexFlow::Node raw_graph_node; -}; -} // namespace FlexFlow - -namespace std { -template <> -struct hash { - size_t operator()(FlexFlow::operator_guid_t const &) const; -}; -} // namespace std - -namespace FlexFlow { -std::string format_as(operator_guid_t const &); -std::ostream &operator<<(std::ostream &, operator_guid_t const &); -} // namespace FlexFlow - -#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_OPERATOR_GUID_T_DTG_H diff --git a/lib/pcg/include/pcg/optimizers/adam_optimizer_attrs.dtg.h b/lib/pcg/include/pcg/optimizers/adam_optimizer_attrs.dtg.h index a5a6a5ed0a..1dfbb4a4e1 100644 --- a/lib/pcg/include/pcg/optimizers/adam_optimizer_attrs.dtg.h +++ b/lib/pcg/include/pcg/optimizers/adam_optimizer_attrs.dtg.h @@ -20,13 +20,13 @@ namespace FlexFlow { struct AdamOptimizerAttrs { AdamOptimizerAttrs() = delete; - AdamOptimizerAttrs(double const &alpha, - double const &beta1, - double const &beta2, - double const &weight_decay, - double const &alpha_t, - double const &beta_t, - double const &beta2_t); + explicit AdamOptimizerAttrs(double const &alpha, + double const &beta1, + double const &beta2, + double const &weight_decay, + double const 
&alpha_t, + double const &beta_t, + double const &beta2_t); bool operator==(AdamOptimizerAttrs const &) const; bool operator!=(AdamOptimizerAttrs const &) const; @@ -46,23 +46,23 @@ struct AdamOptimizerAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::AdamOptimizerAttrs const &) const; +struct hash<::FlexFlow::AdamOptimizerAttrs> { + size_t operator()(::FlexFlow::AdamOptimizerAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::AdamOptimizerAttrs from_json(json const &); - static void to_json(json &, FlexFlow::AdamOptimizerAttrs const &); +struct adl_serializer<::FlexFlow::AdamOptimizerAttrs> { + static ::FlexFlow::AdamOptimizerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::AdamOptimizerAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::AdamOptimizerAttrs> { + static Gen<::FlexFlow::AdamOptimizerAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/optimizers/sgd_optimizer_attrs.dtg.h b/lib/pcg/include/pcg/optimizers/sgd_optimizer_attrs.dtg.h index f6a17f2354..5fa33bfbe7 100644 --- a/lib/pcg/include/pcg/optimizers/sgd_optimizer_attrs.dtg.h +++ b/lib/pcg/include/pcg/optimizers/sgd_optimizer_attrs.dtg.h @@ -20,10 +20,10 @@ namespace FlexFlow { struct SGDOptimizerAttrs { SGDOptimizerAttrs() = delete; - SGDOptimizerAttrs(double const &lr, - double const &momentum, - bool const &nesterov, - double const &weight_decay); + explicit SGDOptimizerAttrs(double const &lr, + double const &momentum, + bool const &nesterov, + double const &weight_decay); bool operator==(SGDOptimizerAttrs const &) const; bool operator!=(SGDOptimizerAttrs const &) const; @@ -40,23 +40,23 @@ struct SGDOptimizerAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::SGDOptimizerAttrs const &) const; +struct 
hash<::FlexFlow::SGDOptimizerAttrs> { + size_t operator()(::FlexFlow::SGDOptimizerAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::SGDOptimizerAttrs from_json(json const &); - static void to_json(json &, FlexFlow::SGDOptimizerAttrs const &); +struct adl_serializer<::FlexFlow::SGDOptimizerAttrs> { + static ::FlexFlow::SGDOptimizerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::SGDOptimizerAttrs const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::SGDOptimizerAttrs> { + static Gen<::FlexFlow::SGDOptimizerAttrs> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/parallel_computation_graph.h b/lib/pcg/include/pcg/parallel_computation_graph.h deleted file mode 100644 index 9d7103f4fd..0000000000 --- a/lib/pcg/include/pcg/parallel_computation_graph.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_H -#define _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_H - -#include "pcg/parallel_computation_graph_t.h" - -namespace FlexFlow {} - -#endif diff --git a/lib/pcg/include/pcg/parallel_computation_graph.dtg.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.dtg.h similarity index 61% rename from lib/pcg/include/pcg/parallel_computation_graph.dtg.h rename to lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.dtg.h index 01fbb7d30c..a6f9f9455e 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph.dtg.h +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.dtg.h @@ -1,23 +1,23 @@ // THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
// If you would like to modify this datatype, instead modify -// lib/pcg/include/pcg/parallel_computation_graph.struct.toml +// lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.struct.toml /* proj-data { - "generated_from": "e4db0f603f7b8947dda13e01f96c40fb" + "generated_from": "1339be6e86e9818c36d6ecf5475e2d4b" } */ -#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_DTG_H -#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_DTG_H +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_COMPUTATION_GRAPH_DTG_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_COMPUTATION_GRAPH_DTG_H -#include "pcg/dataflow_graph.h" -#include "pcg/parallel_layer_attrs.dtg.h" -#include "pcg/parallel_tensor_attrs.dtg.h" +#include "pcg/dataflow_graph/dataflow_graph.h" +#include "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h" namespace FlexFlow { struct ParallelComputationGraph { ParallelComputationGraph() = delete; - ParallelComputationGraph( + explicit ParallelComputationGraph( ::FlexFlow::DataflowGraph<::FlexFlow::ParallelLayerAttrs, ::FlexFlow::ParallelTensorAttrs> const &raw_graph); @@ -28,4 +28,4 @@ struct ParallelComputationGraph { }; } // namespace FlexFlow -#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_DTG_H +#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_COMPUTATION_GRAPH_DTG_H diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h new file mode 100644 index 0000000000..a320a4bbc1 --- /dev/null +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h @@ -0,0 +1,42 @@ +#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_H +#define _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_H + +#include 
"pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" +#include "pcg/parallel_computation_graph/parallel_layer_added_result.dtg.h" +#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" + +namespace FlexFlow { + +ParallelComputationGraph empty_parallel_computation_graph(); + +std::unordered_set + get_parallel_layers(ParallelComputationGraph const &); + +ParallelLayerAddedResult + add_parallel_layer(ParallelComputationGraph &pcg, + ParallelLayerAttrs const &layer_attrs, + std::vector const &inputs, + std::vector const &output_labels); + +std::vector + get_layer_inputs(ParallelComputationGraph const &, + parallel_layer_guid_t const &); +std::vector + get_layer_outputs(ParallelComputationGraph const &, + parallel_layer_guid_t const &); + +parallel_layer_guid_t get_source_layer(ParallelComputationGraph const &, + parallel_tensor_guid_t const &); + +ParallelLayerAttrs get_parallel_layer_attrs(ParallelComputationGraph const &, + parallel_layer_guid_t const &); +ParallelTensorAttrs get_parallel_tensor_attrs(ParallelComputationGraph const &, + parallel_tensor_guid_t const &); + +std::vector + topological_ordering(ParallelComputationGraph const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/parallel_computation_graph.struct.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.struct.toml similarity index 56% rename from lib/pcg/include/pcg/parallel_computation_graph.struct.toml rename to lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.struct.toml index d4e305abe5..759a8424d5 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph.struct.toml +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.struct.toml @@ -3,9 +3,9 @@ name = "ParallelComputationGraph" features = [ ] includes = [ - "pcg/dataflow_graph.h", - "pcg/parallel_tensor_attrs.dtg.h", - 
"pcg/parallel_layer_attrs.dtg.h", + "pcg/dataflow_graph/dataflow_graph.h", + "pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h", + "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h", ] [[fields]] diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_builder.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_builder.h new file mode 100644 index 0000000000..5b34ee641a --- /dev/null +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_builder.h @@ -0,0 +1,146 @@ +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_BUILDER_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_BUILDER_H + +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" +#include + +namespace FlexFlow { + +struct ParallelComputationGraphBuilder { +public: + ParallelComputationGraphBuilder(); + + parallel_tensor_guid_t create_input_tensor( + ParallelTensorShape const &shape, + bool create_grad = true, + std::optional const &name = std::nullopt); + + parallel_tensor_guid_t + add(parallel_tensor_guid_t const &lhs, + parallel_tensor_guid_t const &rhs, + std::optional const &name = std::nullopt); + + parallel_tensor_guid_t + batch_matmul(parallel_tensor_guid_t const &a, + parallel_tensor_guid_t const &b, + std::optional const &name = std::nullopt); + + parallel_tensor_guid_t + cast(parallel_tensor_guid_t const &input, + DataType result_type, + std::optional const &name = std::nullopt); + + parallel_tensor_guid_t conv2d( + parallel_tensor_guid_t const &input, + int outChannels, + int kernelH, + int kernelW, + int strideH, + int strideW, + int paddingH, + int paddingW, + std::optional const &activation = std::nullopt, + int groups = 1, + bool use_bias = true, + std::optional const &kernel_initializer = std::nullopt, + std::optional const &bias_initializer = std::nullopt, + 
std::optional const &kernel_regularizer = std::nullopt, + std::optional const &name = std::nullopt); + + parallel_tensor_guid_t dense( + parallel_tensor_guid_t const &input, + int outDim, + std::optional activation = std::nullopt, + bool use_bias = true, + DataType data_type = DataType::FLOAT, + std::optional const &kernel_initializer = std::nullopt, + std::optional const &bias_initializer = std::nullopt, + std::optional const &name = std::nullopt); + + parallel_tensor_guid_t embedding( + parallel_tensor_guid_t const &input, + int num_entries, + int outDim, + AggregateOp aggr, + DataType dtype = DataType::FLOAT, + std::optional const &kernel_initializer = std::nullopt, + std::optional const &name = std::nullopt); + + parallel_tensor_guid_t multihead_attention( + parallel_tensor_guid_t const &query, + parallel_tensor_guid_t const &key, + parallel_tensor_guid_t const &value, + int embed_dim, + int num_heads, + std::optional kdim = std::nullopt, + std::optional vdim = std::nullopt, + float dropout = 0.0f, + bool bias = true, + bool add_bias_kv = false, + bool add_zero_attn = false, + std::optional initializer = std::nullopt, + std::optional input_bias_initializer = std::nullopt, + std::optional output_bias_initializer = std::nullopt, + std::optional const &name = std::nullopt); + + parallel_tensor_guid_t + relu(parallel_tensor_guid_t const &x, + std::optional const &name = std::nullopt); + + parallel_tensor_guid_t + parallel_partition(parallel_tensor_guid_t const &x, + ff_dim_t dim, + int degree, + std::optional const &name = std::nullopt); + parallel_tensor_guid_t + parallel_combine(parallel_tensor_guid_t const &x, + ff_dim_t dim, + int degree, + std::optional const &name = std::nullopt); + parallel_tensor_guid_t + parallel_replicate(parallel_tensor_guid_t const &x, + int degree, + std::optional const &name = std::nullopt); + parallel_tensor_guid_t + parallel_reduce(parallel_tensor_guid_t const &x, + int degree, + std::optional const &name = std::nullopt); + 
+private: + parallel_tensor_guid_t as_type(parallel_tensor_guid_t const &, + DataType, + std::string const &name); + +private: + ParallelTensorShape get_shape(parallel_tensor_guid_t const &) const; + +private: + std::vector + add_layer(ParallelLayerAttrs const &layer, + std::vector const &inputs, + std::vector const &weights, + std::vector const &outputs); + std::vector + add_layer(ParallelLayerAttrs const &layer, + std::vector const &inputs, + std::vector const &weights, + std::vector const &output); + parallel_tensor_guid_t + add_layer(ParallelLayerAttrs const &layer, + std::vector const &inputs, + std::vector const &weights, + ParallelTensorAttrs const &output); + parallel_tensor_guid_t + add_layer(ParallelLayerAttrs const &layer, + std::vector const &inputs, + std::vector const &weights, + ParallelTensorShape const &output); + +public: + ParallelComputationGraph pcg; +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_added_result.dtg.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_added_result.dtg.h new file mode 100644 index 0000000000..8b59ab2b2f --- /dev/null +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_added_result.dtg.h @@ -0,0 +1,44 @@ +// THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
+// If you would like to modify this datatype, instead modify +// lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_added_result.struct.toml +/* proj-data +{ + "generated_from": "cb4fa8a3a6319d9b7de628a58d08bfed" +} +*/ + +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_LAYER_ADDED_RESULT_DTG_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_LAYER_ADDED_RESULT_DTG_H + +#include "fmt/format.h" +#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" +#include "utils/fmt/vector.h" +#include +#include +#include + +namespace FlexFlow { +struct ParallelLayerAddedResult { + ParallelLayerAddedResult() = delete; + explicit ParallelLayerAddedResult( + ::FlexFlow::parallel_layer_guid_t const ¶llel_layer, + std::vector<::FlexFlow::parallel_tensor_guid_t> const &outputs); + + bool operator==(ParallelLayerAddedResult const &) const; + bool operator!=(ParallelLayerAddedResult const &) const; + bool operator<(ParallelLayerAddedResult const &) const; + bool operator>(ParallelLayerAddedResult const &) const; + bool operator<=(ParallelLayerAddedResult const &) const; + bool operator>=(ParallelLayerAddedResult const &) const; + ::FlexFlow::parallel_layer_guid_t parallel_layer; + std::vector<::FlexFlow::parallel_tensor_guid_t> outputs; +}; +} // namespace FlexFlow + +namespace FlexFlow { +std::string format_as(ParallelLayerAddedResult const &); +std::ostream &operator<<(std::ostream &, ParallelLayerAddedResult const &); +} // namespace FlexFlow + +#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_LAYER_ADDED_RESULT_DTG_H diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_added_result.struct.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_added_result.struct.toml new file mode 100644 index 0000000000..f3113255ef --- /dev/null +++ 
b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_added_result.struct.toml @@ -0,0 +1,23 @@ +namespace = "FlexFlow" +name = "ParallelLayerAddedResult" + +features = [ + "eq", + "ord", + "fmt", +] + +includes = [ + "", + "utils/fmt/vector.h", + "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h", + "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h", +] + +[[fields]] +name = "parallel_layer" +type = "::FlexFlow::parallel_layer_guid_t" + +[[fields]] +name = "outputs" +type = "std::vector<::FlexFlow::parallel_tensor_guid_t>" diff --git a/lib/pcg/include/pcg/parallel_layer_attrs.dtg.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h similarity index 53% rename from lib/pcg/include/pcg/parallel_layer_attrs.dtg.h rename to lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h index 4c7fce4038..8b23599f1d 100644 --- a/lib/pcg/include/pcg/parallel_layer_attrs.dtg.h +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h @@ -1,18 +1,19 @@ // THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
// If you would like to modify this datatype, instead modify -// lib/pcg/include/pcg/parallel_layer_attrs.struct.toml +// lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.struct.toml /* proj-data { - "generated_from": "97fa0b11c59ae892a8a530ffd67e33ad" + "generated_from": "1b3a0491865fd43c79afcf4939b56fae" } */ -#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_LAYER_ATTRS_DTG_H -#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_LAYER_ATTRS_DTG_H +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_LAYER_ATTRS_DTG_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_LAYER_ATTRS_DTG_H #include "fmt/format.h" #include "nlohmann/json.hpp" #include "op-attrs/operator_attrs.h" +#include "rapidcheck.h" #include "utils/stack_string.h" #include #include @@ -22,8 +23,8 @@ namespace FlexFlow { struct ParallelLayerAttrs { ParallelLayerAttrs() = delete; - ParallelLayerAttrs( - ::FlexFlow::PCGOperatorAttrs const &attrs, + explicit ParallelLayerAttrs( + ::FlexFlow::PCGOperatorAttrs const &op_attrs, std::optional<::FlexFlow::stack_string> const &name); bool operator==(ParallelLayerAttrs const &) const; @@ -32,29 +33,36 @@ struct ParallelLayerAttrs { bool operator>(ParallelLayerAttrs const &) const; bool operator<=(ParallelLayerAttrs const &) const; bool operator>=(ParallelLayerAttrs const &) const; - ::FlexFlow::PCGOperatorAttrs attrs; + ::FlexFlow::PCGOperatorAttrs op_attrs; std::optional<::FlexFlow::stack_string> name; }; } // namespace FlexFlow namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ParallelLayerAttrs const &) const; +struct hash<::FlexFlow::ParallelLayerAttrs> { + size_t operator()(::FlexFlow::ParallelLayerAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ParallelLayerAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ParallelLayerAttrs const &); +struct 
adl_serializer<::FlexFlow::ParallelLayerAttrs> { + static ::FlexFlow::ParallelLayerAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ParallelLayerAttrs const &); }; } // namespace nlohmann +namespace rc { +template <> +struct Arbitrary<::FlexFlow::ParallelLayerAttrs> { + static Gen<::FlexFlow::ParallelLayerAttrs> arbitrary(); +}; +} // namespace rc + namespace FlexFlow { std::string format_as(ParallelLayerAttrs const &); std::ostream &operator<<(std::ostream &, ParallelLayerAttrs const &); } // namespace FlexFlow -#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_LAYER_ATTRS_DTG_H +#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_LAYER_ATTRS_DTG_H diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.h new file mode 100644 index 0000000000..2b1a082a85 --- /dev/null +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.h @@ -0,0 +1,12 @@ +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_LAYER_ATTRS_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_LAYER_ATTRS_H + +#include "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h" + +namespace FlexFlow { + +OperatorType get_op_type(ParallelLayerAttrs const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/parallel_layer_attrs.struct.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.struct.toml similarity index 90% rename from lib/pcg/include/pcg/parallel_layer_attrs.struct.toml rename to lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.struct.toml index 9b1f8f47aa..1ba9ac5487 100644 --- a/lib/pcg/include/pcg/parallel_layer_attrs.struct.toml +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.struct.toml @@ -5,7 +5,7 @@ features = [ "ord", "hash", "json", - # "rapidcheck", + "rapidcheck", "fmt", ] @@ -16,7 +16,7 @@ includes = [ ] [[fields]] -name = "attrs" +name = 
"op_attrs" type = "::FlexFlow::PCGOperatorAttrs" [[fields]] diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h new file mode 100644 index 0000000000..c204a5f95c --- /dev/null +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h @@ -0,0 +1,46 @@ +// THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! +// If you would like to modify this datatype, instead modify +// lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.struct.toml +/* proj-data +{ + "generated_from": "c31301efeb92e151b04943786aa7bec1" +} +*/ + +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_LAYER_GUID_T_DTG_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_LAYER_GUID_T_DTG_H + +#include "fmt/format.h" +#include "utils/graph.h" +#include +#include +#include + +namespace FlexFlow { +struct parallel_layer_guid_t { + parallel_layer_guid_t() = delete; + explicit parallel_layer_guid_t(::FlexFlow::Node const &raw_graph_node); + + bool operator==(parallel_layer_guid_t const &) const; + bool operator!=(parallel_layer_guid_t const &) const; + bool operator<(parallel_layer_guid_t const &) const; + bool operator>(parallel_layer_guid_t const &) const; + bool operator<=(parallel_layer_guid_t const &) const; + bool operator>=(parallel_layer_guid_t const &) const; + ::FlexFlow::Node raw_graph_node; +}; +} // namespace FlexFlow + +namespace std { +template <> +struct hash<::FlexFlow::parallel_layer_guid_t> { + size_t operator()(::FlexFlow::parallel_layer_guid_t const &) const; +}; +} // namespace std + +namespace FlexFlow { +std::string format_as(parallel_layer_guid_t const &); +std::ostream &operator<<(std::ostream &, parallel_layer_guid_t const &); +} // namespace FlexFlow + +#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_LAYER_GUID_T_DTG_H diff --git 
a/lib/pcg/include/pcg/operator_guid_t.struct.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.struct.toml similarity index 86% rename from lib/pcg/include/pcg/operator_guid_t.struct.toml rename to lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.struct.toml index f89d30137e..63fb25a45b 100644 --- a/lib/pcg/include/pcg/operator_guid_t.struct.toml +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "operator_guid_t" +name = "parallel_layer_guid_t" features = [ "eq", "ord", diff --git a/lib/pcg/include/pcg/parallel_tensor_attrs.dtg.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h similarity index 62% rename from lib/pcg/include/pcg/parallel_tensor_attrs.dtg.h rename to lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h index fa6b153b0a..c6baa1e138 100644 --- a/lib/pcg/include/pcg/parallel_tensor_attrs.dtg.h +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h @@ -1,14 +1,14 @@ // THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
// If you would like to modify this datatype, instead modify -// lib/pcg/include/pcg/parallel_tensor_attrs.struct.toml +// lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_attrs.struct.toml /* proj-data { - "generated_from": "b3e086b380bbc41d99332e1463a34b28" + "generated_from": "3d641c90950f49a7bef664d0153c97f6" } */ -#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_TENSOR_ATTRS_DTG_H -#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_TENSOR_ATTRS_DTG_H +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_TENSOR_ATTRS_DTG_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_TENSOR_ATTRS_DTG_H #include "fmt/format.h" #include "nlohmann/json.hpp" @@ -16,6 +16,7 @@ #include "op-attrs/param_sync.dtg.h" #include "pcg/create_grad.dtg.h" #include "pcg/initializer_attrs.dtg.h" +#include "rapidcheck.h" #include #include #include @@ -24,7 +25,7 @@ namespace FlexFlow { struct ParallelTensorAttrs { ParallelTensorAttrs() = delete; - ParallelTensorAttrs( + explicit ParallelTensorAttrs( ::FlexFlow::ParallelTensorShape const &shape, std::optional<::FlexFlow::ParamSync> const &sync_type, std::optional<::FlexFlow::InitializerAttrs> const &initializer, @@ -45,22 +46,29 @@ struct ParallelTensorAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ParallelTensorAttrs const &) const; +struct hash<::FlexFlow::ParallelTensorAttrs> { + size_t operator()(::FlexFlow::ParallelTensorAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::ParallelTensorAttrs from_json(json const &); - static void to_json(json &, FlexFlow::ParallelTensorAttrs const &); +struct adl_serializer<::FlexFlow::ParallelTensorAttrs> { + static ::FlexFlow::ParallelTensorAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::ParallelTensorAttrs const &); }; } // namespace nlohmann +namespace rc { +template <> +struct Arbitrary<::FlexFlow::ParallelTensorAttrs> 
{ + static Gen<::FlexFlow::ParallelTensorAttrs> arbitrary(); +}; +} // namespace rc + namespace FlexFlow { std::string format_as(ParallelTensorAttrs const &); std::ostream &operator<<(std::ostream &, ParallelTensorAttrs const &); } // namespace FlexFlow -#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_TENSOR_ATTRS_DTG_H +#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_TENSOR_ATTRS_DTG_H diff --git a/lib/pcg/include/pcg/parallel_tensor_attrs.struct.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_attrs.struct.toml similarity index 96% rename from lib/pcg/include/pcg/parallel_tensor_attrs.struct.toml rename to lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_attrs.struct.toml index 1f81b56ec8..faf7159ad7 100644 --- a/lib/pcg/include/pcg/parallel_tensor_attrs.struct.toml +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_attrs.struct.toml @@ -5,7 +5,7 @@ features = [ "ord", "hash", "json", - # "rapidcheck", + "rapidcheck", "fmt", ] diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h new file mode 100644 index 0000000000..55a1ebcc75 --- /dev/null +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h @@ -0,0 +1,47 @@ +// THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
+// If you would like to modify this datatype, instead modify +// lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.struct.toml +/* proj-data +{ + "generated_from": "de2c2d33bfa5cd72f0e51954d6879f38" +} +*/ + +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_TENSOR_GUID_T_DTG_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_TENSOR_GUID_T_DTG_H + +#include "fmt/format.h" +#include "utils/graph/multidiedge.h" +#include +#include +#include + +namespace FlexFlow { +struct parallel_tensor_guid_t { + parallel_tensor_guid_t() = delete; + explicit parallel_tensor_guid_t( + ::FlexFlow::MultiDiOutput const &raw_graph_output); + + bool operator==(parallel_tensor_guid_t const &) const; + bool operator!=(parallel_tensor_guid_t const &) const; + bool operator<(parallel_tensor_guid_t const &) const; + bool operator>(parallel_tensor_guid_t const &) const; + bool operator<=(parallel_tensor_guid_t const &) const; + bool operator>=(parallel_tensor_guid_t const &) const; + ::FlexFlow::MultiDiOutput raw_graph_output; +}; +} // namespace FlexFlow + +namespace std { +template <> +struct hash<::FlexFlow::parallel_tensor_guid_t> { + size_t operator()(::FlexFlow::parallel_tensor_guid_t const &) const; +}; +} // namespace std + +namespace FlexFlow { +std::string format_as(parallel_tensor_guid_t const &); +std::ostream &operator<<(std::ostream &, parallel_tensor_guid_t const &); +} // namespace FlexFlow + +#endif // _FLEXFLOW_LIB_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_PARALLEL_TENSOR_GUID_T_DTG_H diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.struct.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.struct.toml new file mode 100644 index 0000000000..7837d7b39b --- /dev/null +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.struct.toml @@ -0,0 +1,16 @@ +namespace = "FlexFlow" +name = "parallel_tensor_guid_t" +features = [ + "eq", + 
"ord", + "hash", + "fmt", +] + +includes = [ + "utils/graph/multidiedge.h" +] + +[[fields]] +name = "raw_graph_output" +type = "::FlexFlow::MultiDiOutput" diff --git a/lib/pcg/include/pcg/parallel_tensor.h b/lib/pcg/include/pcg/parallel_tensor.h deleted file mode 100644 index de41e0fb21..0000000000 --- a/lib/pcg/include/pcg/parallel_tensor.h +++ /dev/null @@ -1,32 +0,0 @@ -/** - * @file parallel_tensor.h - * @brief Parallel Tensor Representation - * - * @copyright Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford - * (alphabetical) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_TENSOR_H -#define _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_TENSOR_H - -#include "pcg/parallel_tensor_attrs.h" - -namespace FlexFlow {} // namespace FlexFlow - -namespace FlexFlow { -static_assert(is_well_behaved_value_type::value, ""); -} - -#endif diff --git a/lib/pcg/include/pcg/parallel_tensor_guid_t.h b/lib/pcg/include/pcg/parallel_tensor_guid_t.h deleted file mode 100644 index db8f84b7e2..0000000000 --- a/lib/pcg/include/pcg/parallel_tensor_guid_t.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_TENSOR_GUID_T_H -#define _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_TENSOR_GUID_T_H - -#include "utils/graph/multidiedge.h" -#include "utils/strong_typedef.h" - -namespace FlexFlow { - -struct parallel_tensor_guid_t - : strong_typedef { - using strong_typedef::strong_typedef; -}; -FF_TYPEDEF_HASHABLE(parallel_tensor_guid_t); -FF_TYPEDEF_PRINTABLE(parallel_tensor_guid_t, "parallel_tensor_guid"); - -} // namespace FlexFlow - -#endif diff --git a/lib/pcg/include/pcg/side_size_t.dtg.h b/lib/pcg/include/pcg/side_size_t.dtg.h index fce31b1c9d..a0d65a0e6b 100644 --- a/lib/pcg/include/pcg/side_size_t.dtg.h +++ b/lib/pcg/include/pcg/side_size_t.dtg.h @@ -20,7 +20,7 @@ namespace FlexFlow { struct side_size_t { side_size_t() = delete; - side_size_t(int const &unwrapped); + explicit side_size_t(int const &unwrapped); bool operator==(side_size_t const &) const; bool operator!=(side_size_t const &) const; @@ -34,23 +34,23 @@ struct side_size_t { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::side_size_t const &) const; +struct hash<::FlexFlow::side_size_t> { + size_t operator()(::FlexFlow::side_size_t const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::side_size_t from_json(json const &); - static void to_json(json &, FlexFlow::side_size_t const &); +struct adl_serializer<::FlexFlow::side_size_t> { + static ::FlexFlow::side_size_t 
from_json(json const &); + static void to_json(json &, ::FlexFlow::side_size_t const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::side_size_t> { + static Gen<::FlexFlow::side_size_t> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/strided_rectangle.dtg.h b/lib/pcg/include/pcg/strided_rectangle.dtg.h index df6a16a0ad..932c139f91 100644 --- a/lib/pcg/include/pcg/strided_rectangle.dtg.h +++ b/lib/pcg/include/pcg/strided_rectangle.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct StridedRectangle { StridedRectangle() = delete; - StridedRectangle( + explicit StridedRectangle( ::FlexFlow::FFOrdered<::FlexFlow::StridedRectangleSide> const &sides); bool operator==(StridedRectangle const &) const; @@ -37,23 +37,23 @@ struct StridedRectangle { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::StridedRectangle const &) const; +struct hash<::FlexFlow::StridedRectangle> { + size_t operator()(::FlexFlow::StridedRectangle const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::StridedRectangle from_json(json const &); - static void to_json(json &, FlexFlow::StridedRectangle const &); +struct adl_serializer<::FlexFlow::StridedRectangle> { + static ::FlexFlow::StridedRectangle from_json(json const &); + static void to_json(json &, ::FlexFlow::StridedRectangle const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::StridedRectangle> { + static Gen<::FlexFlow::StridedRectangle> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/strided_rectangle_side.dtg.h b/lib/pcg/include/pcg/strided_rectangle_side.dtg.h index 3e4365c24d..9b9347a7aa 100644 --- a/lib/pcg/include/pcg/strided_rectangle_side.dtg.h +++ b/lib/pcg/include/pcg/strided_rectangle_side.dtg.h @@ -21,8 +21,8 @@ namespace FlexFlow { struct 
StridedRectangleSide { StridedRectangleSide() = delete; - StridedRectangleSide(::FlexFlow::num_points_t const &num_points, - int const &stride); + explicit StridedRectangleSide(::FlexFlow::num_points_t const &num_points, + int const &stride); bool operator==(StridedRectangleSide const &) const; bool operator!=(StridedRectangleSide const &) const; @@ -37,23 +37,23 @@ struct StridedRectangleSide { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::StridedRectangleSide const &) const; +struct hash<::FlexFlow::StridedRectangleSide> { + size_t operator()(::FlexFlow::StridedRectangleSide const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::StridedRectangleSide from_json(json const &); - static void to_json(json &, FlexFlow::StridedRectangleSide const &); +struct adl_serializer<::FlexFlow::StridedRectangleSide> { + static ::FlexFlow::StridedRectangleSide from_json(json const &); + static void to_json(json &, ::FlexFlow::StridedRectangleSide const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::StridedRectangleSide> { + static Gen<::FlexFlow::StridedRectangleSide> arbitrary(); }; } // namespace rc diff --git a/lib/pcg/include/pcg/tensor_attrs.dtg.h b/lib/pcg/include/pcg/tensor_attrs.dtg.h index 8bc9d3ce9d..38b18c9885 100644 --- a/lib/pcg/include/pcg/tensor_attrs.dtg.h +++ b/lib/pcg/include/pcg/tensor_attrs.dtg.h @@ -23,10 +23,11 @@ namespace FlexFlow { struct TensorAttrs { TensorAttrs() = delete; - TensorAttrs(::FlexFlow::TensorShape const &shape, - std::optional<::FlexFlow::InitializerAttrs> const &initializer, - bool const &create_gradients, - std::optional<::FlexFlow::ParamSync> const &sync_type); + explicit TensorAttrs( + ::FlexFlow::TensorShape const &shape, + std::optional<::FlexFlow::InitializerAttrs> const &initializer, + bool const &create_gradients, + std::optional<::FlexFlow::ParamSync> const 
&sync_type); bool operator==(TensorAttrs const &) const; bool operator!=(TensorAttrs const &) const; @@ -43,16 +44,16 @@ struct TensorAttrs { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::TensorAttrs const &) const; +struct hash<::FlexFlow::TensorAttrs> { + size_t operator()(::FlexFlow::TensorAttrs const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::TensorAttrs from_json(json const &); - static void to_json(json &, FlexFlow::TensorAttrs const &); +struct adl_serializer<::FlexFlow::TensorAttrs> { + static ::FlexFlow::TensorAttrs from_json(json const &); + static void to_json(json &, ::FlexFlow::TensorAttrs const &); }; } // namespace nlohmann diff --git a/lib/pcg/include/pcg/tensor_guid_t.dtg.h b/lib/pcg/include/pcg/tensor_guid_t.dtg.h index c6109c6103..3026c2169e 100644 --- a/lib/pcg/include/pcg/tensor_guid_t.dtg.h +++ b/lib/pcg/include/pcg/tensor_guid_t.dtg.h @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/tensor_guid_t.struct.toml /* proj-data { - "generated_from": "dc15fcbb876ec70509dfa8b662963bc3" + "generated_from": "1e3914b97a465f1752ce510614145b37" } */ @@ -19,7 +19,7 @@ namespace FlexFlow { struct tensor_guid_t { tensor_guid_t() = delete; - tensor_guid_t(::FlexFlow::MultiDiOutput const &raw_graph_output); + explicit tensor_guid_t(::FlexFlow::MultiDiOutput const &raw_graph_output); bool operator==(tensor_guid_t const &) const; bool operator!=(tensor_guid_t const &) const; @@ -33,8 +33,8 @@ struct tensor_guid_t { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::tensor_guid_t const &) const; +struct hash<::FlexFlow::tensor_guid_t> { + size_t operator()(::FlexFlow::tensor_guid_t const &) const; }; } // namespace std diff --git a/lib/pcg/include/pcg/tensor_guid_t.struct.toml b/lib/pcg/include/pcg/tensor_guid_t.struct.toml index aea4fad108..795c0166eb 100644 --- a/lib/pcg/include/pcg/tensor_guid_t.struct.toml +++ b/lib/pcg/include/pcg/tensor_guid_t.struct.toml 
@@ -4,8 +4,6 @@ features = [ "eq", "ord", "hash", - # "json", - # "rapidcheck", "fmt", ] diff --git a/lib/pcg/src/file_format/v1/graphs.cc b/lib/pcg/src/file_format/v1/graphs.cc index eabd266e25..8317c9ec6e 100644 --- a/lib/pcg/src/file_format/v1/graphs.cc +++ b/lib/pcg/src/file_format/v1/graphs.cc @@ -1,5 +1,5 @@ #include "pcg/file_format/v1/graphs.h" -#include "pcg/dataflow_graph.h" +#include "pcg/dataflow_graph/dataflow_graph.h" #include "pcg/file_format/v1/graphs/v1_multidigraph.h" #include "pcg/file_format/v1/graphs/v1_operator_graph.dtg.h" #include "utils/graph/algorithms.h" @@ -30,10 +30,10 @@ static V1MultiDiGraph to_v1(MultiDiGraphView const &g, bidict const &node_ports) { std::unordered_set edges; for (MultiDiEdge const &e : get_edges(g)) { - edges.insert({nodes.at_l(e.src), - node_ports.at_l(e.src_idx), - nodes.at_l(e.dst), - node_ports.at_l(e.dst_idx)}); + edges.insert(V1GraphEdge{nodes.at_l(e.src), + node_ports.at_l(e.src_idx), + nodes.at_l(e.dst), + node_ports.at_l(e.dst_idx)}); } return V1MultiDiGraph{ @@ -107,7 +107,8 @@ static V1JsonableGraph std::unordered_map output_labels = map_values( outputs_bidict, [&](MultiDiOutput const &o) { return g.at(o); }); - return {node_labels, outputs, output_labels, unlabelled}; + return V1JsonableGraph{ + node_labels, outputs, output_labels, unlabelled}; } template @@ -129,7 +130,8 @@ static V1JsonableGraph std::unordered_map output_labels = map_values( outputs_bidict, [&](MultiDiOutput const &o) { return g.at(o); }); - return {node_labels, outputs, output_labels, unlabelled}; + return V1JsonableGraph{ + node_labels, outputs, output_labels, unlabelled}; } V1ComputationGraph to_v1(ComputationGraph const &g) { diff --git a/lib/pcg/src/pcg/computation_graph.dtg.cc b/lib/pcg/src/pcg/computation_graph.dtg.cc index bb6233a910..799cf55908 100644 --- a/lib/pcg/src/pcg/computation_graph.dtg.cc +++ b/lib/pcg/src/pcg/computation_graph.dtg.cc @@ -3,13 +3,13 @@ // lib/pcg/include/pcg/computation_graph.struct.toml /* proj-data 
{ - "generated_from": "8f1f0e13d75065944f7fe307e12fe280" + "generated_from": "bf8996bea2e022265a372d692c2db8ed" } */ #include "pcg/computation_graph.dtg.h" -#include "pcg/dataflow_graph.h" +#include "pcg/dataflow_graph/dataflow_graph.h" #include "pcg/layer_attrs.dtg.h" #include "pcg/tensor_attrs.dtg.h" diff --git a/lib/pcg/src/pcg/computation_graph/layer_added_result.dtg.cc b/lib/pcg/src/pcg/computation_graph/layer_added_result.dtg.cc index 18b394f6d0..1d00b4f32e 100644 --- a/lib/pcg/src/pcg/computation_graph/layer_added_result.dtg.cc +++ b/lib/pcg/src/pcg/computation_graph/layer_added_result.dtg.cc @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/computation_graph/layer_added_result.struct.toml /* proj-data { - "generated_from": "15bf9d73ef934599c9b11807d86ae5d4" + "generated_from": "234b5c222ae4ce1da36194b4eb519145" } */ @@ -11,6 +11,7 @@ #include "pcg/layer_guid_t.dtg.h" #include "pcg/tensor_guid_t.dtg.h" +#include "utils/fmt/vector.h" #include namespace FlexFlow { diff --git a/lib/pcg/src/pcg/computation_graph_builder.cc b/lib/pcg/src/pcg/computation_graph_builder.cc index 8ab85838fc..d3dcf79ca6 100644 --- a/lib/pcg/src/pcg/computation_graph_builder.cc +++ b/lib/pcg/src/pcg/computation_graph_builder.cc @@ -23,7 +23,8 @@ TensorShape ComputationGraphBuilder::get_shape(tensor_guid_t const &t) const { tensor_guid_t ComputationGraphBuilder::create_tensor(TensorShape const &shape, bool create_grad) { - TensorAttrs tensor_attrs = {shape, std::nullopt, create_grad, std::nullopt}; + TensorAttrs tensor_attrs = + TensorAttrs{shape, std::nullopt, create_grad, std::nullopt}; LayerAttrs layer_attrs = LayerAttrs{ ComputationGraphOpAttrs{InputAttrs{}}, std::nullopt, @@ -52,16 +53,20 @@ std::vector ComputationGraphBuilder::add_layer( }; std::vector weight_layer_inputs = {}; std::vector weight_output_attrs = {weight_tensor_attrs}; - raw_weight_tensors.push_back( - get_only(this->computation_graph.raw_graph.add_operator( - weight_layer_attrs, weight_layer_inputs, weight_output_attrs))); + 
raw_weight_tensors.push_back(get_only(this->computation_graph.raw_graph + .add_operator(weight_layer_attrs, + weight_layer_inputs, + weight_output_attrs) + .outputs)); } std::vector raw_inputs = transform( inputs, [](tensor_guid_t const &t) { return t.raw_graph_output; }); std::vector raw_outputs = - this->computation_graph.raw_graph.add_operator( - layer, concat_vectors(raw_inputs, raw_weight_tensors), outputs); + this->computation_graph.raw_graph + .add_operator( + layer, concat_vectors(raw_inputs, raw_weight_tensors), outputs) + .outputs; return transform(raw_outputs, [](MultiDiOutput const &o) { return tensor_guid_t{o}; }); } @@ -133,9 +138,13 @@ static std::string get_default_name(ComputationGraphOpAttrs const &attrs) { } tensor_guid_t ComputationGraphBuilder::element_unary( - ElementUnaryAttrs const &attrs, + OperatorType op_type, tensor_guid_t const &x, + std::optional scalar, std::optional const &maybe_name) { + + ElementUnaryAttrs attrs = ElementUnaryAttrs{op_type, scalar}; + std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); @@ -150,15 +159,6 @@ tensor_guid_t ComputationGraphBuilder::element_unary( return this->add_layer(layer, {input}, {}, output_shape); } -tensor_guid_t ComputationGraphBuilder::element_unary( - OperatorType op_type, - tensor_guid_t const &input, - std::optional scalar, - std::optional const &name) { - ElementUnaryAttrs attrs = {op_type, scalar}; - return this->element_unary(attrs, input, name); -} - tensor_guid_t ComputationGraphBuilder::element_binary( OperatorType op_type, tensor_guid_t const &lhs, @@ -177,9 +177,10 @@ tensor_guid_t ComputationGraphBuilder::element_binary( compute_type, name + "_inputr_pre_cast"); - ElementBinaryAttrs attrs = {op_type, compute_type, false, false}; + ElementBinaryAttrs attrs = + ElementBinaryAttrs{op_type, compute_type, false, false}; - LayerAttrs layer = {ComputationGraphOpAttrs{attrs}, name}; + LayerAttrs layer = LayerAttrs{ComputationGraphOpAttrs{attrs}, name}; 
TensorShape output_shape = throw_if_unexpected(get_output_shape( attrs, this->get_shape(lhs_input), this->get_shape(rhs_input))); @@ -348,16 +349,16 @@ tensor_guid_t ComputationGraphBuilder::conv2d( std::optional const &bias_initializer, std::optional const &kernel_regularizer, std::optional const &maybe_name) { - Conv2DAttrs attrs = {outChannels, - kernelH, - kernelW, - strideH, - strideW, - paddingH, - paddingW, - groups, - activation, - use_bias}; + Conv2DAttrs attrs = Conv2DAttrs{outChannels, + kernelH, + kernelW, + strideH, + strideW, + paddingH, + paddingW, + groups, + activation, + use_bias}; std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); @@ -365,7 +366,7 @@ tensor_guid_t ComputationGraphBuilder::conv2d( tensor_guid_t input = this->as_type(x, DataType::FLOAT, name + "input_pre_cast"); - LayerAttrs layer = {ComputationGraphOpAttrs{attrs}, name}; + LayerAttrs layer = LayerAttrs{ComputationGraphOpAttrs{attrs}, name}; TensorShape input_shape = this->get_shape(input); TensorShape output_shape = get_output_shape(attrs, input_shape); @@ -388,11 +389,11 @@ tensor_guid_t ComputationGraphBuilder::dropout( float rate, unsigned long long seed, std::optional const &maybe_name) { - DropoutAttrs attrs = {rate, seed}; + DropoutAttrs attrs = DropoutAttrs{rate, seed}; std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); - LayerAttrs layer = {ComputationGraphOpAttrs{attrs}, name}; + LayerAttrs layer = LayerAttrs{ComputationGraphOpAttrs{attrs}, name}; tensor_guid_t input = this->as_type(x, DataType::FLOAT, name + "input_pre_cast"); @@ -409,11 +410,11 @@ tensor_guid_t ComputationGraphBuilder::embedding( DataType dtype, std::optional const &kernel_initializer, std::optional const &maybe_name) { - EmbeddingAttrs attrs = {num_entries, outDim, aggr, dtype}; + EmbeddingAttrs attrs = EmbeddingAttrs{num_entries, outDim, aggr, dtype}; std::string name = 
maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); - LayerAttrs layer = {ComputationGraphOpAttrs{attrs}, name}; + LayerAttrs layer = LayerAttrs{ComputationGraphOpAttrs{attrs}, name}; tensor_guid_t input = this->as_type(x, DataType::FLOAT, name + "input_pre_cast"); @@ -434,11 +435,11 @@ std::vector ComputationGraphBuilder::gather( tensor_guid_t const &index, ff_dim_t dim, std::optional const &maybe_name) { - GatherAttrs attrs = {dim}; + GatherAttrs attrs = GatherAttrs{dim}; std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); - LayerAttrs layer = {ComputationGraphOpAttrs{attrs}, name}; + LayerAttrs layer = LayerAttrs{ComputationGraphOpAttrs{attrs}, name}; if (this->get_shape(index).data_type != DataType::INT32 && this->get_shape(index).data_type != DataType::INT64) { throw mk_runtime_error("Invalid data type for input tensor 2 for Gather: " @@ -494,7 +495,7 @@ tensor_guid_t ComputationGraphBuilder::batch_norm( std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); - LayerAttrs layer = {ComputationGraphOpAttrs{attrs}, name}; + LayerAttrs layer = LayerAttrs{ComputationGraphOpAttrs{attrs}, name}; TensorShape output_shape = get_output_shape(attrs, this->get_shape(input)); diff --git a/lib/pcg/src/pcg/cpu_id_t.dtg.cc b/lib/pcg/src/pcg/cpu_id_t.dtg.cc index f865442eb0..ba8f8cc164 100644 --- a/lib/pcg/src/pcg/cpu_id_t.dtg.cc +++ b/lib/pcg/src/pcg/cpu_id_t.dtg.cc @@ -34,7 +34,8 @@ bool cpu_id_t::operator>=(cpu_id_t const &other) const { } // namespace FlexFlow namespace std { -size_t hash::operator()(FlexFlow::cpu_id_t const &x) const { +size_t + hash::operator()(::FlexFlow::cpu_id_t const &x) const { size_t result = 0; result ^= std::hash{}(x.cpu_index) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -43,20 +44,20 @@ size_t hash::operator()(FlexFlow::cpu_id_t const &x) const { } // namespace std namespace nlohmann { -FlexFlow::cpu_id_t - adl_serializer::from_json(json const &j) { - 
return {j.at("cpu_index").template get()}; +::FlexFlow::cpu_id_t + adl_serializer<::FlexFlow::cpu_id_t>::from_json(json const &j) { + return ::FlexFlow::cpu_id_t{j.at("cpu_index").template get()}; } -void adl_serializer::to_json(json &j, - FlexFlow::cpu_id_t const &v) { +void adl_serializer<::FlexFlow::cpu_id_t>::to_json( + json &j, ::FlexFlow::cpu_id_t const &v) { j["__type"] = "cpu_id_t"; j["cpu_index"] = v.cpu_index; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::cpu_id_t> Arbitrary<::FlexFlow::cpu_id_t>::arbitrary() { + return gen::construct<::FlexFlow::cpu_id_t>(gen::arbitrary()); } } // namespace rc diff --git a/lib/pcg/src/pcg/dataflow_graph/algorithms.cc b/lib/pcg/src/pcg/dataflow_graph/algorithms.cc new file mode 100644 index 0000000000..3ef04c95a3 --- /dev/null +++ b/lib/pcg/src/pcg/dataflow_graph/algorithms.cc @@ -0,0 +1 @@ +#include "pcg/dataflow_graph/algorithms.h" diff --git a/lib/pcg/src/pcg/dataflow_graph/operator_added_result.dtg.cc b/lib/pcg/src/pcg/dataflow_graph/operator_added_result.dtg.cc new file mode 100644 index 0000000000..d4b926c0a6 --- /dev/null +++ b/lib/pcg/src/pcg/dataflow_graph/operator_added_result.dtg.cc @@ -0,0 +1,60 @@ +// THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
+// If you would like to modify this datatype, instead modify +// lib/pcg/include/pcg/dataflow_graph/operator_added_result.struct.toml +/* proj-data +{ + "generated_from": "62224733c501773b41f1fc63a8677949" +} +*/ + +#include "pcg/dataflow_graph/operator_added_result.dtg.h" + +#include "utils/fmt/vector.h" +#include "utils/graph.h" +#include +#include + +namespace FlexFlow { +OperatorAddedResult::OperatorAddedResult( + ::FlexFlow::Node const &node, + std::vector<::FlexFlow::MultiDiOutput> const &outputs) + : node(node), outputs(outputs) {} +bool OperatorAddedResult::operator==(OperatorAddedResult const &other) const { + return std::tie(this->node, this->outputs) == + std::tie(other.node, other.outputs); +} +bool OperatorAddedResult::operator!=(OperatorAddedResult const &other) const { + return std::tie(this->node, this->outputs) != + std::tie(other.node, other.outputs); +} +bool OperatorAddedResult::operator<(OperatorAddedResult const &other) const { + return std::tie(this->node, this->outputs) < + std::tie(other.node, other.outputs); +} +bool OperatorAddedResult::operator>(OperatorAddedResult const &other) const { + return std::tie(this->node, this->outputs) > + std::tie(other.node, other.outputs); +} +bool OperatorAddedResult::operator<=(OperatorAddedResult const &other) const { + return std::tie(this->node, this->outputs) <= + std::tie(other.node, other.outputs); +} +bool OperatorAddedResult::operator>=(OperatorAddedResult const &other) const { + return std::tie(this->node, this->outputs) >= + std::tie(other.node, other.outputs); +} +} // namespace FlexFlow + +namespace FlexFlow { +std::string format_as(OperatorAddedResult const &x) { + std::ostringstream oss; + oss << ""; + return oss.str(); +} +std::ostream &operator<<(std::ostream &s, OperatorAddedResult const &x) { + return s << fmt::to_string(x); +} +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/file_format/v1/graphs/v1_graph_edge.dtg.cc b/lib/pcg/src/pcg/file_format/v1/graphs/v1_graph_edge.dtg.cc 
index 713aa941d2..28a0a2e861 100644 --- a/lib/pcg/src/pcg/file_format/v1/graphs/v1_graph_edge.dtg.cc +++ b/lib/pcg/src/pcg/file_format/v1/graphs/v1_graph_edge.dtg.cc @@ -45,7 +45,7 @@ bool V1GraphEdge::operator>=(V1GraphEdge const &other) const { namespace std { size_t hash::operator()( - FlexFlow::V1GraphEdge const &x) const { + ::FlexFlow::V1GraphEdge const &x) const { size_t result = 0; result ^= std::hash{}(x.srcNode) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -60,15 +60,15 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::V1GraphEdge - adl_serializer::from_json(json const &j) { - return {j.at("srcNode").template get(), - j.at("srcIdx").template get(), - j.at("dstNode").template get(), - j.at("dstIdx").template get()}; +::FlexFlow::V1GraphEdge + adl_serializer<::FlexFlow::V1GraphEdge>::from_json(json const &j) { + return ::FlexFlow::V1GraphEdge{j.at("srcNode").template get(), + j.at("srcIdx").template get(), + j.at("dstNode").template get(), + j.at("dstIdx").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::V1GraphEdge const &v) { +void adl_serializer<::FlexFlow::V1GraphEdge>::to_json( + json &j, ::FlexFlow::V1GraphEdge const &v) { j["__type"] = "V1GraphEdge"; j["srcNode"] = v.srcNode; j["srcIdx"] = v.srcIdx; diff --git a/lib/pcg/src/pcg/file_format/v1/graphs/v1_graph_output.dtg.cc b/lib/pcg/src/pcg/file_format/v1/graphs/v1_graph_output.dtg.cc index fa0b792a37..f4e2ecf0e1 100644 --- a/lib/pcg/src/pcg/file_format/v1/graphs/v1_graph_output.dtg.cc +++ b/lib/pcg/src/pcg/file_format/v1/graphs/v1_graph_output.dtg.cc @@ -42,7 +42,7 @@ bool V1GraphOutput::operator>=(V1GraphOutput const &other) const { namespace std { size_t hash::operator()( - FlexFlow::V1GraphOutput const &x) const { + ::FlexFlow::V1GraphOutput const &x) const { size_t result = 0; result ^= std::hash{}(x.srcNode) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -53,13 +53,13 @@ size_t hash::operator()( } // namespace std namespace nlohmann { 
-FlexFlow::V1GraphOutput - adl_serializer::from_json(json const &j) { - return {j.at("srcNode").template get(), - j.at("srcIdx").template get()}; +::FlexFlow::V1GraphOutput + adl_serializer<::FlexFlow::V1GraphOutput>::from_json(json const &j) { + return ::FlexFlow::V1GraphOutput{j.at("srcNode").template get(), + j.at("srcIdx").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::V1GraphOutput const &v) { +void adl_serializer<::FlexFlow::V1GraphOutput>::to_json( + json &j, ::FlexFlow::V1GraphOutput const &v) { j["__type"] = "V1GraphOutput"; j["srcNode"] = v.srcNode; j["srcIdx"] = v.srcIdx; diff --git a/lib/pcg/src/pcg/file_format/v1/graphs/v1_multidigraph.dtg.cc b/lib/pcg/src/pcg/file_format/v1/graphs/v1_multidigraph.dtg.cc index 0f5a83b02f..41ad9e4e63 100644 --- a/lib/pcg/src/pcg/file_format/v1/graphs/v1_multidigraph.dtg.cc +++ b/lib/pcg/src/pcg/file_format/v1/graphs/v1_multidigraph.dtg.cc @@ -3,14 +3,15 @@ // lib/pcg/include/pcg/file_format/v1/graphs/v1_multidigraph.struct.toml /* proj-data { - "generated_from": "fb1033385645e54a19c9b44cef0be04b" + "generated_from": "582054edb983c3cc31d9273ce29421eb" } */ #include "pcg/file_format/v1/graphs/v1_multidigraph.dtg.h" #include "pcg/file_format/v1/graphs/v1_graph_edge.dtg.h" -#include "utils/fmt.h" +#include "utils/fmt/unordered_set.h" +#include "utils/fmt/vector.h" #include #include #include @@ -24,15 +25,16 @@ V1MultiDiGraph::V1MultiDiGraph( } // namespace FlexFlow namespace nlohmann { -FlexFlow::V1MultiDiGraph - adl_serializer::from_json(json const &j) { - return {j.at("nodes").template get>(), - j.at("ports").template get>(), - j.at("edges") - .template get>()}; +::FlexFlow::V1MultiDiGraph + adl_serializer<::FlexFlow::V1MultiDiGraph>::from_json(json const &j) { + return ::FlexFlow::V1MultiDiGraph{ + j.at("nodes").template get>(), + j.at("ports").template get>(), + j.at("edges") + .template get>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::V1MultiDiGraph const &v) { +void 
adl_serializer<::FlexFlow::V1MultiDiGraph>::to_json( + json &j, ::FlexFlow::V1MultiDiGraph const &v) { j["__type"] = "V1MultiDiGraph"; j["nodes"] = v.nodes; j["ports"] = v.ports; diff --git a/lib/pcg/src/pcg/file_format/v1/graphs/v1_operator_graph.dtg.cc b/lib/pcg/src/pcg/file_format/v1/graphs/v1_operator_graph.dtg.cc index 19f1e09d07..4c908ae2f1 100644 --- a/lib/pcg/src/pcg/file_format/v1/graphs/v1_operator_graph.dtg.cc +++ b/lib/pcg/src/pcg/file_format/v1/graphs/v1_operator_graph.dtg.cc @@ -3,14 +3,15 @@ // lib/pcg/include/pcg/file_format/v1/graphs/v1_operator_graph.struct.toml /* proj-data { - "generated_from": "5bfd7d8755cfd8cd9dbf57d5c367038e" + "generated_from": "fed215ca219af1bd375801eb2e33b473" } */ #include "pcg/file_format/v1/graphs/v1_operator_graph.dtg.h" #include "pcg/file_format/v1/graphs/v1_graph_edge.dtg.h" -#include "utils/fmt.h" +#include "utils/fmt/unordered_set.h" +#include "utils/fmt/vector.h" #include #include #include @@ -23,14 +24,15 @@ V1OperatorGraph::V1OperatorGraph( } // namespace FlexFlow namespace nlohmann { -FlexFlow::V1OperatorGraph - adl_serializer::from_json(json const &j) { - return {j.at("nodes").template get>(), - j.at("edges") - .template get>()}; +::FlexFlow::V1OperatorGraph + adl_serializer<::FlexFlow::V1OperatorGraph>::from_json(json const &j) { + return ::FlexFlow::V1OperatorGraph{ + j.at("nodes").template get>(), + j.at("edges") + .template get>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::V1OperatorGraph const &v) { +void adl_serializer<::FlexFlow::V1OperatorGraph>::to_json( + json &j, ::FlexFlow::V1OperatorGraph const &v) { j["__type"] = "V1OperatorGraph"; j["nodes"] = v.nodes; j["edges"] = v.edges; diff --git a/lib/pcg/src/pcg/gpu_id_t.dtg.cc b/lib/pcg/src/pcg/gpu_id_t.dtg.cc index e2385a83ce..f82e5c747e 100644 --- a/lib/pcg/src/pcg/gpu_id_t.dtg.cc +++ b/lib/pcg/src/pcg/gpu_id_t.dtg.cc @@ -34,7 +34,8 @@ bool gpu_id_t::operator>=(gpu_id_t const &other) const { } // namespace FlexFlow namespace std { -size_t 
hash::operator()(FlexFlow::gpu_id_t const &x) const { +size_t + hash::operator()(::FlexFlow::gpu_id_t const &x) const { size_t result = 0; result ^= std::hash{}(x.gpu_index) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -43,20 +44,20 @@ size_t hash::operator()(FlexFlow::gpu_id_t const &x) const { } // namespace std namespace nlohmann { -FlexFlow::gpu_id_t - adl_serializer::from_json(json const &j) { - return {j.at("gpu_index").template get()}; +::FlexFlow::gpu_id_t + adl_serializer<::FlexFlow::gpu_id_t>::from_json(json const &j) { + return ::FlexFlow::gpu_id_t{j.at("gpu_index").template get()}; } -void adl_serializer::to_json(json &j, - FlexFlow::gpu_id_t const &v) { +void adl_serializer<::FlexFlow::gpu_id_t>::to_json( + json &j, ::FlexFlow::gpu_id_t const &v) { j["__type"] = "gpu_id_t"; j["gpu_index"] = v.gpu_index; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::gpu_id_t> Arbitrary<::FlexFlow::gpu_id_t>::arbitrary() { + return gen::construct<::FlexFlow::gpu_id_t>(gen::arbitrary()); } } // namespace rc diff --git a/lib/pcg/src/pcg/initializer_attrs.dtg.cc b/lib/pcg/src/pcg/initializer_attrs.dtg.cc index 2a4e97db1e..44e1135869 100644 --- a/lib/pcg/src/pcg/initializer_attrs.dtg.cc +++ b/lib/pcg/src/pcg/initializer_attrs.dtg.cc @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/initializer_attrs.variant.toml /* proj-data { - "generated_from": "f66f3a89ea937e96a058d83ab52e2826" + "generated_from": "f4d932a4a7728ebfc28a23f2e6ca3201" } */ @@ -114,6 +114,22 @@ void adl_serializer<::FlexFlow::InitializerAttrs>::to_json( } } } // namespace nlohmann +namespace rc { +Gen<::FlexFlow::InitializerAttrs> + Arbitrary<::FlexFlow::InitializerAttrs>::arbitrary() { + return gen::oneOf( + gen::construct<::FlexFlow::InitializerAttrs>( + gen::arbitrary<::FlexFlow::GlorotUniformAttrs>()), + gen::construct<::FlexFlow::InitializerAttrs>( + gen::arbitrary<::FlexFlow::ZeroInitializerAttrs>()), + 
gen::construct<::FlexFlow::InitializerAttrs>( + gen::arbitrary<::FlexFlow::UniformInitializerAttrs>()), + gen::construct<::FlexFlow::InitializerAttrs>( + gen::arbitrary<::FlexFlow::NormInitializerAttrs>()), + gen::construct<::FlexFlow::InitializerAttrs>( + gen::arbitrary<::FlexFlow::ConstantInitializerAttrs>())); +} +} // namespace rc namespace FlexFlow { std::string format_as(::FlexFlow::InitializerAttrs const &x) { std::ostringstream oss; diff --git a/lib/pcg/src/pcg/initializers/constant_initializer_attrs.dtg.cc b/lib/pcg/src/pcg/initializers/constant_initializer_attrs.dtg.cc index 9770c35248..6c1ae1dfac 100644 --- a/lib/pcg/src/pcg/initializers/constant_initializer_attrs.dtg.cc +++ b/lib/pcg/src/pcg/initializers/constant_initializer_attrs.dtg.cc @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/initializers/constant_initializer_attrs.struct.toml /* proj-data { - "generated_from": "0162b9c49fe6cbfc65410c6fa8dec427" + "generated_from": "4ffc8ccd7dfdb7674556487433ea9913" } */ @@ -45,7 +45,7 @@ bool ConstantInitializerAttrs::operator>=( namespace std { size_t hash::operator()( - FlexFlow::ConstantInitializerAttrs const &x) const { + ::FlexFlow::ConstantInitializerAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::DataTypeValue>{}(x.value) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -54,18 +54,27 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ConstantInitializerAttrs - adl_serializer::from_json( +::FlexFlow::ConstantInitializerAttrs + adl_serializer<::FlexFlow::ConstantInitializerAttrs>::from_json( json const &j) { - return {j.at("value").template get<::FlexFlow::DataTypeValue>()}; + return ::FlexFlow::ConstantInitializerAttrs{ + j.at("value").template get<::FlexFlow::DataTypeValue>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ConstantInitializerAttrs const &v) { +void adl_serializer<::FlexFlow::ConstantInitializerAttrs>::to_json( + json &j, ::FlexFlow::ConstantInitializerAttrs const &v) { j["__type"] = 
"ConstantInitializerAttrs"; j["value"] = v.value; } } // namespace nlohmann +namespace rc { +Gen<::FlexFlow::ConstantInitializerAttrs> + Arbitrary<::FlexFlow::ConstantInitializerAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ConstantInitializerAttrs>( + gen::arbitrary<::FlexFlow::DataTypeValue>()); +} +} // namespace rc + namespace FlexFlow { std::string format_as(ConstantInitializerAttrs const &x) { std::ostringstream oss; diff --git a/lib/pcg/src/pcg/initializers/glorot_uniform_attrs.dtg.cc b/lib/pcg/src/pcg/initializers/glorot_uniform_attrs.dtg.cc index 0c8ae6e60c..cf2164ed97 100644 --- a/lib/pcg/src/pcg/initializers/glorot_uniform_attrs.dtg.cc +++ b/lib/pcg/src/pcg/initializers/glorot_uniform_attrs.dtg.cc @@ -35,7 +35,7 @@ bool GlorotUniformAttrs::operator>=(GlorotUniformAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::GlorotUniformAttrs const &x) const { + ::FlexFlow::GlorotUniformAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.seed) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -44,21 +44,21 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::GlorotUniformAttrs - adl_serializer::from_json(json const &j) { - return {j.at("seed").template get()}; +::FlexFlow::GlorotUniformAttrs + adl_serializer<::FlexFlow::GlorotUniformAttrs>::from_json(json const &j) { + return ::FlexFlow::GlorotUniformAttrs{j.at("seed").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::GlorotUniformAttrs const &v) { +void adl_serializer<::FlexFlow::GlorotUniformAttrs>::to_json( + json &j, ::FlexFlow::GlorotUniformAttrs const &v) { j["__type"] = "GlorotUniformAttrs"; j["seed"] = v.seed; } } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::GlorotUniformAttrs> + Arbitrary<::FlexFlow::GlorotUniformAttrs>::arbitrary() { + return gen::construct<::FlexFlow::GlorotUniformAttrs>(gen::arbitrary()); } } // 
namespace rc diff --git a/lib/pcg/src/pcg/initializers/norm_initializer_attrs.dtg.cc b/lib/pcg/src/pcg/initializers/norm_initializer_attrs.dtg.cc index aceac12212..5d8c2fa02b 100644 --- a/lib/pcg/src/pcg/initializers/norm_initializer_attrs.dtg.cc +++ b/lib/pcg/src/pcg/initializers/norm_initializer_attrs.dtg.cc @@ -44,7 +44,7 @@ bool NormInitializerAttrs::operator>=(NormInitializerAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::NormInitializerAttrs const &x) const { + ::FlexFlow::NormInitializerAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.seed) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -57,14 +57,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::NormInitializerAttrs - adl_serializer::from_json(json const &j) { - return {j.at("seed").template get(), - j.at("mean").template get(), - j.at("stddev").template get()}; +::FlexFlow::NormInitializerAttrs + adl_serializer<::FlexFlow::NormInitializerAttrs>::from_json(json const &j) { + return ::FlexFlow::NormInitializerAttrs{j.at("seed").template get(), + j.at("mean").template get(), + j.at("stddev").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::NormInitializerAttrs const &v) { +void adl_serializer<::FlexFlow::NormInitializerAttrs>::to_json( + json &j, ::FlexFlow::NormInitializerAttrs const &v) { j["__type"] = "NormInitializerAttrs"; j["seed"] = v.seed; j["mean"] = v.mean; @@ -73,9 +73,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::NormInitializerAttrs> + Arbitrary<::FlexFlow::NormInitializerAttrs>::arbitrary() { + return gen::construct<::FlexFlow::NormInitializerAttrs>( gen::arbitrary(), gen::arbitrary(), gen::arbitrary()); } } // namespace rc diff --git a/lib/pcg/src/pcg/initializers/uniform_initializer_attrs.cc b/lib/pcg/src/pcg/initializers/uniform_initializer_attrs.cc new file mode 100644 index 
0000000000..56f13309ac --- /dev/null +++ b/lib/pcg/src/pcg/initializers/uniform_initializer_attrs.cc @@ -0,0 +1,21 @@ +#include "pcg/initializers/uniform_initializer_attrs.h" + +namespace rc { + +using ::FlexFlow::UniformInitializerAttrs; + +Gen Arbitrary::arbitrary() { + return gen::map>( + [](std::tuple const &generated) { + auto [f1, f2, seed] = generated; + float minval = std::min(f1, f2); + float maxval = std::max(f1, f2); + return ::FlexFlow::UniformInitializerAttrs{ + seed, + minval, + maxval, + }; + }); +}; + +} // namespace rc diff --git a/lib/pcg/src/pcg/initializers/uniform_initializer_attrs.dtg.cc b/lib/pcg/src/pcg/initializers/uniform_initializer_attrs.dtg.cc index a9c62675d0..b66544d4b3 100644 --- a/lib/pcg/src/pcg/initializers/uniform_initializer_attrs.dtg.cc +++ b/lib/pcg/src/pcg/initializers/uniform_initializer_attrs.dtg.cc @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/initializers/uniform_initializer_attrs.struct.toml /* proj-data { - "generated_from": "f887e1db5d5dc710793ec5fa99bb7cd4" + "generated_from": "dd9cbe65dc4495b031aef40d353db928" } */ @@ -50,7 +50,7 @@ bool UniformInitializerAttrs::operator>=( namespace std { size_t hash::operator()( - FlexFlow::UniformInitializerAttrs const &x) const { + ::FlexFlow::UniformInitializerAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.seed) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -63,15 +63,16 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::UniformInitializerAttrs - adl_serializer::from_json( +::FlexFlow::UniformInitializerAttrs + adl_serializer<::FlexFlow::UniformInitializerAttrs>::from_json( json const &j) { - return {j.at("seed").template get(), - j.at("min_val").template get(), - j.at("max_val").template get()}; + return ::FlexFlow::UniformInitializerAttrs{ + j.at("seed").template get(), + j.at("min_val").template get(), + j.at("max_val").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::UniformInitializerAttrs const &v) { +void 
adl_serializer<::FlexFlow::UniformInitializerAttrs>::to_json( + json &j, ::FlexFlow::UniformInitializerAttrs const &v) { j["__type"] = "UniformInitializerAttrs"; j["seed"] = v.seed; j["min_val"] = v.min_val; diff --git a/lib/pcg/src/pcg/initializers/zero_initializer_attrs.dtg.cc b/lib/pcg/src/pcg/initializers/zero_initializer_attrs.dtg.cc index 933501a734..eb88f4c8ff 100644 --- a/lib/pcg/src/pcg/initializers/zero_initializer_attrs.dtg.cc +++ b/lib/pcg/src/pcg/initializers/zero_initializer_attrs.dtg.cc @@ -34,27 +34,27 @@ bool ZeroInitializerAttrs::operator>=(ZeroInitializerAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ZeroInitializerAttrs const &x) const { + ::FlexFlow::ZeroInitializerAttrs const &x) const { size_t result = 0; return result; } } // namespace std namespace nlohmann { -FlexFlow::ZeroInitializerAttrs - adl_serializer::from_json(json const &j) { - return {}; +::FlexFlow::ZeroInitializerAttrs + adl_serializer<::FlexFlow::ZeroInitializerAttrs>::from_json(json const &j) { + return ::FlexFlow::ZeroInitializerAttrs{}; } -void adl_serializer::to_json( - json &j, FlexFlow::ZeroInitializerAttrs const &v) { +void adl_serializer<::FlexFlow::ZeroInitializerAttrs>::to_json( + json &j, ::FlexFlow::ZeroInitializerAttrs const &v) { j["__type"] = "ZeroInitializerAttrs"; } } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct(); +Gen<::FlexFlow::ZeroInitializerAttrs> + Arbitrary<::FlexFlow::ZeroInitializerAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ZeroInitializerAttrs>(); } } // namespace rc diff --git a/lib/pcg/src/pcg/layer_attrs.dtg.cc b/lib/pcg/src/pcg/layer_attrs.dtg.cc index 21c53ad4e8..4497d849e6 100644 --- a/lib/pcg/src/pcg/layer_attrs.dtg.cc +++ b/lib/pcg/src/pcg/layer_attrs.dtg.cc @@ -42,7 +42,7 @@ bool LayerAttrs::operator>=(LayerAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::LayerAttrs const &x) const { + ::FlexFlow::LayerAttrs const 
&x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ComputationGraphOpAttrs>{}(x.attrs) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -54,15 +54,15 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::LayerAttrs - adl_serializer::from_json(json const &j) { - return { +::FlexFlow::LayerAttrs + adl_serializer<::FlexFlow::LayerAttrs>::from_json(json const &j) { + return ::FlexFlow::LayerAttrs{ j.at("attrs").template get<::FlexFlow::ComputationGraphOpAttrs>(), j.at("name") .template get>>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::LayerAttrs const &v) { +void adl_serializer<::FlexFlow::LayerAttrs>::to_json( + json &j, ::FlexFlow::LayerAttrs const &v) { j["__type"] = "LayerAttrs"; j["attrs"] = v.attrs; j["name"] = v.name; diff --git a/lib/pcg/src/pcg/layer_guid_t.dtg.cc b/lib/pcg/src/pcg/layer_guid_t.dtg.cc index 9d92608569..706de4e376 100644 --- a/lib/pcg/src/pcg/layer_guid_t.dtg.cc +++ b/lib/pcg/src/pcg/layer_guid_t.dtg.cc @@ -37,7 +37,7 @@ bool layer_guid_t::operator>=(layer_guid_t const &other) const { namespace std { size_t hash::operator()( - FlexFlow::layer_guid_t const &x) const { + ::FlexFlow::layer_guid_t const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::Node>{}(x.raw_node) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/pcg/src/pcg/machine_specification.dtg.cc b/lib/pcg/src/pcg/machine_specification.dtg.cc index 238c61a014..f893b135bb 100644 --- a/lib/pcg/src/pcg/machine_specification.dtg.cc +++ b/lib/pcg/src/pcg/machine_specification.dtg.cc @@ -97,7 +97,7 @@ bool MachineSpecification::operator>=(MachineSpecification const &other) const { namespace std { size_t hash::operator()( - FlexFlow::MachineSpecification const &x) const { + ::FlexFlow::MachineSpecification const &x) const { size_t result = 0; result ^= std::hash{}(x.num_nodes) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -114,16 +114,17 @@ size_t hash::operator()( } // namespace std namespace nlohmann { 
-FlexFlow::MachineSpecification - adl_serializer::from_json(json const &j) { - return {j.at("num_nodes").template get(), - j.at("num_cpus_per_node").template get(), - j.at("num_gpus_per_node").template get(), - j.at("inter_node_bandwidth").template get(), - j.at("intra_node_bandwidth").template get()}; +::FlexFlow::MachineSpecification + adl_serializer<::FlexFlow::MachineSpecification>::from_json(json const &j) { + return ::FlexFlow::MachineSpecification{ + j.at("num_nodes").template get(), + j.at("num_cpus_per_node").template get(), + j.at("num_gpus_per_node").template get(), + j.at("inter_node_bandwidth").template get(), + j.at("intra_node_bandwidth").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::MachineSpecification const &v) { +void adl_serializer<::FlexFlow::MachineSpecification>::to_json( + json &j, ::FlexFlow::MachineSpecification const &v) { j["__type"] = "MachineSpecification"; j["num_nodes"] = v.num_nodes; j["num_cpus_per_node"] = v.num_cpus_per_node; diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index ff1d34852b..00bf1296fe 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -25,18 +25,19 @@ static StridedRectangle make_1d_rect(int start, int stop, int stride) { assert(stride > 0); StridedRectangleSide side = strided_side_from_size_and_stride(side_size_t{stop - start}, stride); - StridedRectangle rect = {{side}}; + StridedRectangle rect = + StridedRectangle{std::vector{side}}; return rect; } MachineView make_1d_machine_view(gpu_id_t start, gpu_id_t stop, int stride) { StridedRectangle rect = make_1d_rect(start.gpu_index, stop.gpu_index, stride); - return {device_id_t{start}, rect}; + return MachineView{device_id_t{start}, rect}; } MachineView make_1d_machine_view(cpu_id_t start, cpu_id_t stop, int stride) { StridedRectangle rect = make_1d_rect(start.cpu_index, stop.cpu_index, stride); - return {device_id_t{start}, rect}; + return MachineView{device_id_t{start}, 
rect}; } MachineView make_1d_machine_view(device_id_t start, diff --git a/lib/pcg/src/pcg/machine_view.dtg.cc b/lib/pcg/src/pcg/machine_view.dtg.cc index edab125e3d..de577fe409 100644 --- a/lib/pcg/src/pcg/machine_view.dtg.cc +++ b/lib/pcg/src/pcg/machine_view.dtg.cc @@ -39,7 +39,7 @@ bool MachineView::operator>=(MachineView const &other) const { namespace std { size_t hash::operator()( - FlexFlow::MachineView const &x) const { + ::FlexFlow::MachineView const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::device_id_t>{}(x.start) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -50,13 +50,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::MachineView - adl_serializer::from_json(json const &j) { - return {j.at("start").template get<::FlexFlow::device_id_t>(), - j.at("rect").template get<::FlexFlow::StridedRectangle>()}; +::FlexFlow::MachineView + adl_serializer<::FlexFlow::MachineView>::from_json(json const &j) { + return ::FlexFlow::MachineView{ + j.at("start").template get<::FlexFlow::device_id_t>(), + j.at("rect").template get<::FlexFlow::StridedRectangle>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::MachineView const &v) { +void adl_serializer<::FlexFlow::MachineView>::to_json( + json &j, ::FlexFlow::MachineView const &v) { j["__type"] = "MachineView"; j["start"] = v.start; j["rect"] = v.rect; diff --git a/lib/pcg/src/pcg/num_points_t.dtg.cc b/lib/pcg/src/pcg/num_points_t.dtg.cc index 7a0a849814..e7c54dcfbe 100644 --- a/lib/pcg/src/pcg/num_points_t.dtg.cc +++ b/lib/pcg/src/pcg/num_points_t.dtg.cc @@ -35,7 +35,7 @@ bool num_points_t::operator>=(num_points_t const &other) const { namespace std { size_t hash::operator()( - FlexFlow::num_points_t const &x) const { + ::FlexFlow::num_points_t const &x) const { size_t result = 0; result ^= std::hash{}(x.unwrapped) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -44,20 +44,20 @@ size_t hash::operator()( } // namespace std namespace nlohmann { 
-FlexFlow::num_points_t - adl_serializer::from_json(json const &j) { - return {j.at("unwrapped").template get()}; +::FlexFlow::num_points_t + adl_serializer<::FlexFlow::num_points_t>::from_json(json const &j) { + return ::FlexFlow::num_points_t{j.at("unwrapped").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::num_points_t const &v) { +void adl_serializer<::FlexFlow::num_points_t>::to_json( + json &j, ::FlexFlow::num_points_t const &v) { j["__type"] = "num_points_t"; j["unwrapped"] = v.unwrapped; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary()); +Gen<::FlexFlow::num_points_t> Arbitrary<::FlexFlow::num_points_t>::arbitrary() { + return gen::construct<::FlexFlow::num_points_t>(gen::arbitrary()); } } // namespace rc diff --git a/lib/pcg/src/pcg/operator_graph/operator_graph_input.dtg.cc b/lib/pcg/src/pcg/operator_graph/operator_graph_input.dtg.cc index 381c948ad0..7d31197f9d 100644 --- a/lib/pcg/src/pcg/operator_graph/operator_graph_input.dtg.cc +++ b/lib/pcg/src/pcg/operator_graph/operator_graph_input.dtg.cc @@ -38,7 +38,7 @@ bool OperatorGraphInput::operator>=(OperatorGraphInput const &other) const { namespace std { size_t hash::operator()( - FlexFlow::OperatorGraphInput const &x) const { + ::FlexFlow::OperatorGraphInput const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::Node>{}(x.node) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/pcg/src/pcg/operator_graph/operator_graph_output.dtg.cc b/lib/pcg/src/pcg/operator_graph/operator_graph_output.dtg.cc index 88c23c0c67..2b5a2abbcd 100644 --- a/lib/pcg/src/pcg/operator_graph/operator_graph_output.dtg.cc +++ b/lib/pcg/src/pcg/operator_graph/operator_graph_output.dtg.cc @@ -38,7 +38,7 @@ bool OperatorGraphOutput::operator>=(OperatorGraphOutput const &other) const { namespace std { size_t hash::operator()( - FlexFlow::OperatorGraphOutput const &x) const { + ::FlexFlow::OperatorGraphOutput const &x) const { 
size_t result = 0; result ^= std::hash<::FlexFlow::Node>{}(x.node) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/pcg/src/pcg/optimizers/adam_optimizer_attrs.dtg.cc b/lib/pcg/src/pcg/optimizers/adam_optimizer_attrs.dtg.cc index d362459cc3..7ec6876c8b 100644 --- a/lib/pcg/src/pcg/optimizers/adam_optimizer_attrs.dtg.cc +++ b/lib/pcg/src/pcg/optimizers/adam_optimizer_attrs.dtg.cc @@ -115,7 +115,7 @@ bool AdamOptimizerAttrs::operator>=(AdamOptimizerAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::AdamOptimizerAttrs const &x) const { + ::FlexFlow::AdamOptimizerAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.alpha) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -136,18 +136,19 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::AdamOptimizerAttrs - adl_serializer::from_json(json const &j) { - return {j.at("alpha").template get(), - j.at("beta1").template get(), - j.at("beta2").template get(), - j.at("weight_decay").template get(), - j.at("alpha_t").template get(), - j.at("beta_t").template get(), - j.at("beta2_t").template get()}; +::FlexFlow::AdamOptimizerAttrs + adl_serializer<::FlexFlow::AdamOptimizerAttrs>::from_json(json const &j) { + return ::FlexFlow::AdamOptimizerAttrs{ + j.at("alpha").template get(), + j.at("beta1").template get(), + j.at("beta2").template get(), + j.at("weight_decay").template get(), + j.at("alpha_t").template get(), + j.at("beta_t").template get(), + j.at("beta2_t").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::AdamOptimizerAttrs const &v) { +void adl_serializer<::FlexFlow::AdamOptimizerAttrs>::to_json( + json &j, ::FlexFlow::AdamOptimizerAttrs const &v) { j["__type"] = "AdamOptimizerAttrs"; j["alpha"] = v.alpha; j["beta1"] = v.beta1; @@ -160,15 +161,16 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary(), - gen::arbitrary(), - 
gen::arbitrary(), - gen::arbitrary(), - gen::arbitrary(), - gen::arbitrary(), - gen::arbitrary()); +Gen<::FlexFlow::AdamOptimizerAttrs> + Arbitrary<::FlexFlow::AdamOptimizerAttrs>::arbitrary() { + return gen::construct<::FlexFlow::AdamOptimizerAttrs>( + gen::arbitrary(), + gen::arbitrary(), + gen::arbitrary(), + gen::arbitrary(), + gen::arbitrary(), + gen::arbitrary(), + gen::arbitrary()); } } // namespace rc diff --git a/lib/pcg/src/pcg/optimizers/sgd_optimizer_attrs.dtg.cc b/lib/pcg/src/pcg/optimizers/sgd_optimizer_attrs.dtg.cc index d5e668917b..de1c5a4e6b 100644 --- a/lib/pcg/src/pcg/optimizers/sgd_optimizer_attrs.dtg.cc +++ b/lib/pcg/src/pcg/optimizers/sgd_optimizer_attrs.dtg.cc @@ -52,7 +52,7 @@ bool SGDOptimizerAttrs::operator>=(SGDOptimizerAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::SGDOptimizerAttrs const &x) const { + ::FlexFlow::SGDOptimizerAttrs const &x) const { size_t result = 0; result ^= std::hash{}(x.lr) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -67,15 +67,16 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::SGDOptimizerAttrs - adl_serializer::from_json(json const &j) { - return {j.at("lr").template get(), - j.at("momentum").template get(), - j.at("nesterov").template get(), - j.at("weight_decay").template get()}; +::FlexFlow::SGDOptimizerAttrs + adl_serializer<::FlexFlow::SGDOptimizerAttrs>::from_json(json const &j) { + return ::FlexFlow::SGDOptimizerAttrs{ + j.at("lr").template get(), + j.at("momentum").template get(), + j.at("nesterov").template get(), + j.at("weight_decay").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::SGDOptimizerAttrs const &v) { +void adl_serializer<::FlexFlow::SGDOptimizerAttrs>::to_json( + json &j, ::FlexFlow::SGDOptimizerAttrs const &v) { j["__type"] = "SGDOptimizerAttrs"; j["lr"] = v.lr; j["momentum"] = v.momentum; @@ -85,12 +86,13 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - 
Arbitrary::arbitrary() { - return gen::construct(gen::arbitrary(), - gen::arbitrary(), - gen::arbitrary(), - gen::arbitrary()); +Gen<::FlexFlow::SGDOptimizerAttrs> + Arbitrary<::FlexFlow::SGDOptimizerAttrs>::arbitrary() { + return gen::construct<::FlexFlow::SGDOptimizerAttrs>( + gen::arbitrary(), + gen::arbitrary(), + gen::arbitrary(), + gen::arbitrary()); } } // namespace rc diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc new file mode 100644 index 0000000000..82fc0b9425 --- /dev/null +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc @@ -0,0 +1,75 @@ +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "pcg/dataflow_graph/algorithms.h" +#include "utils/containers.h" + +namespace FlexFlow { + +ParallelComputationGraph empty_parallel_computation_graph() { + return ParallelComputationGraph{ + DataflowGraph{}}; +} + +std::unordered_set + get_parallel_layers(ParallelComputationGraph const &pcg) { + return transform(get_nodes(pcg.raw_graph), + [&](Node const &n) { return parallel_layer_guid_t{n}; }); +} + +ParallelLayerAddedResult + add_parallel_layer(ParallelComputationGraph &pcg, + ParallelLayerAttrs const &layer_attrs, + std::vector const &inputs, + std::vector const &output_labels) { + std::vector unwrapped_inputs = + transform(inputs, [](parallel_tensor_guid_t const &t) { + return t.raw_graph_output; + }); + OperatorAddedResult op_added = + pcg.raw_graph.add_operator(layer_attrs, unwrapped_inputs, output_labels); + return ParallelLayerAddedResult{ + parallel_layer_guid_t{op_added.node}, + transform( + op_added.outputs, + [](MultiDiOutput const &o) { return parallel_tensor_guid_t{o}; }), + }; +} + +std::vector + get_layer_inputs(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &l) { + return transform( + get_inputs(pcg.raw_graph, l.raw_graph_node), + [](MultiDiOutput const &o) { 
return parallel_tensor_guid_t{o}; }); +} + +std::vector + get_layer_outputs(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &l) { + return transform( + get_outputs(pcg.raw_graph, l.raw_graph_node), + [](MultiDiOutput const &o) { return parallel_tensor_guid_t{o}; }); +} + +parallel_layer_guid_t get_source_layer(ParallelComputationGraph const &g, + parallel_tensor_guid_t const &t) { + return parallel_layer_guid_t{t.raw_graph_output.src}; +} + +ParallelLayerAttrs get_parallel_layer_attrs(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &l) { + return pcg.raw_graph.at(l.raw_graph_node); +} + +ParallelTensorAttrs + get_parallel_tensor_attrs(ParallelComputationGraph const &pcg, + parallel_tensor_guid_t const &t) { + return pcg.raw_graph.at(t.raw_graph_output); +} + +std::vector + topological_ordering(ParallelComputationGraph const &pcg) { + return transform(topological_ordering(pcg.raw_graph), + [](Node const &n) { return parallel_layer_guid_t{n}; }); +} + +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/parallel_computation_graph.dtg.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.dtg.cc similarity index 50% rename from lib/pcg/src/pcg/parallel_computation_graph.dtg.cc rename to lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.dtg.cc index e4e1555b4a..cdc9130979 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph.dtg.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.dtg.cc @@ -1,17 +1,17 @@ // THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
// If you would like to modify this datatype, instead modify -// lib/pcg/include/pcg/parallel_computation_graph.struct.toml +// lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.struct.toml /* proj-data { - "generated_from": "e4db0f603f7b8947dda13e01f96c40fb" + "generated_from": "1339be6e86e9818c36d6ecf5475e2d4b" } */ -#include "pcg/parallel_computation_graph.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" -#include "pcg/dataflow_graph.h" -#include "pcg/parallel_layer_attrs.dtg.h" -#include "pcg/parallel_tensor_attrs.dtg.h" +#include "pcg/dataflow_graph/dataflow_graph.h" +#include "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h" namespace FlexFlow { ParallelComputationGraph::ParallelComputationGraph( diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc new file mode 100644 index 0000000000..29723ed078 --- /dev/null +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc @@ -0,0 +1,520 @@ +#include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h" +#include "op-attrs/ops/weight_attrs.dtg.h" +#include "op-attrs/pcg_operator_attrs.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "utils/containers.h" +#include "utils/containers/concat_vectors.h" + +namespace FlexFlow { + +static std::string get_default_name(OperatorType op_type) { + return get_operator_type_name(op_type); +} + +static std::string get_default_name(PCGOperatorAttrs const &attrs) { + return get_default_name(get_op_type(attrs)); +} + +static ParallelTensorAttrs make_weight_attrs( + ParallelTensorShape const &shape, + std::optional const &initializer_attrs) { + return ParallelTensorAttrs{ + /*shape=*/shape, + /*sync_type=*/std::nullopt, + 
/*initializer=*/initializer_attrs, + /*create_gradients=*/CreateGrad::YES, + }; +} + +ParallelComputationGraphBuilder::ParallelComputationGraphBuilder() + : pcg(empty_parallel_computation_graph()) {} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::create_input_tensor( + ParallelTensorShape const &shape, + bool create_grad, + std::optional const &name) { + ParallelTensorAttrs tensor_attrs = ParallelTensorAttrs{ + /*shape=*/shape, + /*sync_type=*/std::nullopt, + /*initializer=*/std::nullopt, + /*create_gradients=*/(create_grad ? CreateGrad::YES : CreateGrad::NO), + }; + ParallelLayerAttrs layer_attrs = ParallelLayerAttrs{ + PCGOperatorAttrs{InputAttrs{}}, + name, + }; + + return this->add_layer(layer_attrs, {}, {}, tensor_attrs); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::add( + parallel_tensor_guid_t const &lhs, + parallel_tensor_guid_t const &rhs, + std::optional const &maybe_name) { + + ParallelTensorShape lhs_shape = this->get_shape(lhs); + ParallelTensorShape rhs_shape = this->get_shape(rhs); + + DataType datatype = [&] { + if (lhs_shape.data_type != rhs_shape.data_type) { + throw mk_runtime_error( + fmt::format("Datatypes do not match: {} (lhs) != {} (rhs)", + lhs_shape.data_type, + rhs_shape.data_type)); + } else { + return lhs_shape.data_type; + } + }(); + + ElementBinaryAttrs attrs = ElementBinaryAttrs{ + OperatorType::EW_ADD, + datatype, + false, + false, + }; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + ParallelTensorShape output_shape = + throw_if_unexpected(get_output_shape(attrs, lhs_shape, rhs_shape)); + + return this->add_layer(layer, {lhs, rhs}, {}, output_shape); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::batch_matmul( + parallel_tensor_guid_t const &a, + parallel_tensor_guid_t const &b, + std::optional const &maybe_name) { + + BatchMatmulAttrs attrs = BatchMatmulAttrs{ + 
/*a_seq_length_dim=*/-1, + /*b_seq_length_dim=*/-1, + }; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + ParallelTensorShape output_shape = throw_if_unexpected( + get_output_shape(attrs, this->get_shape(a), this->get_shape(b))); + + return this->add_layer(layer, {a, b}, {}, {output_shape}); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::cast( + parallel_tensor_guid_t const &input, + DataType result_type, + std::optional const &maybe_name) { + + CastAttrs attrs = CastAttrs{result_type}; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + ParallelTensorShape output_shape = + throw_if_unexpected(get_output_shape(attrs, this->get_shape(input))); + + return this->add_layer(layer, {input}, {}, {output_shape}); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::conv2d( + parallel_tensor_guid_t const &raw_input, + int outChannels, + int kernelH, + int kernelW, + int strideH, + int strideW, + int paddingH, + int paddingW, + std::optional const &activation, + int groups, + bool use_bias, + std::optional const &kernel_initializer, + std::optional const &bias_initializer, + std::optional const &kernel_regularizer, + std::optional const &maybe_name) { + Conv2DAttrs attrs = Conv2DAttrs{outChannels, + kernelH, + kernelW, + strideH, + strideW, + paddingH, + paddingW, + groups, + activation, + use_bias}; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + parallel_tensor_guid_t input = + this->as_type(raw_input, DataType::FLOAT, name + "input_pre_cast"); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + + ParallelTensorShape input_shape = this->get_shape(input); + ParallelTensorShape output_shape = get_output_shape(attrs, input_shape); + + 
std::vector weights; + + weights.push_back(make_weight_attrs(get_kernel_shape(attrs, input_shape), + kernel_initializer)); + + if (use_bias) { + weights.push_back(make_weight_attrs(get_bias_shape(attrs, input_shape), + bias_initializer)); + } + + return this->add_layer(layer, {input}, weights, output_shape); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::dense( + parallel_tensor_guid_t const &input, + int outDim, + std::optional activation, + bool use_bias, + DataType data_type, + std::optional const &kernel_initializer, + std::optional const &bias_initializer, + std::optional const &maybe_name) { + LinearAttrs attrs = LinearAttrs{ + outDim, + use_bias, + data_type, + activation, + std::nullopt, + }; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + + ParallelTensorShape input_shape = this->get_shape(input); + ParallelTensorShape output_shape = + throw_if_unexpected(get_output_shape(attrs, input_shape)); + + std::vector weights; + + { + ParallelTensorShape kernel_shape = + throw_if_unexpected(get_kernel_shape(attrs, input_shape)); + weights.push_back(make_weight_attrs(kernel_shape, kernel_initializer)); + } + + if (use_bias) { + ParallelTensorShape bias_shape = + throw_if_unexpected(get_bias_shape(attrs, input_shape)); + weights.push_back(make_weight_attrs(bias_shape, bias_initializer)); + } else if (bias_initializer.has_value()) { + throw mk_runtime_error("Dense received unexpected bias initializer even " + "though use_bias is set to false"); + } + + return this->add_layer(layer, {input}, weights, output_shape); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::embedding( + parallel_tensor_guid_t const &input, + int num_entries, + int outDim, + AggregateOp aggr, + DataType dtype, + std::optional const &kernel_initializer, + std::optional const &maybe_name) { + + EmbeddingAttrs attrs = EmbeddingAttrs{ + num_entries, + 
outDim, + aggr, + dtype, + }; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + + ParallelTensorShape input_shape = this->get_shape(input); + ParallelTensorShape output_shape = + throw_if_unexpected(get_output_shape(attrs, input_shape)); + ParallelTensorShape weights_shape = + throw_if_unexpected(get_weights_shape(attrs, input_shape)); + + ParallelTensorAttrs weights_attrs = + make_weight_attrs(weights_shape, kernel_initializer); + + return this->add_layer(layer, {input}, {weights_attrs}, output_shape); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::multihead_attention( + parallel_tensor_guid_t const &query, + parallel_tensor_guid_t const &key, + parallel_tensor_guid_t const &value, + int embed_dim, + int num_heads, + std::optional maybe_kdim, + std::optional maybe_vdim, + float dropout, + bool bias, + bool add_bias_kv, + bool add_zero_attn, + std::optional initializer, + std::optional input_bias_initializer, + std::optional output_bias_initializer, + std::optional const &maybe_name) { + + int kdim = maybe_kdim.value_or(embed_dim); + int vdim = maybe_vdim.value_or(embed_dim); + + MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ + /*embed_dim=*/embed_dim, + /*num_heads=*/num_heads, + /*kdim=*/kdim, + /*vdim=*/vdim, + /*dropout=*/dropout, + /*bias=*/bias, + /*add_bias_kv=*/add_bias_kv, + /*add_zero_attn=*/add_zero_attn, + }; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + + ParallelTensorShape query_shape = this->get_shape(query); + ParallelTensorShape key_shape = this->get_shape(key); + ParallelTensorShape value_shape = this->get_shape(value); + + ParallelTensorShape output_shape = throw_if_unexpected( + get_output_shape(attrs, query_shape, key_shape, value_shape)); + + std::vector weights; + + 
ParallelTensorAttrs weight_attrs = [&] { + ParallelTensorShape weight_shape = throw_if_unexpected( + get_weights_shape(attrs, query_shape, key_shape, value_shape)); + return make_weight_attrs(weight_shape, initializer); + }(); + + weights.push_back(weight_attrs); + + if (bias) { + ParallelTensorShape input_bias_shape = throw_if_unexpected( + get_input_bias_shape(attrs, query_shape, key_shape, value_shape)); + weights.push_back( + make_weight_attrs(input_bias_shape, input_bias_initializer)); + ParallelTensorShape output_bias_shape = throw_if_unexpected( + get_output_bias_shape(attrs, query_shape, key_shape, value_shape)); + weights.push_back( + make_weight_attrs(output_bias_shape, output_bias_initializer)); + + } else if (input_bias_initializer.has_value()) { + throw mk_runtime_error("MultiheadAttention received unexpected input bias " + "initializer even though bias is set to false"); + } else if (output_bias_initializer.has_value()) { + throw mk_runtime_error("MultiheadAttention received unexpected output bias " + "initializer even though bias is set to false"); + } + + return this->add_layer(layer, {query, key, value}, weights, output_shape); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::relu( + parallel_tensor_guid_t const &input, + std::optional const &maybe_name) { + + ElementUnaryAttrs attrs = ElementUnaryAttrs{OperatorType::RELU, std::nullopt}; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + + ParallelTensorShape output_shape = + throw_if_unexpected(get_output_shape(attrs, this->get_shape(input))); + + return this->add_layer(layer, {input}, {}, {output_shape}); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_partition( + parallel_tensor_guid_t const &input, + ff_dim_t dim, + int degree, + std::optional const &maybe_name) { + + RepartitionAttrs attrs = RepartitionAttrs{dim, degree}; + + std::string 
name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + + ParallelTensorShape output_shape = + throw_if_unexpected(get_output_shape(attrs, this->get_shape(input))); + + return this->add_layer(layer, {input}, {}, {output_shape}); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_combine( + parallel_tensor_guid_t const &input, + ff_dim_t dim, + int degree, + std::optional const &maybe_name) { + + CombineAttrs attrs = CombineAttrs{dim, degree}; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + + ParallelTensorShape output_shape = + throw_if_unexpected(get_output_shape(attrs, this->get_shape(input))); + + return this->add_layer(layer, {input}, {}, {output_shape}); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_replicate( + parallel_tensor_guid_t const &input, + int degree, + std::optional const &maybe_name) { + + ReplicateAttrs attrs = ReplicateAttrs{degree}; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + + ParallelTensorShape output_shape = + get_output_shape(attrs, this->get_shape(input)); + + return this->add_layer(layer, {input}, {}, {output_shape}); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_reduce( + parallel_tensor_guid_t const &input, + int degree, + std::optional const &maybe_name) { + + ReductionAttrs attrs = ReductionAttrs{degree}; + + std::string name = + maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); + + ParallelLayerAttrs layer = ParallelLayerAttrs{PCGOperatorAttrs{attrs}, name}; + + ParallelTensorShape output_shape = + throw_if_unexpected(get_output_shape(attrs, this->get_shape(input))); + + return this->add_layer(layer, 
{input}, {}, {output_shape}); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::as_type( + parallel_tensor_guid_t const &input, + DataType goal_datatype, + std::string const &name) { + DataType input_datatype = this->get_shape(input).data_type; + if (input_datatype == goal_datatype) { + return input; + } else if (can_strictly_promote_datatype_from_to(input_datatype, + goal_datatype)) { + return this->cast(input, goal_datatype, name); + } else { + throw mk_runtime_error( + fmt::format("Could not convert provided tensor data type {} to " + "desired data type {}", + input_datatype, + goal_datatype)); + } +} + +ParallelTensorShape ParallelComputationGraphBuilder::get_shape( + parallel_tensor_guid_t const &t) const { + return get_parallel_tensor_attrs(this->pcg, t).shape; +} + +std::vector ParallelComputationGraphBuilder::add_layer( + ParallelLayerAttrs const &layer, + std::vector const &inputs, + std::vector const &weights, + std::vector const &outputs) { + std::vector raw_weight_tensors; + for (auto const &kv : enumerate_vector(weights)) { + int weight_idx = kv.first; + ParallelTensorAttrs weight_tensor_attrs = kv.second; + + std::optional weight_name = + transform(layer.name, [&](std::string const &layer_name) { + return fmt::format("{}.weights[{}]", layer_name, weight_idx); + }); + ParallelLayerAttrs weight_layer_attrs = ParallelLayerAttrs{ + PCGOperatorAttrs{WeightAttrs{}}, + weight_name, + }; + std::vector weight_layer_inputs = {}; + std::vector weight_output_attrs = { + weight_tensor_attrs}; + raw_weight_tensors.push_back(get_only(this->pcg.raw_graph + .add_operator(weight_layer_attrs, + weight_layer_inputs, + weight_output_attrs) + .outputs)); + } + + std::vector raw_inputs = + transform(inputs, [](parallel_tensor_guid_t const &t) { + return t.raw_graph_output; + }); + std::vector raw_outputs = + this->pcg.raw_graph + .add_operator( + layer, concat_vectors(raw_inputs, raw_weight_tensors), outputs) + .outputs; + return transform(raw_outputs, 
[](MultiDiOutput const &o) { + return parallel_tensor_guid_t{o}; + }); +} + +std::vector ParallelComputationGraphBuilder::add_layer( + ParallelLayerAttrs const &layer, + std::vector const &inputs, + std::vector const &weights, + std::vector const &outputs) { + return this->add_layer(layer, + inputs, + weights, + transform(outputs, [](ParallelTensorShape const &s) { + return ParallelTensorAttrs{ + /*shape=*/s, + /*sync_type=*/std::nullopt, + /*initializer=*/std::nullopt, + /*create_gradients=*/CreateGrad::YES, + }; + })); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::add_layer( + ParallelLayerAttrs const &layer, + std::vector const &inputs, + std::vector const &weights, + ParallelTensorAttrs const &output) { + std::vector outputs = {output}; + return get_only(this->add_layer(layer, inputs, weights, outputs)); +} + +parallel_tensor_guid_t ParallelComputationGraphBuilder::add_layer( + ParallelLayerAttrs const &layer, + std::vector const &inputs, + std::vector const &weights, + ParallelTensorShape const &output) { + std::vector outputs = {output}; + return get_only(this->add_layer(layer, inputs, weights, outputs)); +} + +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_added_result.dtg.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_added_result.dtg.cc new file mode 100644 index 0000000000..7b2dbf8de1 --- /dev/null +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_added_result.dtg.cc @@ -0,0 +1,67 @@ +// THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
+// If you would like to modify this datatype, instead modify +// lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_added_result.struct.toml +/* proj-data +{ + "generated_from": "cb4fa8a3a6319d9b7de628a58d08bfed" +} +*/ + +#include "pcg/parallel_computation_graph/parallel_layer_added_result.dtg.h" + +#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" +#include "utils/fmt/vector.h" +#include +#include + +namespace FlexFlow { +ParallelLayerAddedResult::ParallelLayerAddedResult( + ::FlexFlow::parallel_layer_guid_t const ¶llel_layer, + std::vector<::FlexFlow::parallel_tensor_guid_t> const &outputs) + : parallel_layer(parallel_layer), outputs(outputs) {} +bool ParallelLayerAddedResult::operator==( + ParallelLayerAddedResult const &other) const { + return std::tie(this->parallel_layer, this->outputs) == + std::tie(other.parallel_layer, other.outputs); +} +bool ParallelLayerAddedResult::operator!=( + ParallelLayerAddedResult const &other) const { + return std::tie(this->parallel_layer, this->outputs) != + std::tie(other.parallel_layer, other.outputs); +} +bool ParallelLayerAddedResult::operator<( + ParallelLayerAddedResult const &other) const { + return std::tie(this->parallel_layer, this->outputs) < + std::tie(other.parallel_layer, other.outputs); +} +bool ParallelLayerAddedResult::operator>( + ParallelLayerAddedResult const &other) const { + return std::tie(this->parallel_layer, this->outputs) > + std::tie(other.parallel_layer, other.outputs); +} +bool ParallelLayerAddedResult::operator<=( + ParallelLayerAddedResult const &other) const { + return std::tie(this->parallel_layer, this->outputs) <= + std::tie(other.parallel_layer, other.outputs); +} +bool ParallelLayerAddedResult::operator>=( + ParallelLayerAddedResult const &other) const { + return std::tie(this->parallel_layer, this->outputs) >= + std::tie(other.parallel_layer, other.outputs); +} +} // namespace FlexFlow 
+ +namespace FlexFlow { +std::string format_as(ParallelLayerAddedResult const &x) { + std::ostringstream oss; + oss << ""; + return oss.str(); +} +std::ostream &operator<<(std::ostream &s, ParallelLayerAddedResult const &x) { + return s << fmt::to_string(x); +} +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_attrs.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_attrs.cc new file mode 100644 index 0000000000..5995e4ee01 --- /dev/null +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_attrs.cc @@ -0,0 +1,10 @@ +#include "pcg/parallel_computation_graph/parallel_layer_attrs.h" +#include "op-attrs/pcg_operator_attrs.h" + +namespace FlexFlow { + +OperatorType get_op_type(ParallelLayerAttrs const &a) { + return get_op_type(a.op_attrs); +} + +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/parallel_layer_attrs.dtg.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_attrs.dtg.cc similarity index 50% rename from lib/pcg/src/pcg/parallel_layer_attrs.dtg.cc rename to lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_attrs.dtg.cc index 455fb22baf..5a982b13ab 100644 --- a/lib/pcg/src/pcg/parallel_layer_attrs.dtg.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_attrs.dtg.cc @@ -1,13 +1,13 @@ // THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
// If you would like to modify this datatype, instead modify -// lib/pcg/include/pcg/parallel_layer_attrs.struct.toml +// lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_attrs.struct.toml /* proj-data { - "generated_from": "97fa0b11c59ae892a8a530ffd67e33ad" + "generated_from": "1b3a0491865fd43c79afcf4939b56fae" } */ -#include "pcg/parallel_layer_attrs.dtg.h" +#include "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h" #include "op-attrs/operator_attrs.h" #include "utils/stack_string.h" @@ -16,34 +16,40 @@ namespace FlexFlow { ParallelLayerAttrs::ParallelLayerAttrs( - ::FlexFlow::PCGOperatorAttrs const &attrs, + ::FlexFlow::PCGOperatorAttrs const &op_attrs, std::optional<::FlexFlow::stack_string> const &name) - : attrs(attrs), name(name) {} + : op_attrs(op_attrs), name(name) {} bool ParallelLayerAttrs::operator==(ParallelLayerAttrs const &other) const { - return std::tie(this->attrs, this->name) == std::tie(other.attrs, other.name); + return std::tie(this->op_attrs, this->name) == + std::tie(other.op_attrs, other.name); } bool ParallelLayerAttrs::operator!=(ParallelLayerAttrs const &other) const { - return std::tie(this->attrs, this->name) != std::tie(other.attrs, other.name); + return std::tie(this->op_attrs, this->name) != + std::tie(other.op_attrs, other.name); } bool ParallelLayerAttrs::operator<(ParallelLayerAttrs const &other) const { - return std::tie(this->attrs, this->name) < std::tie(other.attrs, other.name); + return std::tie(this->op_attrs, this->name) < + std::tie(other.op_attrs, other.name); } bool ParallelLayerAttrs::operator>(ParallelLayerAttrs const &other) const { - return std::tie(this->attrs, this->name) > std::tie(other.attrs, other.name); + return std::tie(this->op_attrs, this->name) > + std::tie(other.op_attrs, other.name); } bool ParallelLayerAttrs::operator<=(ParallelLayerAttrs const &other) const { - return std::tie(this->attrs, this->name) <= std::tie(other.attrs, other.name); + return std::tie(this->op_attrs, 
this->name) <= + std::tie(other.op_attrs, other.name); } bool ParallelLayerAttrs::operator>=(ParallelLayerAttrs const &other) const { - return std::tie(this->attrs, this->name) >= std::tie(other.attrs, other.name); + return std::tie(this->op_attrs, this->name) >= + std::tie(other.op_attrs, other.name); } } // namespace FlexFlow namespace std { size_t hash::operator()( - FlexFlow::ParallelLayerAttrs const &x) const { + ::FlexFlow::ParallelLayerAttrs const &x) const { size_t result = 0; - result ^= std::hash<::FlexFlow::PCGOperatorAttrs>{}(x.attrs) + 0x9e3779b9 + + result ^= std::hash<::FlexFlow::PCGOperatorAttrs>{}(x.op_attrs) + 0x9e3779b9 + (result << 6) + (result >> 2); result ^= std::hash>>{}(x.name) + @@ -53,26 +59,35 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ParallelLayerAttrs - adl_serializer::from_json(json const &j) { - return { - j.at("attrs").template get<::FlexFlow::PCGOperatorAttrs>(), +::FlexFlow::ParallelLayerAttrs + adl_serializer<::FlexFlow::ParallelLayerAttrs>::from_json(json const &j) { + return ::FlexFlow::ParallelLayerAttrs{ + j.at("op_attrs").template get<::FlexFlow::PCGOperatorAttrs>(), j.at("name") .template get>>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ParallelLayerAttrs const &v) { +void adl_serializer<::FlexFlow::ParallelLayerAttrs>::to_json( + json &j, ::FlexFlow::ParallelLayerAttrs const &v) { j["__type"] = "ParallelLayerAttrs"; - j["attrs"] = v.attrs; + j["op_attrs"] = v.op_attrs; j["name"] = v.name; } } // namespace nlohmann +namespace rc { +Gen<::FlexFlow::ParallelLayerAttrs> + Arbitrary<::FlexFlow::ParallelLayerAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ParallelLayerAttrs>( + gen::arbitrary<::FlexFlow::PCGOperatorAttrs>(), + gen::arbitrary>>()); +} +} // namespace rc + namespace FlexFlow { std::string format_as(ParallelLayerAttrs const &x) { std::ostringstream oss; oss << ""; return oss.str(); diff --git a/lib/pcg/src/pcg/operator_guid_t.dtg.cc 
b/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.cc similarity index 50% rename from lib/pcg/src/pcg/operator_guid_t.dtg.cc rename to lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.cc index 46b031f7e1..df575ebc98 100644 --- a/lib/pcg/src/pcg/operator_guid_t.dtg.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.cc @@ -1,43 +1,50 @@ // THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! // If you would like to modify this datatype, instead modify -// lib/pcg/include/pcg/operator_guid_t.struct.toml +// lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.struct.toml /* proj-data { - "generated_from": "348b5a610f4ff6f545884564ee9a1e6a" + "generated_from": "c31301efeb92e151b04943786aa7bec1" } */ -#include "pcg/operator_guid_t.dtg.h" +#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" #include "utils/graph.h" #include namespace FlexFlow { -operator_guid_t::operator_guid_t(::FlexFlow::Node const &raw_graph_node) +parallel_layer_guid_t::parallel_layer_guid_t( + ::FlexFlow::Node const &raw_graph_node) : raw_graph_node(raw_graph_node) {} -bool operator_guid_t::operator==(operator_guid_t const &other) const { +bool parallel_layer_guid_t::operator==( + parallel_layer_guid_t const &other) const { return std::tie(this->raw_graph_node) == std::tie(other.raw_graph_node); } -bool operator_guid_t::operator!=(operator_guid_t const &other) const { +bool parallel_layer_guid_t::operator!=( + parallel_layer_guid_t const &other) const { return std::tie(this->raw_graph_node) != std::tie(other.raw_graph_node); } -bool operator_guid_t::operator<(operator_guid_t const &other) const { +bool parallel_layer_guid_t::operator<( + parallel_layer_guid_t const &other) const { return std::tie(this->raw_graph_node) < std::tie(other.raw_graph_node); } -bool operator_guid_t::operator>(operator_guid_t const &other) const { +bool parallel_layer_guid_t::operator>( + parallel_layer_guid_t const 
&other) const { return std::tie(this->raw_graph_node) > std::tie(other.raw_graph_node); } -bool operator_guid_t::operator<=(operator_guid_t const &other) const { +bool parallel_layer_guid_t::operator<=( + parallel_layer_guid_t const &other) const { return std::tie(this->raw_graph_node) <= std::tie(other.raw_graph_node); } -bool operator_guid_t::operator>=(operator_guid_t const &other) const { +bool parallel_layer_guid_t::operator>=( + parallel_layer_guid_t const &other) const { return std::tie(this->raw_graph_node) >= std::tie(other.raw_graph_node); } } // namespace FlexFlow namespace std { -size_t hash::operator()( - FlexFlow::operator_guid_t const &x) const { +size_t hash::operator()( + ::FlexFlow::parallel_layer_guid_t const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::Node>{}(x.raw_graph_node) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -46,14 +53,14 @@ size_t hash::operator()( } // namespace std namespace FlexFlow { -std::string format_as(operator_guid_t const &x) { +std::string format_as(parallel_layer_guid_t const &x) { std::ostringstream oss; - oss << ""; return oss.str(); } -std::ostream &operator<<(std::ostream &s, operator_guid_t const &x) { +std::ostream &operator<<(std::ostream &s, parallel_layer_guid_t const &x) { return s << fmt::to_string(x); } } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/parallel_tensor_attrs.dtg.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.cc similarity index 84% rename from lib/pcg/src/pcg/parallel_tensor_attrs.dtg.cc rename to lib/pcg/src/pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.cc index ae5d618172..88f7ed4d3c 100644 --- a/lib/pcg/src/pcg/parallel_tensor_attrs.dtg.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.cc @@ -1,13 +1,13 @@ // THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! 
// If you would like to modify this datatype, instead modify -// lib/pcg/include/pcg/parallel_tensor_attrs.struct.toml +// lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_attrs.struct.toml /* proj-data { - "generated_from": "b3e086b380bbc41d99332e1463a34b28" + "generated_from": "3d641c90950f49a7bef664d0153c97f6" } */ -#include "pcg/parallel_tensor_attrs.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/param_sync.dtg.h" @@ -82,7 +82,7 @@ bool ParallelTensorAttrs::operator>=(ParallelTensorAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ParallelTensorAttrs const &x) const { + ::FlexFlow::ParallelTensorAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ParallelTensorShape>{}(x.shape) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -98,17 +98,17 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::ParallelTensorAttrs - adl_serializer::from_json(json const &j) { - return { +::FlexFlow::ParallelTensorAttrs + adl_serializer<::FlexFlow::ParallelTensorAttrs>::from_json(json const &j) { + return ::FlexFlow::ParallelTensorAttrs{ j.at("shape").template get<::FlexFlow::ParallelTensorShape>(), j.at("sync_type").template get>(), j.at("initializer") .template get>(), j.at("create_gradients").template get<::FlexFlow::CreateGrad>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::ParallelTensorAttrs const &v) { +void adl_serializer<::FlexFlow::ParallelTensorAttrs>::to_json( + json &j, ::FlexFlow::ParallelTensorAttrs const &v) { j["__type"] = "ParallelTensorAttrs"; j["shape"] = v.shape; j["sync_type"] = v.sync_type; @@ -117,6 +117,17 @@ void adl_serializer::to_json( } } // namespace nlohmann +namespace rc { +Gen<::FlexFlow::ParallelTensorAttrs> + Arbitrary<::FlexFlow::ParallelTensorAttrs>::arbitrary() { + return gen::construct<::FlexFlow::ParallelTensorAttrs>( + 
gen::arbitrary<::FlexFlow::ParallelTensorShape>(), + gen::arbitrary>(), + gen::arbitrary>(), + gen::arbitrary<::FlexFlow::CreateGrad>()); +} +} // namespace rc + namespace FlexFlow { std::string format_as(ParallelTensorAttrs const &x) { std::ostringstream oss; diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.cc new file mode 100644 index 0000000000..38c2970225 --- /dev/null +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.cc @@ -0,0 +1,66 @@ +// THIS FILE WAS AUTO-GENERATED BY proj. DO NOT MODIFY IT! +// If you would like to modify this datatype, instead modify +// lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.struct.toml +/* proj-data +{ + "generated_from": "de2c2d33bfa5cd72f0e51954d6879f38" +} +*/ + +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" + +#include "utils/graph/multidiedge.h" +#include + +namespace FlexFlow { +parallel_tensor_guid_t::parallel_tensor_guid_t( + ::FlexFlow::MultiDiOutput const &raw_graph_output) + : raw_graph_output(raw_graph_output) {} +bool parallel_tensor_guid_t::operator==( + parallel_tensor_guid_t const &other) const { + return std::tie(this->raw_graph_output) == std::tie(other.raw_graph_output); +} +bool parallel_tensor_guid_t::operator!=( + parallel_tensor_guid_t const &other) const { + return std::tie(this->raw_graph_output) != std::tie(other.raw_graph_output); +} +bool parallel_tensor_guid_t::operator<( + parallel_tensor_guid_t const &other) const { + return std::tie(this->raw_graph_output) < std::tie(other.raw_graph_output); +} +bool parallel_tensor_guid_t::operator>( + parallel_tensor_guid_t const &other) const { + return std::tie(this->raw_graph_output) > std::tie(other.raw_graph_output); +} +bool parallel_tensor_guid_t::operator<=( + parallel_tensor_guid_t const &other) const { + return std::tie(this->raw_graph_output) <= 
std::tie(other.raw_graph_output); +} +bool parallel_tensor_guid_t::operator>=( + parallel_tensor_guid_t const &other) const { + return std::tie(this->raw_graph_output) >= std::tie(other.raw_graph_output); +} +} // namespace FlexFlow + +namespace std { +size_t hash::operator()( + ::FlexFlow::parallel_tensor_guid_t const &x) const { + size_t result = 0; + result ^= std::hash<::FlexFlow::MultiDiOutput>{}(x.raw_graph_output) + + 0x9e3779b9 + (result << 6) + (result >> 2); + return result; +} +} // namespace std + +namespace FlexFlow { +std::string format_as(parallel_tensor_guid_t const &x) { + std::ostringstream oss; + oss << ""; + return oss.str(); +} +std::ostream &operator<<(std::ostream &s, parallel_tensor_guid_t const &x) { + return s << fmt::to_string(x); +} +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/side_size_t.dtg.cc b/lib/pcg/src/pcg/side_size_t.dtg.cc index 54db2974fe..0d13091cc8 100644 --- a/lib/pcg/src/pcg/side_size_t.dtg.cc +++ b/lib/pcg/src/pcg/side_size_t.dtg.cc @@ -35,7 +35,7 @@ bool side_size_t::operator>=(side_size_t const &other) const { namespace std { size_t hash::operator()( - FlexFlow::side_size_t const &x) const { + ::FlexFlow::side_size_t const &x) const { size_t result = 0; result ^= std::hash{}(x.unwrapped) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -44,20 +44,20 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::side_size_t - adl_serializer::from_json(json const &j) { - return {j.at("unwrapped").template get()}; +::FlexFlow::side_size_t + adl_serializer<::FlexFlow::side_size_t>::from_json(json const &j) { + return ::FlexFlow::side_size_t{j.at("unwrapped").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::side_size_t const &v) { +void adl_serializer<::FlexFlow::side_size_t>::to_json( + json &j, ::FlexFlow::side_size_t const &v) { j["__type"] = "side_size_t"; j["unwrapped"] = v.unwrapped; } } // namespace nlohmann namespace rc { -Gen Arbitrary::arbitrary() { - return 
gen::construct(gen::arbitrary()); +Gen<::FlexFlow::side_size_t> Arbitrary<::FlexFlow::side_size_t>::arbitrary() { + return gen::construct<::FlexFlow::side_size_t>(gen::arbitrary()); } } // namespace rc diff --git a/lib/pcg/src/pcg/strided_rectangle.dtg.cc b/lib/pcg/src/pcg/strided_rectangle.dtg.cc index e743a2722a..d50c5861ea 100644 --- a/lib/pcg/src/pcg/strided_rectangle.dtg.cc +++ b/lib/pcg/src/pcg/strided_rectangle.dtg.cc @@ -39,7 +39,7 @@ bool StridedRectangle::operator>=(StridedRectangle const &other) const { namespace std { size_t hash::operator()( - FlexFlow::StridedRectangle const &x) const { + ::FlexFlow::StridedRectangle const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::FFOrdered<::FlexFlow::StridedRectangleSide>>{}( @@ -50,23 +50,24 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::StridedRectangle - adl_serializer::from_json(json const &j) { - return {j.at("sides") - .template get< - ::FlexFlow::FFOrdered<::FlexFlow::StridedRectangleSide>>()}; +::FlexFlow::StridedRectangle + adl_serializer<::FlexFlow::StridedRectangle>::from_json(json const &j) { + return ::FlexFlow::StridedRectangle{ + j.at("sides") + .template get< + ::FlexFlow::FFOrdered<::FlexFlow::StridedRectangleSide>>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::StridedRectangle const &v) { +void adl_serializer<::FlexFlow::StridedRectangle>::to_json( + json &j, ::FlexFlow::StridedRectangle const &v) { j["__type"] = "StridedRectangle"; j["sides"] = v.sides; } } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::StridedRectangle> + Arbitrary<::FlexFlow::StridedRectangle>::arbitrary() { + return gen::construct<::FlexFlow::StridedRectangle>( gen::arbitrary< ::FlexFlow::FFOrdered<::FlexFlow::StridedRectangleSide>>()); } diff --git a/lib/pcg/src/pcg/strided_rectangle_side.cc b/lib/pcg/src/pcg/strided_rectangle_side.cc index 80258886d7..5e7274141d 100644 --- 
a/lib/pcg/src/pcg/strided_rectangle_side.cc +++ b/lib/pcg/src/pcg/strided_rectangle_side.cc @@ -9,7 +9,7 @@ StridedRectangleSide strided_side_from_size_and_stride(side_size_t, } side_size_t get_side_size(StridedRectangleSide const &s) { - return s.num_points.unwrapped * s.stride; + return side_size_t{s.num_points.unwrapped * s.stride}; } } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/strided_rectangle_side.dtg.cc b/lib/pcg/src/pcg/strided_rectangle_side.dtg.cc index 0bb31b0496..e2533f7a21 100644 --- a/lib/pcg/src/pcg/strided_rectangle_side.dtg.cc +++ b/lib/pcg/src/pcg/strided_rectangle_side.dtg.cc @@ -44,7 +44,7 @@ bool StridedRectangleSide::operator>=(StridedRectangleSide const &other) const { namespace std { size_t hash::operator()( - FlexFlow::StridedRectangleSide const &x) const { + ::FlexFlow::StridedRectangleSide const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::num_points_t>{}(x.num_points) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -55,13 +55,14 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::StridedRectangleSide - adl_serializer::from_json(json const &j) { - return {j.at("num_points").template get<::FlexFlow::num_points_t>(), - j.at("stride").template get()}; +::FlexFlow::StridedRectangleSide + adl_serializer<::FlexFlow::StridedRectangleSide>::from_json(json const &j) { + return ::FlexFlow::StridedRectangleSide{ + j.at("num_points").template get<::FlexFlow::num_points_t>(), + j.at("stride").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::StridedRectangleSide const &v) { +void adl_serializer<::FlexFlow::StridedRectangleSide>::to_json( + json &j, ::FlexFlow::StridedRectangleSide const &v) { j["__type"] = "StridedRectangleSide"; j["num_points"] = v.num_points; j["stride"] = v.stride; @@ -69,9 +70,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::StridedRectangleSide> + 
Arbitrary<::FlexFlow::StridedRectangleSide>::arbitrary() { + return gen::construct<::FlexFlow::StridedRectangleSide>( gen::arbitrary<::FlexFlow::num_points_t>(), gen::arbitrary()); } } // namespace rc diff --git a/lib/pcg/src/pcg/tensor_attrs.dtg.cc b/lib/pcg/src/pcg/tensor_attrs.dtg.cc index 46a6fb8d50..e75fe506f6 100644 --- a/lib/pcg/src/pcg/tensor_attrs.dtg.cc +++ b/lib/pcg/src/pcg/tensor_attrs.dtg.cc @@ -81,7 +81,7 @@ bool TensorAttrs::operator>=(TensorAttrs const &other) const { namespace std { size_t hash::operator()( - FlexFlow::TensorAttrs const &x) const { + ::FlexFlow::TensorAttrs const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::TensorShape>{}(x.shape) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -97,17 +97,17 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::TensorAttrs - adl_serializer::from_json(json const &j) { - return { +::FlexFlow::TensorAttrs + adl_serializer<::FlexFlow::TensorAttrs>::from_json(json const &j) { + return ::FlexFlow::TensorAttrs{ j.at("shape").template get<::FlexFlow::TensorShape>(), j.at("initializer") .template get>(), j.at("create_gradients").template get(), j.at("sync_type").template get>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::TensorAttrs const &v) { +void adl_serializer<::FlexFlow::TensorAttrs>::to_json( + json &j, ::FlexFlow::TensorAttrs const &v) { j["__type"] = "TensorAttrs"; j["shape"] = v.shape; j["initializer"] = v.initializer; diff --git a/lib/pcg/src/pcg/tensor_guid_t.dtg.cc b/lib/pcg/src/pcg/tensor_guid_t.dtg.cc index 9d57291112..c8fbb7299b 100644 --- a/lib/pcg/src/pcg/tensor_guid_t.dtg.cc +++ b/lib/pcg/src/pcg/tensor_guid_t.dtg.cc @@ -3,7 +3,7 @@ // lib/pcg/include/pcg/tensor_guid_t.struct.toml /* proj-data { - "generated_from": "dc15fcbb876ec70509dfa8b662963bc3" + "generated_from": "1e3914b97a465f1752ce510614145b37" } */ @@ -37,7 +37,7 @@ bool tensor_guid_t::operator>=(tensor_guid_t const &other) const { namespace std { size_t hash::operator()( - 
FlexFlow::tensor_guid_t const &x) const { + ::FlexFlow::tensor_guid_t const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::MultiDiOutput>{}(x.raw_graph_output) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/pcg/test/src/pcg/dataflow_graph/algorithms.cc b/lib/pcg/test/src/pcg/dataflow_graph/algorithms.cc new file mode 100644 index 0000000000..f47151e76a --- /dev/null +++ b/lib/pcg/test/src/pcg/dataflow_graph/algorithms.cc @@ -0,0 +1,76 @@ +#include "pcg/dataflow_graph/algorithms.h" +#include "test/utils/doctest.h" +#include "utils/fmt/unordered_set.h" + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_inputs/get_outputs") { + DataflowGraph g; + + int n1_label = 1; + int n2_label = 2; + int n3_label = 3; + int n4_label = 4; + + std::string o1_label = "o1"; + std::string o2_label = "o2"; + std::string o3_label = "o3"; + std::string o4_label = "o4"; + + OperatorAddedResult n1_added = g.add_operator(n1_label, {}, {o1_label}); + Node n1 = n1_added.node; + MultiDiOutput o1 = get_only(n1_added.outputs); + + OperatorAddedResult n2_added = g.add_operator(n2_label, {}, {o2_label}); + Node n2 = n2_added.node; + MultiDiOutput o2 = get_only(n2_added.outputs); + + OperatorAddedResult n3_added = g.add_operator(n3_label, {}, {o3_label}); + Node n3 = n3_added.node; + MultiDiOutput o3 = get_only(n3_added.outputs); + + OperatorAddedResult n4_added = + g.add_operator(n4_label, {o1, o2, o3}, {o4_label}); + Node n4 = n4_added.node; + MultiDiOutput o4 = get_only(n4_added.outputs); + + SUBCASE("get_inputs") { + std::vector result = get_inputs(g, n4); + std::vector correct = {o1, o2, o3}; + CHECK(result == correct); + } + + SUBCASE("get_outputs") { + std::vector result = get_outputs(g, n4); + std::vector correct = {o4}; + CHECK(result == correct); + } + } + + TEST_CASE("topological_ordering") { + DataflowGraph g; + + int n1_label = 1; + int n2_label = 2; + int n3_label = 3; + + std::string o1_label = "o1"; + std::string o2_label = "o2"; + std::string o3_label = 
"o3"; + + OperatorAddedResult n1_added = g.add_operator(n1_label, {}, {o1_label}); + Node n1 = n1_added.node; + MultiDiOutput o1 = get_only(n1_added.outputs); + + OperatorAddedResult n2_added = g.add_operator(n2_label, {o1}, {o2_label}); + Node n2 = n2_added.node; + MultiDiOutput o2 = get_only(n2_added.outputs); + + OperatorAddedResult n3_added = g.add_operator(n3_label, {o2}, {o3_label}); + Node n3 = n3_added.node; + MultiDiOutput o3 = get_only(n3_added.outputs); + + std::vector result = topological_ordering(g); + std::vector correct = {n1, n2, n3}; + CHECK(result == correct); + } +} diff --git a/lib/pcg/test/src/pcg/initializers/uniform_initializer_attrs.cc b/lib/pcg/test/src/pcg/initializers/uniform_initializer_attrs.cc new file mode 100644 index 0000000000..0b75e3ae1a --- /dev/null +++ b/lib/pcg/test/src/pcg/initializers/uniform_initializer_attrs.cc @@ -0,0 +1,11 @@ +#include "pcg/initializers/uniform_initializer_attrs.h" +#include "test/utils/doctest.h" +#include "test/utils/rapidcheck.h" + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Arbitrary") { + RC_SUBCASE([](UniformInitializerAttrs const &attrs) { + RC_ASSERT(attrs.max_val >= attrs.min_val); + }); + } +} diff --git a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc new file mode 100644 index 0000000000..fa3fce91eb --- /dev/null +++ b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc @@ -0,0 +1,35 @@ +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "test/utils/rapidcheck.h" + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("topological_ordering") { + // TODO(@lockshaw) should probably be replaced with a rapidcheck test that + // compares ParallelComputationGraph to DataflowGraph, but since we + // currently don't have rapidcheck generation for DataflowGraph this will + // have to do for now + + ParallelComputationGraph pcg = 
empty_parallel_computation_graph(); + + ParallelLayerAttrs layer_label = some(); + ParallelTensorAttrs tensor_label = some(); + + ParallelLayerAddedResult layer1_added = + add_parallel_layer(pcg, layer_label, {}, {tensor_label}); + parallel_layer_guid_t layer1 = layer1_added.parallel_layer; + parallel_tensor_guid_t tensor1 = get_only(layer1_added.outputs); + + ParallelLayerAddedResult layer2_added = + add_parallel_layer(pcg, layer_label, {tensor1}, {tensor_label}); + parallel_layer_guid_t layer2 = layer2_added.parallel_layer; + parallel_tensor_guid_t tensor2 = get_only(layer2_added.outputs); + + ParallelLayerAddedResult layer3_added = + add_parallel_layer(pcg, layer_label, {tensor2}, {tensor_label}); + parallel_layer_guid_t layer3 = layer3_added.parallel_layer; + parallel_tensor_guid_t tensor3 = get_only(layer3_added.outputs); + + std::vector result = topological_ordering(pcg); + std::vector correct = {layer1, layer2, layer3}; + CHECK(result == correct); + } +} diff --git a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc new file mode 100644 index 0000000000..50ad727c12 --- /dev/null +++ b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc @@ -0,0 +1,610 @@ +#include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_layer_attrs.h" +#include "test/utils/doctest.h" +#include "utils/containers.h" +#include "utils/containers/without_nullopts.h" + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("ParallelComputationGraphBuilder::add") { + ParallelComputationGraphBuilder b; + + ShardParallelDim d1 = ShardParallelDim{10, 2}; + ShardParallelDim d2 = ShardParallelDim{15, 3}; + + ParallelTensorShape lhs_shape = ParallelTensorShape{ + 
ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 2}, + ShardParallelDim{15, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + ParallelTensorShape rhs_shape = lhs_shape; + + parallel_tensor_guid_t lhs = b.create_input_tensor(lhs_shape); + parallel_tensor_guid_t rhs = b.create_input_tensor(rhs_shape); + + parallel_tensor_guid_t out = b.add(lhs, rhs); + parallel_layer_guid_t layer = get_source_layer(b.pcg, out); + + SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + std::vector correct = {lhs, rhs}; + CHECK(result == correct); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {out}; + CHECK(result == correct); + } + + SUBCASE("op attrs") { + PCGOperatorAttrs result = get_parallel_layer_attrs(b.pcg, layer).op_attrs; + PCGOperatorAttrs correct = PCGOperatorAttrs{ElementBinaryAttrs{ + OperatorType::EW_ADD, DataType::FLOAT, false, false}}; + CHECK(result == correct); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::batch_matmul") { + ParallelComputationGraphBuilder b; + + ShardParallelDim batch_dim = ShardParallelDim{4, 2}; + + ParallelTensorShape a_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + batch_dim, + ShardParallelDim{10, 1}, + ShardParallelDim{15, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + ParallelTensorShape b_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + batch_dim, + ShardParallelDim{15, 3}, + ShardParallelDim{12, 1}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + parallel_tensor_guid_t a_tensor = b.create_input_tensor(a_shape); + parallel_tensor_guid_t b_tensor = b.create_input_tensor(b_shape); + + parallel_tensor_guid_t out = b.batch_matmul(a_tensor, b_tensor); + parallel_layer_guid_t layer = get_source_layer(b.pcg, out); + + 
SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + std::vector correct = {a_tensor, b_tensor}; + CHECK(result == correct); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {out}; + CHECK(result == correct); + } + + SUBCASE("op attrs") { + PCGOperatorAttrs result = get_parallel_layer_attrs(b.pcg, layer).op_attrs; + PCGOperatorAttrs correct = PCGOperatorAttrs{BatchMatmulAttrs{-1, -1}}; + CHECK(result == correct); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::cast") { + ParallelComputationGraphBuilder b; + + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 2}, + ShardParallelDim{12, 1}, + }, + ReplicaParallelDimSet{ + SumDegree{3}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + DataType output_datatype = DataType::DOUBLE; + parallel_tensor_guid_t input = b.create_input_tensor(input_shape); + parallel_tensor_guid_t output = b.cast(input, output_datatype); + parallel_layer_guid_t layer = get_source_layer(b.pcg, output); + + SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + std::vector correct = {input}; + CHECK(result == correct); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {output}; + CHECK(result == correct); + + ParallelTensorShape output_shape = + get_parallel_tensor_attrs(b.pcg, output).shape; + CHECK(output_shape.data_type == output_datatype); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::conv2d") { + ParallelComputationGraphBuilder b; + + size_t batch_size = 2; + + TensorShape unpar_input_shape = TensorShape{ + TensorDims{FFOrdered{batch_size, 3, 10, 10}}, + DataType::FLOAT, + }; + + ParallelTensorShape input_shape = + lift_to_parallel_with_degrees(unpar_input_shape, + SumDegree{1}, + DiscardCopyDegree{1}, + FFOrdered{2, 1, 1, 1}); + + parallel_tensor_guid_t input = 
b.create_input_tensor(input_shape); + + int outChannels = 6; + int kernelH = 5; + int kernelW = 4; + int strideH = 3; + int strideW = 2; + int paddingH = 1; + int paddingW = 0; + parallel_tensor_guid_t output = b.conv2d(input, + /*outChannels=*/outChannels, + /*kernelH=*/kernelH, + /*kernelW=*/kernelW, + /*strideH=*/strideH, + /*strideW=*/strideW, + /*paddingH=*/paddingH, + /*paddingW=*/paddingW); + + std::unordered_map layers = + generate_map(get_parallel_layers(b.pcg), + [&](parallel_layer_guid_t const &l) { + return get_parallel_layer_attrs(b.pcg, l); + }); + CHECK_MESSAGE(layers.size() == 4, "Incorrect layers ", layers); + + auto num_attrs_of_type = [&](OperatorType op_type) -> int { + return count(values(layers), [&](ParallelLayerAttrs const &l) { + return get_op_type(l) == op_type; + }); + }; + + int num_weight_attrs = num_attrs_of_type(OperatorType::WEIGHT); + CHECK(num_weight_attrs == 2); + + int num_input_attrs = num_attrs_of_type(OperatorType::INPUT); + CHECK(num_input_attrs == 1); + + int num_conv_attrs = num_attrs_of_type(OperatorType::CONV2D); + CHECK(num_conv_attrs == 1); + + parallel_layer_guid_t conv_guid = get_only(without_nullopts(transform( + as_vector(items(layers)), + [](std::pair const &kv) + -> std::optional { + if (get_op_type(kv.second) == OperatorType::CONV2D) { + return kv.first; + } else { + return std::nullopt; + } + }))); + Conv2DAttrs conv_attrs = layers.at(conv_guid).op_attrs.get(); + Conv2DAttrs correct_attrs = Conv2DAttrs{ + outChannels, + kernelH, + kernelW, + strideH, + strideW, + paddingH, + paddingW, + /*groups=*/1, + /*activation=*/std::nullopt, + /*use_bias=*/true, + }; + CHECK(conv_attrs == correct_attrs); + + ParallelTensorShape correct_output_shape = + get_output_shape(correct_attrs, input_shape); + ParallelTensorShape correct_kernel_shape = + get_kernel_shape(correct_attrs, input_shape); + ParallelTensorShape correct_bias_shape = + get_bias_shape(correct_attrs, input_shape); + + std::vector conv_inputs = + 
get_layer_inputs(b.pcg, conv_guid); + + parallel_tensor_guid_t conv_input = conv_inputs.at(0); + ParallelTensorShape conv_input_shape = + get_parallel_tensor_attrs(b.pcg, conv_input).shape; + CHECK(conv_input_shape == input_shape); + + parallel_tensor_guid_t conv_kernel = conv_inputs.at(1); + ParallelTensorShape conv_kernel_shape = + get_parallel_tensor_attrs(b.pcg, conv_kernel).shape; + CHECK(conv_kernel_shape == correct_kernel_shape); + + parallel_tensor_guid_t conv_bias = conv_inputs.at(2); + ParallelTensorShape conv_bias_shape = + get_parallel_tensor_attrs(b.pcg, conv_bias).shape; + CHECK(conv_bias_shape == correct_bias_shape); + + std::vector conv_outputs = + get_layer_outputs(b.pcg, conv_guid); + CHECK(conv_outputs.size() == 1); + + parallel_tensor_guid_t conv_output = get_only(conv_outputs); + ParallelTensorShape conv_output_shape = + get_parallel_tensor_attrs(b.pcg, conv_output).shape; + CHECK(conv_output_shape == correct_output_shape); + }; + + TEST_CASE("ParallelComputationGraphBuilder::dense") { + ParallelComputationGraphBuilder b; + + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 2}, + ShardParallelDim{16, 1}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + int outDim = 14; + + parallel_tensor_guid_t input = b.create_input_tensor(input_shape); + parallel_tensor_guid_t output = b.dense(input, + outDim, + Activation::RELU, + /*use_bias=*/true, + DataType::FLOAT); + parallel_layer_guid_t layer = get_source_layer(b.pcg, output); + + SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + CHECK(result.at(0) == input); + + CHECK(result.size() == 3); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {output}; + CHECK(result == correct); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::embedding") { + ParallelComputationGraphBuilder b; + + 
ShardParallelDim batch_dim = ShardParallelDim{12, 2}; + ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + batch_dim, + feature_dim, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::INT32, + }; + + parallel_tensor_guid_t input = b.create_input_tensor(input_shape); + parallel_tensor_guid_t output = b.embedding(input, + /*num_entries=*/32, + /*outDim=*/8, + AggregateOp::SUM, + DataType::FLOAT); + parallel_layer_guid_t layer = get_source_layer(b.pcg, output); + + SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + CHECK(result.at(0) == input); + + CHECK(result.size() == 2); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {output}; + CHECK(result == correct); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::multihead_attention") { + ParallelComputationGraphBuilder b; + + ShardParallelDim batch_dim = ShardParallelDim{12, 2}; + ShardParallelDim sequence_dim = ShardParallelDim{16, 1}; + ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + ParallelTensorShape query_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + batch_dim, + sequence_dim, + feature_dim, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + ParallelTensorShape key_shape = query_shape; + ParallelTensorShape value_shape = query_shape; + + int embed_dim = 8; + int num_heads = 6; + + parallel_tensor_guid_t query = b.create_input_tensor(query_shape); + parallel_tensor_guid_t key = b.create_input_tensor(key_shape); + parallel_tensor_guid_t value = b.create_input_tensor(value_shape); + parallel_tensor_guid_t output = + b.multihead_attention(query, key, value, embed_dim, num_heads); + parallel_layer_guid_t layer = get_source_layer(b.pcg, output); + + SUBCASE("inputs") { + std::vector result = + 
get_layer_inputs(b.pcg, layer); + CHECK(result.at(0) == query); + CHECK(result.at(1) == key); + CHECK(result.at(2) == value); + CHECK(result.size() == 6); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {output}; + CHECK(result == correct); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::relu") { + ParallelComputationGraphBuilder b; + + ShardParallelDim batch_dim = ShardParallelDim{18, 3}; + ShardParallelDim feature_dim = ShardParallelDim{32, 1}; + + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + batch_dim, + feature_dim, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + parallel_tensor_guid_t input = b.create_input_tensor(input_shape); + parallel_tensor_guid_t output = b.relu(input); + parallel_layer_guid_t layer = get_source_layer(b.pcg, output); + + SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + std::vector correct = {input}; + CHECK(result == correct); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {output}; + CHECK(result == correct); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::parallel_partition") { + ParallelComputationGraphBuilder b; + + ShardParallelDim batch_dim = ShardParallelDim{18, 2}; + ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + batch_dim, + feature_dim, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + parallel_tensor_guid_t input = b.create_input_tensor(input_shape); + parallel_tensor_guid_t output = b.parallel_partition(input, ff_dim_t{0}, 2); + parallel_layer_guid_t layer = get_source_layer(b.pcg, output); + + SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + std::vector correct 
= {input}; + CHECK(result == correct); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {output}; + CHECK(result == correct); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::parallel_combine") { + ParallelComputationGraphBuilder b; + + ShardParallelDim batch_dim = ShardParallelDim{18, 2}; + ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + batch_dim, + feature_dim, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + parallel_tensor_guid_t input = b.create_input_tensor(input_shape); + parallel_tensor_guid_t output = b.parallel_combine(input, ff_dim_t{0}, 2); + parallel_layer_guid_t layer = get_source_layer(b.pcg, output); + + SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + std::vector correct = {input}; + CHECK(result == correct); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {output}; + CHECK(result == correct); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::parallel_replicate") { + ParallelComputationGraphBuilder b; + + ShardParallelDim batch_dim = ShardParallelDim{18, 2}; + ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + batch_dim, + feature_dim, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + parallel_tensor_guid_t input = b.create_input_tensor(input_shape); + parallel_tensor_guid_t output = b.parallel_replicate(input, 2); + parallel_layer_guid_t layer = get_source_layer(b.pcg, output); + + SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + std::vector correct = {input}; + CHECK(result == correct); + } + + SUBCASE("outputs") { + std::vector result 
= + get_layer_outputs(b.pcg, layer); + std::vector correct = {output}; + CHECK(result == correct); + } + } + + TEST_CASE("ParallelComputationGraphBuilder::parallel_reduce") { + ParallelComputationGraphBuilder b; + + ShardParallelDim batch_dim = ShardParallelDim{18, 2}; + ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + batch_dim, + feature_dim, + }, + ReplicaParallelDimSet{ + SumDegree{4}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + parallel_tensor_guid_t input = b.create_input_tensor(input_shape); + parallel_tensor_guid_t output = b.parallel_reduce(input, 2); + parallel_layer_guid_t layer = get_source_layer(b.pcg, output); + + SUBCASE("inputs") { + std::vector result = + get_layer_inputs(b.pcg, layer); + std::vector correct = {input}; + CHECK(result == correct); + } + + SUBCASE("outputs") { + std::vector result = + get_layer_outputs(b.pcg, layer); + std::vector correct = {output}; + CHECK(result == correct); + } + } +} diff --git a/lib/pcg/test/src/test_computation_graph_builder.cc b/lib/pcg/test/src/test_computation_graph_builder.cc index e88e231bd0..34be83c281 100644 --- a/lib/pcg/test/src/test_computation_graph_builder.cc +++ b/lib/pcg/test/src/test_computation_graph_builder.cc @@ -8,7 +8,7 @@ TEST_SUITE(FF_TEST_SUITE) { size_t batch_size = 2; - TensorShape input_shape = { + TensorShape input_shape = TensorShape{ TensorDims{FFOrdered{batch_size, 3, 10, 10}}, DataType::FLOAT, }; diff --git a/lib/runtime/test/src/test_serialization.cc b/lib/runtime/test/src/test_serialization.cc index d80808b7fb..e46a481a1a 100644 --- a/lib/runtime/test/src/test_serialization.cc +++ b/lib/runtime/test/src/test_serialization.cc @@ -34,12 +34,12 @@ TEST_CASE("Serialization") { } for (CompleteOperatorAttrs const &op : operator_attrs) { - CHECK(rc::check("Serialization", [](CompleteOperatorAttrs const &pre_op) { + RC_SUBCASE("Serialization", 
[](CompleteOperatorAttrs const &pre_op) { pre_op = *rc::gen::arbitrary(); auto post_op = pre_op; ff_task_serialize(sez, post_op); auto post_op = ff_task_deserialize(dez); RC_ASSERT(post_op == pre_op); - })) + }); } } diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_constraint.dtg.h b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_constraint.dtg.h index 35ec9e499f..38e0b66f78 100644 --- a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_constraint.dtg.h +++ b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_constraint.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct OperatorAttributeConstraint { OperatorAttributeConstraint() = delete; - OperatorAttributeConstraint( + explicit OperatorAttributeConstraint( ::FlexFlow::ConstraintType const &constraint_type, ::FlexFlow::OperatorAttributeExpr const &attribute_expr, ::FlexFlow::OperatorAttributeValue const &attribute_value); @@ -41,16 +41,16 @@ struct OperatorAttributeConstraint { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::OperatorAttributeConstraint const &) const; +struct hash<::FlexFlow::OperatorAttributeConstraint> { + size_t operator()(::FlexFlow::OperatorAttributeConstraint const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::OperatorAttributeConstraint from_json(json const &); - static void to_json(json &, FlexFlow::OperatorAttributeConstraint const &); +struct adl_serializer<::FlexFlow::OperatorAttributeConstraint> { + static ::FlexFlow::OperatorAttributeConstraint from_json(json const &); + static void to_json(json &, ::FlexFlow::OperatorAttributeConstraint const &); }; } // namespace nlohmann diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_access.dtg.h b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_access.dtg.h index 
5a30c40f8d..559352de40 100644 --- a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_access.dtg.h +++ b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_access.dtg.h @@ -21,7 +21,7 @@ namespace FlexFlow { struct OperatorAttributeListIndexAccess { OperatorAttributeListIndexAccess() = delete; - OperatorAttributeListIndexAccess( + explicit OperatorAttributeListIndexAccess( ::FlexFlow::OperatorAttributeKey const &attribute_key, int const &index); bool operator==(OperatorAttributeListIndexAccess const &) const; @@ -37,24 +37,24 @@ struct OperatorAttributeListIndexAccess { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::OperatorAttributeListIndexAccess const &) const; +struct hash<::FlexFlow::OperatorAttributeListIndexAccess> { + size_t operator()(::FlexFlow::OperatorAttributeListIndexAccess const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::OperatorAttributeListIndexAccess from_json(json const &); +struct adl_serializer<::FlexFlow::OperatorAttributeListIndexAccess> { + static ::FlexFlow::OperatorAttributeListIndexAccess from_json(json const &); static void to_json(json &, - FlexFlow::OperatorAttributeListIndexAccess const &); + ::FlexFlow::OperatorAttributeListIndexAccess const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::OperatorAttributeListIndexAccess> { + static Gen<::FlexFlow::OperatorAttributeListIndexAccess> arbitrary(); }; } // namespace rc diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_size.dtg.h b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_size.dtg.h index 17d76a08f1..23779f9d3e 100644 --- a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_size.dtg.h +++ 
b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_size.dtg.h @@ -21,7 +21,7 @@ namespace FlexFlow { struct OperatorAttributeListSize { OperatorAttributeListSize() = delete; - OperatorAttributeListSize( + explicit OperatorAttributeListSize( ::FlexFlow::OperatorAttributeKey const &attribute_key); bool operator==(OperatorAttributeListSize const &) const; @@ -36,23 +36,23 @@ struct OperatorAttributeListSize { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::OperatorAttributeListSize const &) const; +struct hash<::FlexFlow::OperatorAttributeListSize> { + size_t operator()(::FlexFlow::OperatorAttributeListSize const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::OperatorAttributeListSize from_json(json const &); - static void to_json(json &, FlexFlow::OperatorAttributeListSize const &); +struct adl_serializer<::FlexFlow::OperatorAttributeListSize> { + static ::FlexFlow::OperatorAttributeListSize from_json(json const &); + static void to_json(json &, ::FlexFlow::OperatorAttributeListSize const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::OperatorAttributeListSize> { + static Gen<::FlexFlow::OperatorAttributeListSize> arbitrary(); }; } // namespace rc diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_pattern.dtg.h b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_pattern.dtg.h index 7bce198f3d..4a491af2f6 100644 --- a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_pattern.dtg.h +++ b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_pattern.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct OperatorAttributePattern { OperatorAttributePattern() = delete; - OperatorAttributePattern( + explicit OperatorAttributePattern( 
std::unordered_set<::FlexFlow::OperatorAttributeConstraint> const &attribute_constraints); @@ -35,16 +35,16 @@ struct OperatorAttributePattern { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::OperatorAttributePattern const &) const; +struct hash<::FlexFlow::OperatorAttributePattern> { + size_t operator()(::FlexFlow::OperatorAttributePattern const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::OperatorAttributePattern from_json(json const &); - static void to_json(json &, FlexFlow::OperatorAttributePattern const &); +struct adl_serializer<::FlexFlow::OperatorAttributePattern> { + static ::FlexFlow::OperatorAttributePattern from_json(json const &); + static void to_json(json &, ::FlexFlow::OperatorAttributePattern const &); }; } // namespace nlohmann diff --git a/lib/substitutions/include/substitutions/output_graph/attr_constant.dtg.h b/lib/substitutions/include/substitutions/output_graph/attr_constant.dtg.h index 9dd20bb10e..bc76f68c4d 100644 --- a/lib/substitutions/include/substitutions/output_graph/attr_constant.dtg.h +++ b/lib/substitutions/include/substitutions/output_graph/attr_constant.dtg.h @@ -19,7 +19,7 @@ namespace FlexFlow { struct AttrConstant { AttrConstant() = delete; - AttrConstant(::FlexFlow::OperatorAttributeValue const &value); + explicit AttrConstant(::FlexFlow::OperatorAttributeValue const &value); bool operator==(AttrConstant const &) const; bool operator!=(AttrConstant const &) const; @@ -33,8 +33,8 @@ struct AttrConstant { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::AttrConstant const &) const; +struct hash<::FlexFlow::AttrConstant> { + size_t operator()(::FlexFlow::AttrConstant const &) const; }; } // namespace std diff --git a/lib/substitutions/include/substitutions/output_graph/output_graph_expr.dtg.h b/lib/substitutions/include/substitutions/output_graph/output_graph_expr.dtg.h index 3d6fb21574..1e78d76777 100644 --- 
a/lib/substitutions/include/substitutions/output_graph/output_graph_expr.dtg.h +++ b/lib/substitutions/include/substitutions/output_graph/output_graph_expr.dtg.h @@ -16,8 +16,9 @@ namespace FlexFlow { struct OutputGraphExpr { OutputGraphExpr() = delete; - OutputGraphExpr(::FlexFlow::NodeLabelledOpenMultiDiGraph< - ::FlexFlow::OutputOperatorAttrsAssignment> const &raw_graph); + explicit OutputGraphExpr( + ::FlexFlow::NodeLabelledOpenMultiDiGraph< + ::FlexFlow::OutputOperatorAttrsAssignment> const &raw_graph); ::FlexFlow::NodeLabelledOpenMultiDiGraph< ::FlexFlow::OutputOperatorAttrsAssignment> diff --git a/lib/substitutions/include/substitutions/output_graph/output_operator_attr_access.dtg.h b/lib/substitutions/include/substitutions/output_graph/output_operator_attr_access.dtg.h index 0d585f0aa0..d7137c90a6 100644 --- a/lib/substitutions/include/substitutions/output_graph/output_operator_attr_access.dtg.h +++ b/lib/substitutions/include/substitutions/output_graph/output_operator_attr_access.dtg.h @@ -20,8 +20,9 @@ namespace FlexFlow { struct OutputOperatorAttrAccess { OutputOperatorAttrAccess() = delete; - OutputOperatorAttrAccess(::FlexFlow::Node const &node, - ::FlexFlow::OperatorAttributeExpr const &attr_expr); + explicit OutputOperatorAttrAccess( + ::FlexFlow::Node const &node, + ::FlexFlow::OperatorAttributeExpr const &attr_expr); bool operator==(OutputOperatorAttrAccess const &) const; bool operator!=(OutputOperatorAttrAccess const &) const; @@ -36,8 +37,8 @@ struct OutputOperatorAttrAccess { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::OutputOperatorAttrAccess const &) const; +struct hash<::FlexFlow::OutputOperatorAttrAccess> { + size_t operator()(::FlexFlow::OutputOperatorAttrAccess const &) const; }; } // namespace std diff --git a/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.dtg.h b/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.dtg.h index 
5586a90a08..5718965c27 100644 --- a/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.dtg.h +++ b/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.dtg.h @@ -21,7 +21,7 @@ namespace FlexFlow { struct OutputOperatorAttrsAssignment { OutputOperatorAttrsAssignment() = delete; - OutputOperatorAttrsAssignment( + explicit OutputOperatorAttrsAssignment( std::unordered_map<::FlexFlow::OperatorAttributeKey, ::FlexFlow::OutputOperatorAttributeExpr> const &assignments); @@ -36,8 +36,8 @@ struct OutputOperatorAttrsAssignment { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::OutputOperatorAttrsAssignment const &) const; +struct hash<::FlexFlow::OutputOperatorAttrsAssignment> { + size_t operator()(::FlexFlow::OutputOperatorAttrsAssignment const &) const; }; } // namespace std diff --git a/lib/substitutions/include/substitutions/pcg_pattern.dtg.h b/lib/substitutions/include/substitutions/pcg_pattern.dtg.h index 0c0cc41891..98aec04e61 100644 --- a/lib/substitutions/include/substitutions/pcg_pattern.dtg.h +++ b/lib/substitutions/include/substitutions/pcg_pattern.dtg.h @@ -17,9 +17,9 @@ namespace FlexFlow { struct PCGPattern { PCGPattern() = delete; - PCGPattern(::FlexFlow::OutputLabelledOpenMultiDiGraph< - ::FlexFlow::OperatorAttributePattern, - ::FlexFlow::TensorAttributePattern> const &raw_graph); + explicit PCGPattern(::FlexFlow::OutputLabelledOpenMultiDiGraph< + ::FlexFlow::OperatorAttributePattern, + ::FlexFlow::TensorAttributePattern> const &raw_graph); ::FlexFlow::OutputLabelledOpenMultiDiGraph< ::FlexFlow::OperatorAttributePattern, diff --git a/lib/substitutions/include/substitutions/sub_parallel_computation_graph.dtg.h b/lib/substitutions/include/substitutions/sub_parallel_computation_graph.dtg.h index d31d65d83b..f0d6882dc9 100644 --- a/lib/substitutions/include/substitutions/sub_parallel_computation_graph.dtg.h +++ 
b/lib/substitutions/include/substitutions/sub_parallel_computation_graph.dtg.h @@ -17,7 +17,7 @@ namespace FlexFlow { struct SubParallelComputationGraph { SubParallelComputationGraph() = delete; - SubParallelComputationGraph( + explicit SubParallelComputationGraph( ::FlexFlow::OutputLabelledOpenMultiDiGraph< ::FlexFlow::ParallelLayerAttrs, ::FlexFlow::ParallelTensorAttrs> const &raw_graph); diff --git a/lib/substitutions/include/substitutions/substitution.dtg.h b/lib/substitutions/include/substitutions/substitution.dtg.h index 5f50d9bafc..3515299acb 100644 --- a/lib/substitutions/include/substitutions/substitution.dtg.h +++ b/lib/substitutions/include/substitutions/substitution.dtg.h @@ -16,14 +16,14 @@ namespace FlexFlow { struct Substitution { Substitution() = delete; - Substitution(::FlexFlow::PCGPattern const &pcg_pattern, - ::FlexFlow::OutputGraphExpr const &output_graph_expr, - ::FlexFlow::bidict<::FlexFlow::InputMultiDiEdge, - ::FlexFlow::InputMultiDiEdge> const - &input_edge_match_to_output, - ::FlexFlow::bidict<::FlexFlow::OutputMultiDiEdge, - ::FlexFlow::OutputMultiDiEdge> const - &output_edge_match_to_output); + explicit Substitution(::FlexFlow::PCGPattern const &pcg_pattern, + ::FlexFlow::OutputGraphExpr const &output_graph_expr, + ::FlexFlow::bidict<::FlexFlow::InputMultiDiEdge, + ::FlexFlow::InputMultiDiEdge> const + &input_edge_match_to_output, + ::FlexFlow::bidict<::FlexFlow::OutputMultiDiEdge, + ::FlexFlow::OutputMultiDiEdge> const + &output_edge_match_to_output); ::FlexFlow::PCGPattern pcg_pattern; ::FlexFlow::OutputGraphExpr output_graph_expr; diff --git a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_constraint.dtg.h b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_constraint.dtg.h index ba705a5d35..16807ff37c 100644 --- a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_constraint.dtg.h +++ 
b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_constraint.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct TensorAttributeConstraint { TensorAttributeConstraint() = delete; - TensorAttributeConstraint( + explicit TensorAttributeConstraint( ::FlexFlow::ConstraintType const &constraint_type, ::FlexFlow::TensorAttributeExpr const &attribute_expr, ::FlexFlow::TensorAttributeValue const &attribute_value); @@ -41,16 +41,16 @@ struct TensorAttributeConstraint { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::TensorAttributeConstraint const &) const; +struct hash<::FlexFlow::TensorAttributeConstraint> { + size_t operator()(::FlexFlow::TensorAttributeConstraint const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::TensorAttributeConstraint from_json(json const &); - static void to_json(json &, FlexFlow::TensorAttributeConstraint const &); +struct adl_serializer<::FlexFlow::TensorAttributeConstraint> { + static ::FlexFlow::TensorAttributeConstraint from_json(json const &); + static void to_json(json &, ::FlexFlow::TensorAttributeConstraint const &); }; } // namespace nlohmann diff --git a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_access.dtg.h b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_access.dtg.h index 473f4e1698..e81d2fcc04 100644 --- a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_access.dtg.h +++ b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_access.dtg.h @@ -21,7 +21,7 @@ namespace FlexFlow { struct TensorAttributeListIndexAccess { TensorAttributeListIndexAccess() = delete; - TensorAttributeListIndexAccess( + explicit TensorAttributeListIndexAccess( ::FlexFlow::TensorAttributeKey const &attribute_key, int const &index); bool operator==(TensorAttributeListIndexAccess const &) const; @@ -37,23 +37,24 @@ struct 
TensorAttributeListIndexAccess { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::TensorAttributeListIndexAccess const &) const; +struct hash<::FlexFlow::TensorAttributeListIndexAccess> { + size_t operator()(::FlexFlow::TensorAttributeListIndexAccess const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::TensorAttributeListIndexAccess from_json(json const &); - static void to_json(json &, FlexFlow::TensorAttributeListIndexAccess const &); +struct adl_serializer<::FlexFlow::TensorAttributeListIndexAccess> { + static ::FlexFlow::TensorAttributeListIndexAccess from_json(json const &); + static void to_json(json &, + ::FlexFlow::TensorAttributeListIndexAccess const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::TensorAttributeListIndexAccess> { + static Gen<::FlexFlow::TensorAttributeListIndexAccess> arbitrary(); }; } // namespace rc diff --git a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_size.dtg.h b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_size.dtg.h index 1630014bdf..5516a4b07b 100644 --- a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_size.dtg.h +++ b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_size.dtg.h @@ -21,7 +21,8 @@ namespace FlexFlow { struct TensorAttributeListSize { TensorAttributeListSize() = delete; - TensorAttributeListSize(::FlexFlow::TensorAttributeKey const &attribute_key); + explicit TensorAttributeListSize( + ::FlexFlow::TensorAttributeKey const &attribute_key); bool operator==(TensorAttributeListSize const &) const; bool operator!=(TensorAttributeListSize const &) const; @@ -35,23 +36,23 @@ struct TensorAttributeListSize { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::TensorAttributeListSize const &) const; +struct 
hash<::FlexFlow::TensorAttributeListSize> { + size_t operator()(::FlexFlow::TensorAttributeListSize const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::TensorAttributeListSize from_json(json const &); - static void to_json(json &, FlexFlow::TensorAttributeListSize const &); +struct adl_serializer<::FlexFlow::TensorAttributeListSize> { + static ::FlexFlow::TensorAttributeListSize from_json(json const &); + static void to_json(json &, ::FlexFlow::TensorAttributeListSize const &); }; } // namespace nlohmann namespace rc { template <> -struct Arbitrary { - static Gen arbitrary(); +struct Arbitrary<::FlexFlow::TensorAttributeListSize> { + static Gen<::FlexFlow::TensorAttributeListSize> arbitrary(); }; } // namespace rc diff --git a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_pattern.dtg.h b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_pattern.dtg.h index ecc4bc7da0..a106b59073 100644 --- a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_pattern.dtg.h +++ b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_pattern.dtg.h @@ -22,7 +22,7 @@ namespace FlexFlow { struct TensorAttributePattern { TensorAttributePattern() = delete; - TensorAttributePattern( + explicit TensorAttributePattern( std::unordered_set<::FlexFlow::TensorAttributeConstraint> const &attribute_constraints); @@ -35,16 +35,16 @@ struct TensorAttributePattern { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::TensorAttributePattern const &) const; +struct hash<::FlexFlow::TensorAttributePattern> { + size_t operator()(::FlexFlow::TensorAttributePattern const &) const; }; } // namespace std namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::TensorAttributePattern from_json(json const &); - static void to_json(json &, FlexFlow::TensorAttributePattern const &); +struct 
adl_serializer<::FlexFlow::TensorAttributePattern> { + static ::FlexFlow::TensorAttributePattern from_json(json const &); + static void to_json(json &, ::FlexFlow::TensorAttributePattern const &); }; } // namespace nlohmann diff --git a/lib/substitutions/include/substitutions/unlabelled/closed_pattern_edge.dtg.h b/lib/substitutions/include/substitutions/unlabelled/closed_pattern_edge.dtg.h index 6bf815791d..c67b508928 100644 --- a/lib/substitutions/include/substitutions/unlabelled/closed_pattern_edge.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/closed_pattern_edge.dtg.h @@ -17,7 +17,7 @@ namespace FlexFlow { struct ClosedPatternEdge { ClosedPatternEdge() = delete; - ClosedPatternEdge(::FlexFlow::MultiDiEdge const &raw_edge); + explicit ClosedPatternEdge(::FlexFlow::MultiDiEdge const &raw_edge); bool operator==(ClosedPatternEdge const &) const; bool operator!=(ClosedPatternEdge const &) const; @@ -31,8 +31,8 @@ struct ClosedPatternEdge { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::ClosedPatternEdge const &) const; +struct hash<::FlexFlow::ClosedPatternEdge> { + size_t operator()(::FlexFlow::ClosedPatternEdge const &) const; }; } // namespace std diff --git a/lib/substitutions/include/substitutions/unlabelled/downward_open_pattern_edge.dtg.h b/lib/substitutions/include/substitutions/unlabelled/downward_open_pattern_edge.dtg.h index 5ce0e63073..4eb6cbee7a 100644 --- a/lib/substitutions/include/substitutions/unlabelled/downward_open_pattern_edge.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/downward_open_pattern_edge.dtg.h @@ -17,7 +17,8 @@ namespace FlexFlow { struct DownwardOpenPatternEdge { DownwardOpenPatternEdge() = delete; - DownwardOpenPatternEdge(::FlexFlow::DownwardOpenMultiDiEdge const &raw_edge); + explicit DownwardOpenPatternEdge( + ::FlexFlow::DownwardOpenMultiDiEdge const &raw_edge); bool operator==(DownwardOpenPatternEdge const &) const; bool operator!=(DownwardOpenPatternEdge const &) 
const; @@ -31,8 +32,8 @@ struct DownwardOpenPatternEdge { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::DownwardOpenPatternEdge const &) const; +struct hash<::FlexFlow::DownwardOpenPatternEdge> { + size_t operator()(::FlexFlow::DownwardOpenPatternEdge const &) const; }; } // namespace std diff --git a/lib/substitutions/include/substitutions/unlabelled/edge_splits.dtg.h b/lib/substitutions/include/substitutions/unlabelled/edge_splits.dtg.h index e92fe547b1..a69a5b5f6b 100644 --- a/lib/substitutions/include/substitutions/unlabelled/edge_splits.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/edge_splits.dtg.h @@ -18,7 +18,7 @@ namespace FlexFlow { struct UnlabelledPatternEdgeSplits { UnlabelledPatternEdgeSplits() = delete; - UnlabelledPatternEdgeSplits( + explicit UnlabelledPatternEdgeSplits( ::FlexFlow::bidict<::FlexFlow::MultiDiEdge, std::pair<::FlexFlow::OutputMultiDiEdge, ::FlexFlow::InputMultiDiEdge>> const diff --git a/lib/substitutions/include/substitutions/unlabelled/input_pattern_edge.dtg.h b/lib/substitutions/include/substitutions/unlabelled/input_pattern_edge.dtg.h index f292acba14..1240244762 100644 --- a/lib/substitutions/include/substitutions/unlabelled/input_pattern_edge.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/input_pattern_edge.dtg.h @@ -17,7 +17,7 @@ namespace FlexFlow { struct InputPatternEdge { InputPatternEdge() = delete; - InputPatternEdge(::FlexFlow::InputMultiDiEdge const &raw_edge); + explicit InputPatternEdge(::FlexFlow::InputMultiDiEdge const &raw_edge); bool operator==(InputPatternEdge const &) const; bool operator!=(InputPatternEdge const &) const; @@ -31,8 +31,8 @@ struct InputPatternEdge { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::InputPatternEdge const &) const; +struct hash<::FlexFlow::InputPatternEdge> { + size_t operator()(::FlexFlow::InputPatternEdge const &) const; }; } // namespace std diff --git 
a/lib/substitutions/include/substitutions/unlabelled/match_additional_criterion.dtg.h b/lib/substitutions/include/substitutions/unlabelled/match_additional_criterion.dtg.h index e910be21ba..f6c1df278a 100644 --- a/lib/substitutions/include/substitutions/unlabelled/match_additional_criterion.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/match_additional_criterion.dtg.h @@ -18,7 +18,7 @@ namespace FlexFlow { struct MatchAdditionalCriterion { MatchAdditionalCriterion() = delete; - MatchAdditionalCriterion( + explicit MatchAdditionalCriterion( std::function const &node_criterion, std::function const &node_assignment, ::FlexFlow::bidict<::FlexFlow::PatternEdge, diff --git a/lib/substitutions/include/substitutions/unlabelled/output_pattern_edge.dtg.h b/lib/substitutions/include/substitutions/unlabelled/output_pattern_edge.dtg.h index 04ec8c656d..0b8994fbff 100644 --- a/lib/substitutions/include/substitutions/unlabelled/output_pattern_edge.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/output_pattern_edge.dtg.h @@ -17,7 +17,7 @@ namespace FlexFlow { struct OutputPatternEdge { OutputPatternEdge() = delete; - OutputPatternEdge(::FlexFlow::OutputMultiDiEdge const &raw_edge); + explicit OutputPatternEdge(::FlexFlow::OutputMultiDiEdge const &raw_edge); bool operator==(OutputPatternEdge const &) const; bool operator!=(OutputPatternEdge const &) const; @@ -31,8 +31,8 @@ struct OutputPatternEdge { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::OutputPatternEdge const &) const; +struct hash<::FlexFlow::OutputPatternEdge> { + size_t operator()(::FlexFlow::OutputPatternEdge const &) const; }; } // namespace std diff --git a/lib/substitutions/include/substitutions/unlabelled/pattern_edge.dtg.h b/lib/substitutions/include/substitutions/unlabelled/pattern_edge.dtg.h index 4883590130..8303cd8c9c 100644 --- a/lib/substitutions/include/substitutions/unlabelled/pattern_edge.dtg.h +++ 
b/lib/substitutions/include/substitutions/unlabelled/pattern_edge.dtg.h @@ -17,7 +17,7 @@ namespace FlexFlow { struct PatternEdge { PatternEdge() = delete; - PatternEdge(::FlexFlow::OpenMultiDiEdge const &raw_edge); + explicit PatternEdge(::FlexFlow::OpenMultiDiEdge const &raw_edge); bool operator==(PatternEdge const &) const; bool operator!=(PatternEdge const &) const; @@ -31,8 +31,8 @@ struct PatternEdge { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::PatternEdge const &) const; +struct hash<::FlexFlow::PatternEdge> { + size_t operator()(::FlexFlow::PatternEdge const &) const; }; } // namespace std diff --git a/lib/substitutions/include/substitutions/unlabelled/pattern_node.dtg.h b/lib/substitutions/include/substitutions/unlabelled/pattern_node.dtg.h index 56471c2e08..a8e473382c 100644 --- a/lib/substitutions/include/substitutions/unlabelled/pattern_node.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/pattern_node.dtg.h @@ -17,7 +17,7 @@ namespace FlexFlow { struct PatternNode { PatternNode() = delete; - PatternNode(::FlexFlow::Node const &raw_node); + explicit PatternNode(::FlexFlow::Node const &raw_node); bool operator==(PatternNode const &) const; bool operator!=(PatternNode const &) const; @@ -31,8 +31,8 @@ struct PatternNode { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::PatternNode const &) const; +struct hash<::FlexFlow::PatternNode> { + size_t operator()(::FlexFlow::PatternNode const &) const; }; } // namespace std diff --git a/lib/substitutions/include/substitutions/unlabelled/pattern_split.dtg.h b/lib/substitutions/include/substitutions/unlabelled/pattern_split.dtg.h index 453c4020a8..fb5c1d9b25 100644 --- a/lib/substitutions/include/substitutions/unlabelled/pattern_split.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/pattern_split.dtg.h @@ -21,8 +21,9 @@ namespace FlexFlow { struct PatternSplit { PatternSplit() = delete; - 
PatternSplit(std::unordered_set<::FlexFlow::PatternNode> const &first, - std::unordered_set<::FlexFlow::PatternNode> const &second); + explicit PatternSplit( + std::unordered_set<::FlexFlow::PatternNode> const &first, + std::unordered_set<::FlexFlow::PatternNode> const &second); bool operator==(PatternSplit const &) const; bool operator!=(PatternSplit const &) const; @@ -33,9 +34,9 @@ struct PatternSplit { namespace nlohmann { template <> -struct adl_serializer { - static FlexFlow::PatternSplit from_json(json const &); - static void to_json(json &, FlexFlow::PatternSplit const &); +struct adl_serializer<::FlexFlow::PatternSplit> { + static ::FlexFlow::PatternSplit from_json(json const &); + static void to_json(json &, ::FlexFlow::PatternSplit const &); }; } // namespace nlohmann diff --git a/lib/substitutions/include/substitutions/unlabelled/unlabelled_graph_pattern.dtg.h b/lib/substitutions/include/substitutions/unlabelled/unlabelled_graph_pattern.dtg.h index a2ba6c26d2..972dda4200 100644 --- a/lib/substitutions/include/substitutions/unlabelled/unlabelled_graph_pattern.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/unlabelled_graph_pattern.dtg.h @@ -15,7 +15,8 @@ namespace FlexFlow { struct UnlabelledGraphPattern { UnlabelledGraphPattern() = delete; - UnlabelledGraphPattern(::FlexFlow::OpenMultiDiGraphView const &raw_graph); + explicit UnlabelledGraphPattern( + ::FlexFlow::OpenMultiDiGraphView const &raw_graph); ::FlexFlow::OpenMultiDiGraphView raw_graph; }; diff --git a/lib/substitutions/include/substitutions/unlabelled/upward_open_pattern_edge.dtg.h b/lib/substitutions/include/substitutions/unlabelled/upward_open_pattern_edge.dtg.h index 82440b5820..e94403feb4 100644 --- a/lib/substitutions/include/substitutions/unlabelled/upward_open_pattern_edge.dtg.h +++ b/lib/substitutions/include/substitutions/unlabelled/upward_open_pattern_edge.dtg.h @@ -17,7 +17,8 @@ namespace FlexFlow { struct UpwardOpenPatternEdge { UpwardOpenPatternEdge() = delete; - 
UpwardOpenPatternEdge(::FlexFlow::UpwardOpenMultiDiEdge const &raw_edge); + explicit UpwardOpenPatternEdge( + ::FlexFlow::UpwardOpenMultiDiEdge const &raw_edge); bool operator==(UpwardOpenPatternEdge const &) const; bool operator!=(UpwardOpenPatternEdge const &) const; @@ -31,8 +32,8 @@ struct UpwardOpenPatternEdge { namespace std { template <> -struct hash { - size_t operator()(FlexFlow::UpwardOpenPatternEdge const &) const; +struct hash<::FlexFlow::UpwardOpenPatternEdge> { + size_t operator()(::FlexFlow::UpwardOpenPatternEdge const &) const; }; } // namespace std diff --git a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_constraint.dtg.cc b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_constraint.dtg.cc index bc913b7c1a..2956dad2c4 100644 --- a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_constraint.dtg.cc +++ b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_constraint.dtg.cc @@ -73,7 +73,7 @@ bool OperatorAttributeConstraint::operator>=( namespace std { size_t hash::operator()( - FlexFlow::OperatorAttributeConstraint const &x) const { + ::FlexFlow::OperatorAttributeConstraint const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ConstraintType>{}(x.constraint_type) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -86,17 +86,17 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::OperatorAttributeConstraint - adl_serializer::from_json( +::FlexFlow::OperatorAttributeConstraint + adl_serializer<::FlexFlow::OperatorAttributeConstraint>::from_json( json const &j) { - return { + return ::FlexFlow::OperatorAttributeConstraint{ j.at("constraint_type").template get<::FlexFlow::ConstraintType>(), j.at("attribute_expr").template get<::FlexFlow::OperatorAttributeExpr>(), j.at("attribute_value") .template get<::FlexFlow::OperatorAttributeValue>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::OperatorAttributeConstraint 
const &v) { +void adl_serializer<::FlexFlow::OperatorAttributeConstraint>::to_json( + json &j, ::FlexFlow::OperatorAttributeConstraint const &v) { j["__type"] = "OperatorAttributeConstraint"; j["constraint_type"] = v.constraint_type; j["attribute_expr"] = v.attribute_expr; diff --git a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_list_access.dtg.cc b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_list_access.dtg.cc index 71b71d4a51..67e3761515 100644 --- a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_list_access.dtg.cc +++ b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_list_access.dtg.cc @@ -50,7 +50,7 @@ bool OperatorAttributeListIndexAccess::operator>=( namespace std { size_t hash::operator()( - FlexFlow::OperatorAttributeListIndexAccess const &x) const { + ::FlexFlow::OperatorAttributeListIndexAccess const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::OperatorAttributeKey>{}(x.attribute_key) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -61,15 +61,15 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::OperatorAttributeListIndexAccess - adl_serializer::from_json( +::FlexFlow::OperatorAttributeListIndexAccess + adl_serializer<::FlexFlow::OperatorAttributeListIndexAccess>::from_json( json const &j) { - return { + return ::FlexFlow::OperatorAttributeListIndexAccess{ j.at("attribute_key").template get<::FlexFlow::OperatorAttributeKey>(), j.at("index").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::OperatorAttributeListIndexAccess const &v) { +void adl_serializer<::FlexFlow::OperatorAttributeListIndexAccess>::to_json( + json &j, ::FlexFlow::OperatorAttributeListIndexAccess const &v) { j["__type"] = "OperatorAttributeListIndexAccess"; j["attribute_key"] = v.attribute_key; j["index"] = v.index; @@ -77,9 +77,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - 
Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::OperatorAttributeListIndexAccess> + Arbitrary<::FlexFlow::OperatorAttributeListIndexAccess>::arbitrary() { + return gen::construct<::FlexFlow::OperatorAttributeListIndexAccess>( gen::arbitrary<::FlexFlow::OperatorAttributeKey>(), gen::arbitrary()); } diff --git a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_list_size.dtg.cc b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_list_size.dtg.cc index eb7ae28131..2879aca911 100644 --- a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_list_size.dtg.cc +++ b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_list_size.dtg.cc @@ -44,7 +44,7 @@ bool OperatorAttributeListSize::operator>=( namespace std { size_t hash::operator()( - FlexFlow::OperatorAttributeListSize const &x) const { + ::FlexFlow::OperatorAttributeListSize const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::OperatorAttributeKey>{}(x.attribute_key) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -53,23 +53,23 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::OperatorAttributeListSize - adl_serializer::from_json( +::FlexFlow::OperatorAttributeListSize + adl_serializer<::FlexFlow::OperatorAttributeListSize>::from_json( json const &j) { - return { + return ::FlexFlow::OperatorAttributeListSize{ j.at("attribute_key").template get<::FlexFlow::OperatorAttributeKey>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::OperatorAttributeListSize const &v) { +void adl_serializer<::FlexFlow::OperatorAttributeListSize>::to_json( + json &j, ::FlexFlow::OperatorAttributeListSize const &v) { j["__type"] = "OperatorAttributeListSize"; j["attribute_key"] = v.attribute_key; } } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::OperatorAttributeListSize> + 
Arbitrary<::FlexFlow::OperatorAttributeListSize>::arbitrary() { + return gen::construct<::FlexFlow::OperatorAttributeListSize>( gen::arbitrary<::FlexFlow::OperatorAttributeKey>()); } } // namespace rc diff --git a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_pattern.dtg.cc b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_pattern.dtg.cc index 5eaf54bb5f..7aca1e75fc 100644 --- a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_pattern.dtg.cc +++ b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_pattern.dtg.cc @@ -33,7 +33,7 @@ bool OperatorAttributePattern::operator!=( namespace std { size_t hash::operator()( - FlexFlow::OperatorAttributePattern const &x) const { + ::FlexFlow::OperatorAttributePattern const &x) const { size_t result = 0; result ^= std::hash>{}( @@ -44,16 +44,16 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::OperatorAttributePattern - adl_serializer::from_json( +::FlexFlow::OperatorAttributePattern + adl_serializer<::FlexFlow::OperatorAttributePattern>::from_json( json const &j) { - return { + return ::FlexFlow::OperatorAttributePattern{ j.at("attribute_constraints") .template get< std::unordered_set<::FlexFlow::OperatorAttributeConstraint>>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::OperatorAttributePattern const &v) { +void adl_serializer<::FlexFlow::OperatorAttributePattern>::to_json( + json &j, ::FlexFlow::OperatorAttributePattern const &v) { j["__type"] = "OperatorAttributePattern"; j["attribute_constraints"] = v.attribute_constraints; } diff --git a/lib/substitutions/src/substitutions/output_graph/attr_constant.dtg.cc b/lib/substitutions/src/substitutions/output_graph/attr_constant.dtg.cc index f20afc1164..c0dc667822 100644 --- a/lib/substitutions/src/substitutions/output_graph/attr_constant.dtg.cc +++ b/lib/substitutions/src/substitutions/output_graph/attr_constant.dtg.cc @@ -37,7 +37,7 @@ bool 
AttrConstant::operator>=(AttrConstant const &other) const { namespace std { size_t hash::operator()( - FlexFlow::AttrConstant const &x) const { + ::FlexFlow::AttrConstant const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::OperatorAttributeValue>{}(x.value) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/substitutions/src/substitutions/output_graph/output_operator_attr_access.dtg.cc b/lib/substitutions/src/substitutions/output_graph/output_operator_attr_access.dtg.cc index 0c6abc925d..2864ccbfac 100644 --- a/lib/substitutions/src/substitutions/output_graph/output_operator_attr_access.dtg.cc +++ b/lib/substitutions/src/substitutions/output_graph/output_operator_attr_access.dtg.cc @@ -52,7 +52,7 @@ bool OutputOperatorAttrAccess::operator>=( namespace std { size_t hash::operator()( - FlexFlow::OutputOperatorAttrAccess const &x) const { + ::FlexFlow::OutputOperatorAttrAccess const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::Node>{}(x.node) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.dtg.cc b/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.dtg.cc index 7a1950482a..98183c9a14 100644 --- a/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.dtg.cc +++ b/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.dtg.cc @@ -32,7 +32,7 @@ bool OutputOperatorAttrsAssignment::operator!=( namespace std { size_t hash::operator()( - FlexFlow::OutputOperatorAttrsAssignment const &x) const { + ::FlexFlow::OutputOperatorAttrsAssignment const &x) const { size_t result = 0; result ^= std::hash=( namespace std { size_t hash::operator()( - FlexFlow::TensorAttributeConstraint const &x) const { + ::FlexFlow::TensorAttributeConstraint const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::ConstraintType>{}(x.constraint_type) + 0x9e3779b9 + 
(result << 6) + (result >> 2); @@ -86,16 +86,16 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::TensorAttributeConstraint - adl_serializer::from_json( +::FlexFlow::TensorAttributeConstraint + adl_serializer<::FlexFlow::TensorAttributeConstraint>::from_json( json const &j) { - return { + return ::FlexFlow::TensorAttributeConstraint{ j.at("constraint_type").template get<::FlexFlow::ConstraintType>(), j.at("attribute_expr").template get<::FlexFlow::TensorAttributeExpr>(), j.at("attribute_value").template get<::FlexFlow::TensorAttributeValue>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::TensorAttributeConstraint const &v) { +void adl_serializer<::FlexFlow::TensorAttributeConstraint>::to_json( + json &j, ::FlexFlow::TensorAttributeConstraint const &v) { j["__type"] = "TensorAttributeConstraint"; j["constraint_type"] = v.constraint_type; j["attribute_expr"] = v.attribute_expr; diff --git a/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_list_access.dtg.cc b/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_list_access.dtg.cc index 4e28de2c28..c7e81718ed 100644 --- a/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_list_access.dtg.cc +++ b/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_list_access.dtg.cc @@ -50,7 +50,7 @@ bool TensorAttributeListIndexAccess::operator>=( namespace std { size_t hash::operator()( - FlexFlow::TensorAttributeListIndexAccess const &x) const { + ::FlexFlow::TensorAttributeListIndexAccess const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::TensorAttributeKey>{}(x.attribute_key) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -61,14 +61,15 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::TensorAttributeListIndexAccess - adl_serializer::from_json( +::FlexFlow::TensorAttributeListIndexAccess + adl_serializer<::FlexFlow::TensorAttributeListIndexAccess>::from_json( json const &j) { - 
return {j.at("attribute_key").template get<::FlexFlow::TensorAttributeKey>(), - j.at("index").template get()}; + return ::FlexFlow::TensorAttributeListIndexAccess{ + j.at("attribute_key").template get<::FlexFlow::TensorAttributeKey>(), + j.at("index").template get()}; } -void adl_serializer::to_json( - json &j, FlexFlow::TensorAttributeListIndexAccess const &v) { +void adl_serializer<::FlexFlow::TensorAttributeListIndexAccess>::to_json( + json &j, ::FlexFlow::TensorAttributeListIndexAccess const &v) { j["__type"] = "TensorAttributeListIndexAccess"; j["attribute_key"] = v.attribute_key; j["index"] = v.index; @@ -76,9 +77,9 @@ void adl_serializer::to_json( } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::TensorAttributeListIndexAccess> + Arbitrary<::FlexFlow::TensorAttributeListIndexAccess>::arbitrary() { + return gen::construct<::FlexFlow::TensorAttributeListIndexAccess>( gen::arbitrary<::FlexFlow::TensorAttributeKey>(), gen::arbitrary()); } } // namespace rc diff --git a/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_list_size.dtg.cc b/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_list_size.dtg.cc index 24d8b6c025..52a61a8a87 100644 --- a/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_list_size.dtg.cc +++ b/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_list_size.dtg.cc @@ -44,7 +44,7 @@ bool TensorAttributeListSize::operator>=( namespace std { size_t hash::operator()( - FlexFlow::TensorAttributeListSize const &x) const { + ::FlexFlow::TensorAttributeListSize const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::TensorAttributeKey>{}(x.attribute_key) + 0x9e3779b9 + (result << 6) + (result >> 2); @@ -53,22 +53,23 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::TensorAttributeListSize - adl_serializer::from_json( +::FlexFlow::TensorAttributeListSize + 
adl_serializer<::FlexFlow::TensorAttributeListSize>::from_json( json const &j) { - return {j.at("attribute_key").template get<::FlexFlow::TensorAttributeKey>()}; + return ::FlexFlow::TensorAttributeListSize{ + j.at("attribute_key").template get<::FlexFlow::TensorAttributeKey>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::TensorAttributeListSize const &v) { +void adl_serializer<::FlexFlow::TensorAttributeListSize>::to_json( + json &j, ::FlexFlow::TensorAttributeListSize const &v) { j["__type"] = "TensorAttributeListSize"; j["attribute_key"] = v.attribute_key; } } // namespace nlohmann namespace rc { -Gen - Arbitrary::arbitrary() { - return gen::construct( +Gen<::FlexFlow::TensorAttributeListSize> + Arbitrary<::FlexFlow::TensorAttributeListSize>::arbitrary() { + return gen::construct<::FlexFlow::TensorAttributeListSize>( gen::arbitrary<::FlexFlow::TensorAttributeKey>()); } } // namespace rc diff --git a/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_pattern.dtg.cc b/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_pattern.dtg.cc index 121549d4dc..8f96fd49b8 100644 --- a/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_pattern.dtg.cc +++ b/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_pattern.dtg.cc @@ -33,7 +33,7 @@ bool TensorAttributePattern::operator!=( namespace std { size_t hash::operator()( - FlexFlow::TensorAttributePattern const &x) const { + ::FlexFlow::TensorAttributePattern const &x) const { size_t result = 0; result ^= std::hash>{}( @@ -44,14 +44,16 @@ size_t hash::operator()( } // namespace std namespace nlohmann { -FlexFlow::TensorAttributePattern - adl_serializer::from_json(json const &j) { - return {j.at("attribute_constraints") - .template get< - std::unordered_set<::FlexFlow::TensorAttributeConstraint>>()}; +::FlexFlow::TensorAttributePattern + adl_serializer<::FlexFlow::TensorAttributePattern>::from_json( + json const &j) { + return 
::FlexFlow::TensorAttributePattern{ + j.at("attribute_constraints") + .template get< + std::unordered_set<::FlexFlow::TensorAttributeConstraint>>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::TensorAttributePattern const &v) { +void adl_serializer<::FlexFlow::TensorAttributePattern>::to_json( + json &j, ::FlexFlow::TensorAttributePattern const &v) { j["__type"] = "TensorAttributePattern"; j["attribute_constraints"] = v.attribute_constraints; } diff --git a/lib/substitutions/src/substitutions/unlabelled/closed_pattern_edge.dtg.cc b/lib/substitutions/src/substitutions/unlabelled/closed_pattern_edge.dtg.cc index fbefc6f01a..401c738d88 100644 --- a/lib/substitutions/src/substitutions/unlabelled/closed_pattern_edge.dtg.cc +++ b/lib/substitutions/src/substitutions/unlabelled/closed_pattern_edge.dtg.cc @@ -36,7 +36,7 @@ bool ClosedPatternEdge::operator>=(ClosedPatternEdge const &other) const { namespace std { size_t hash::operator()( - FlexFlow::ClosedPatternEdge const &x) const { + ::FlexFlow::ClosedPatternEdge const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::MultiDiEdge>{}(x.raw_edge) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/substitutions/src/substitutions/unlabelled/downward_open_pattern_edge.dtg.cc b/lib/substitutions/src/substitutions/unlabelled/downward_open_pattern_edge.dtg.cc index 30c52fbbb2..65c87db0e4 100644 --- a/lib/substitutions/src/substitutions/unlabelled/downward_open_pattern_edge.dtg.cc +++ b/lib/substitutions/src/substitutions/unlabelled/downward_open_pattern_edge.dtg.cc @@ -43,7 +43,7 @@ bool DownwardOpenPatternEdge::operator>=( namespace std { size_t hash::operator()( - FlexFlow::DownwardOpenPatternEdge const &x) const { + ::FlexFlow::DownwardOpenPatternEdge const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::DownwardOpenMultiDiEdge>{}(x.raw_edge) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/substitutions/src/substitutions/unlabelled/input_pattern_edge.dtg.cc 
b/lib/substitutions/src/substitutions/unlabelled/input_pattern_edge.dtg.cc index f3f5a8ce45..e46becf4be 100644 --- a/lib/substitutions/src/substitutions/unlabelled/input_pattern_edge.dtg.cc +++ b/lib/substitutions/src/substitutions/unlabelled/input_pattern_edge.dtg.cc @@ -36,7 +36,7 @@ bool InputPatternEdge::operator>=(InputPatternEdge const &other) const { namespace std { size_t hash::operator()( - FlexFlow::InputPatternEdge const &x) const { + ::FlexFlow::InputPatternEdge const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::InputMultiDiEdge>{}(x.raw_edge) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/substitutions/src/substitutions/unlabelled/output_pattern_edge.dtg.cc b/lib/substitutions/src/substitutions/unlabelled/output_pattern_edge.dtg.cc index fb9de06135..152115d52a 100644 --- a/lib/substitutions/src/substitutions/unlabelled/output_pattern_edge.dtg.cc +++ b/lib/substitutions/src/substitutions/unlabelled/output_pattern_edge.dtg.cc @@ -37,7 +37,7 @@ bool OutputPatternEdge::operator>=(OutputPatternEdge const &other) const { namespace std { size_t hash::operator()( - FlexFlow::OutputPatternEdge const &x) const { + ::FlexFlow::OutputPatternEdge const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::OutputMultiDiEdge>{}(x.raw_edge) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/substitutions/src/substitutions/unlabelled/pattern_edge.dtg.cc b/lib/substitutions/src/substitutions/unlabelled/pattern_edge.dtg.cc index e4d11d0d7e..a19e5bb6d1 100644 --- a/lib/substitutions/src/substitutions/unlabelled/pattern_edge.dtg.cc +++ b/lib/substitutions/src/substitutions/unlabelled/pattern_edge.dtg.cc @@ -36,7 +36,7 @@ bool PatternEdge::operator>=(PatternEdge const &other) const { namespace std { size_t hash::operator()( - FlexFlow::PatternEdge const &x) const { + ::FlexFlow::PatternEdge const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::OpenMultiDiEdge>{}(x.raw_edge) + 0x9e3779b9 + (result << 6) + 
(result >> 2); diff --git a/lib/substitutions/src/substitutions/unlabelled/pattern_node.dtg.cc b/lib/substitutions/src/substitutions/unlabelled/pattern_node.dtg.cc index 6ea64de69e..b2cd557c06 100644 --- a/lib/substitutions/src/substitutions/unlabelled/pattern_node.dtg.cc +++ b/lib/substitutions/src/substitutions/unlabelled/pattern_node.dtg.cc @@ -36,7 +36,7 @@ bool PatternNode::operator>=(PatternNode const &other) const { namespace std { size_t hash::operator()( - FlexFlow::PatternNode const &x) const { + ::FlexFlow::PatternNode const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::Node>{}(x.raw_node) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/substitutions/src/substitutions/unlabelled/pattern_split.dtg.cc b/lib/substitutions/src/substitutions/unlabelled/pattern_split.dtg.cc index bbcd4c3902..d678a1edfe 100644 --- a/lib/substitutions/src/substitutions/unlabelled/pattern_split.dtg.cc +++ b/lib/substitutions/src/substitutions/unlabelled/pattern_split.dtg.cc @@ -30,15 +30,15 @@ bool PatternSplit::operator!=(PatternSplit const &other) const { } // namespace FlexFlow namespace nlohmann { -FlexFlow::PatternSplit - adl_serializer::from_json(json const &j) { - return { +::FlexFlow::PatternSplit + adl_serializer<::FlexFlow::PatternSplit>::from_json(json const &j) { + return ::FlexFlow::PatternSplit{ j.at("first").template get>(), j.at("second") .template get>()}; } -void adl_serializer::to_json( - json &j, FlexFlow::PatternSplit const &v) { +void adl_serializer<::FlexFlow::PatternSplit>::to_json( + json &j, ::FlexFlow::PatternSplit const &v) { j["__type"] = "PatternSplit"; j["first"] = v.first; j["second"] = v.second; diff --git a/lib/substitutions/src/substitutions/unlabelled/upward_open_pattern_edge.dtg.cc b/lib/substitutions/src/substitutions/unlabelled/upward_open_pattern_edge.dtg.cc index ca8dd6c020..1fe34ed778 100644 --- a/lib/substitutions/src/substitutions/unlabelled/upward_open_pattern_edge.dtg.cc +++ 
b/lib/substitutions/src/substitutions/unlabelled/upward_open_pattern_edge.dtg.cc @@ -43,7 +43,7 @@ bool UpwardOpenPatternEdge::operator>=( namespace std { size_t hash::operator()( - FlexFlow::UpwardOpenPatternEdge const &x) const { + ::FlexFlow::UpwardOpenPatternEdge const &x) const { size_t result = 0; result ^= std::hash<::FlexFlow::UpwardOpenMultiDiEdge>{}(x.raw_edge) + 0x9e3779b9 + (result << 6) + (result >> 2); diff --git a/lib/substitutions/test/src/test_pattern_matches.cc b/lib/substitutions/test/src/test_pattern_matches.cc index 5d72bbff7e..e130d0f5d6 100644 --- a/lib/substitutions/test/src/test_pattern_matches.cc +++ b/lib/substitutions/test/src/test_pattern_matches.cc @@ -44,7 +44,7 @@ struct Arbitrary { } // namespace rc // TEST_CASE("find_pattern_matches") { -// rc::check([](MultiDiGraph const &g) { +// RC_SUBCASE([](MultiDiGraph const &g) { // std::unordered_set subgraph_nodes = *rc::subset_of(get_nodes(g)); // OpenMultiDiGraphView subgraph = // get_subgraph(as_openmultidigraph(g), diff --git a/lib/utils/include/utils/containers/without_nullopts.h b/lib/utils/include/utils/containers/without_nullopts.h new file mode 100644 index 0000000000..f888654b60 --- /dev/null +++ b/lib/utils/include/utils/containers/without_nullopts.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_WITHOUT_NULLOPTS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_WITHOUT_NULLOPTS_H + +#include +#include + +namespace FlexFlow { + +template +std::vector without_nullopts(std::vector> const &v) { + std::vector result; + for (std::optional const &t : v) { + if (t.has_value()) { + result.push_back(t.value()); + } + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/fmt.decl.h b/lib/utils/include/utils/fmt.decl.h index 04902c8240..5b8d474025 100644 --- a/lib/utils/include/utils/fmt.decl.h +++ b/lib/utils/include/utils/fmt.decl.h @@ -26,31 +26,6 @@ typename std::enable_if>::value, namespace fmt { -template -struct 
formatter< - ::std::unordered_set, - Char, - std::enable_if_t>::value>> - : formatter<::std::string, Char> { - template - auto format(::std::unordered_set const &m, FormatContext &ctx) - -> decltype(ctx.out()); -}; - -/* template */ -/* std::string format_as(::std::unordered_set const &); */ - -template -struct formatter< - ::std::vector, - Char, - std::enable_if_t>::value>> - : formatter<::std::string> { - template - auto format(::std::vector const &m, FormatContext &ctx) - -> decltype(ctx.out()); -}; - template struct formatter<::std::variant> : formatter<::std::string> { template diff --git a/lib/utils/include/utils/fmt.h b/lib/utils/include/utils/fmt.h index 967a41f22b..72fca552d8 100644 --- a/lib/utils/include/utils/fmt.h +++ b/lib/utils/include/utils/fmt.h @@ -12,38 +12,6 @@ namespace fmt { -template -template -auto formatter< - ::std::unordered_set, - Char, - std::enable_if_t>::value>>:: - format(::std::unordered_set const &m, FormatContext &ctx) - -> decltype(ctx.out()) { - /* CHECK_FMTABLE(T); */ - - /* std::string result = ::FlexFlow::join_strings( */ - /* m.cbegin(), m.cend(), ", ", [](T const &t) { return fmt::to_string(t); - * }); */ - std::string result = ""; - return formatter::format(result, ctx); -} - -template -template -auto formatter< - ::std::vector, - Char, - std::enable_if_t>::value>>:: - format(::std::vector const &m, FormatContext &ctx) - -> decltype(ctx.out()) { - CHECK_FMTABLE(T); - - std::string result = ::FlexFlow::join_strings( - m.cbegin(), m.cend(), ", ", [](T const &t) { return fmt::to_string(t); }); - return formatter::format("[" + result + "]", ctx); -} - template template auto formatter<::std::variant>::format(::std::variant const &m, @@ -58,15 +26,6 @@ auto formatter<::std::variant>::format(::std::variant const &m, namespace FlexFlow { -template -struct delegate_ostream_operator> : std::true_type {}; - -template -struct delegate_ostream_operator> : std::true_type {}; - -template -struct delegate_ostream_operator> : 
std::true_type {}; - template struct delegate_ostream_operator> : std::true_type {}; diff --git a/lib/utils/include/utils/fmt/unordered_set.h b/lib/utils/include/utils/fmt/unordered_set.h new file mode 100644 index 0000000000..8954faf7c5 --- /dev/null +++ b/lib/utils/include/utils/fmt/unordered_set.h @@ -0,0 +1,43 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_FMT_UNORDERED_SET_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_FMT_UNORDERED_SET_H + +#include "utils/check_fmtable.h" +#include "utils/join_strings.h" +#include +#include + +namespace fmt { + +template +struct formatter< + ::std::unordered_set, + Char, + std::enable_if_t>::value>> + : formatter<::std::string> { + template + auto format(::std::unordered_set const &m, FormatContext &ctx) + -> decltype(ctx.out()) { + CHECK_FMTABLE(T); + + std::string result = + ::FlexFlow::join_strings(m.cbegin(), m.cend(), ", ", [](T const &t) { + return fmt::to_string(t); + }); + return formatter::format("{" + result + "}", ctx); + } +}; + +} // namespace fmt + +namespace FlexFlow { + +template +std::ostream &operator<<(std::ostream &s, std::unordered_set const &x) { + CHECK_FMTABLE(T); + + return s << fmt::to_string(x); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/fmt/vector.h b/lib/utils/include/utils/fmt/vector.h new file mode 100644 index 0000000000..5d9ca0aeae --- /dev/null +++ b/lib/utils/include/utils/fmt/vector.h @@ -0,0 +1,43 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_FMT_VECTOR_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_FMT_VECTOR_H + +#include "utils/check_fmtable.h" +#include "utils/join_strings.h" +#include +#include + +namespace fmt { + +template +struct formatter< + ::std::vector, + Char, + std::enable_if_t>::value>> + : formatter<::std::string> { + template + auto format(::std::vector const &m, FormatContext &ctx) + -> decltype(ctx.out()) { + CHECK_FMTABLE(T); + + std::string result = + ::FlexFlow::join_strings(m.cbegin(), m.cend(), ", ", [](T const &t) { + return 
fmt::to_string(t); + }); + return formatter::format("[" + result + "]", ctx); + } +}; + +} // namespace fmt + +namespace FlexFlow { + +template +std::ostream &operator<<(std::ostream &s, std::vector const &v) { + CHECK_FMTABLE(T); + + return s << fmt::to_string(v); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/graph/algorithms.h b/lib/utils/include/utils/graph/algorithms.h index 87b42a90d2..4114b7a936 100644 --- a/lib/utils/include/utils/graph/algorithms.h +++ b/lib/utils/include/utils/graph/algorithms.h @@ -266,8 +266,6 @@ std::vector get_bfs_ordering(DiGraphView const &, std::unordered_set const &starting_points); std::vector get_topological_ordering(DiGraphView const &); -// std::vector get_topological_ordering(MultiDiGraphView const &); -// std::vector get_topological_ordering(OpenMultiDiGraphView const &); std::vector get_unchecked_topological_ordering(DiGraphView const &); std::vector get_edge_topological_ordering(DiGraphView const &); diff --git a/lib/utils/include/utils/graph/multidiedge.h b/lib/utils/include/utils/graph/multidiedge.h index d7c2c1590b..de4ab4fd82 100644 --- a/lib/utils/include/utils/graph/multidiedge.h +++ b/lib/utils/include/utils/graph/multidiedge.h @@ -4,6 +4,7 @@ #include "diedge.h" #include "node.h" #include "node_port.h" +#include "utils/fmt/pair.h" #include "utils/strong_typedef.h" #include "utils/visitable.h" diff --git a/lib/utils/include/utils/stack_string.h b/lib/utils/include/utils/stack_string.h index 0074877768..19743b8301 100644 --- a/lib/utils/include/utils/stack_string.h +++ b/lib/utils/include/utils/stack_string.h @@ -7,6 +7,7 @@ #include "utils/json.h" #include "utils/type_traits.h" #include +#include #include namespace FlexFlow { @@ -17,6 +18,9 @@ struct stack_basic_string { stack_basic_string(Char const *c) : contents(c, c + std::strlen(c)) {} + template + stack_basic_string(Iterator start, Iterator end) : contents(start, end) {} + stack_basic_string(std::basic_string const &s) : 
stack_basic_string(s.c_str()) {} @@ -92,6 +96,20 @@ struct hash<::FlexFlow::stack_basic_string> { } // namespace std +namespace rc { + +template +struct Arbitrary<::FlexFlow::stack_basic_string> { + static Gen<::FlexFlow::stack_basic_string> arbitrary() { + return gen::mapcat(gen::inRange(0, MAXSIZE), [](size_t size) { + return gen::container<::FlexFlow::stack_basic_string>( + size, gen::arbitrary()); + }); + } +}; + +} // namespace rc + namespace FlexFlow { static_assert(is_default_constructible>::value, diff --git a/lib/utils/include/utils/stack_vector.h b/lib/utils/include/utils/stack_vector.h index d47886b055..c2fdbe0afe 100644 --- a/lib/utils/include/utils/stack_vector.h +++ b/lib/utils/include/utils/stack_vector.h @@ -5,6 +5,7 @@ #include "hash-utils.h" #include "rapidcheck.h" #include "utils/fmt.h" +#include "utils/fmt/vector.h" #include "utils/json.h" #include "utils/test_types.h" #include "utils/type_traits.h" @@ -294,7 +295,7 @@ struct stack_vector { } friend std::vector format_as(stack_vector const &v) { - // CHECK_FMTABLE(std::vector); + CHECK_FMTABLE(std::vector); return static_cast>(v); } @@ -314,9 +315,9 @@ struct stack_vector { }; template -struct delegate_ostream_operator> : std::true_type {}; - -// CHECK_FMTABLE(stack_vector); +std::ostream &operator<<(std::ostream &s, stack_vector const &v) { + return s << fmt::to_string(v); +} template void to_json(json &j, stack_vector const &v) { diff --git a/lib/utils/test/CMakeLists.txt b/lib/utils/test/CMakeLists.txt index 40ff07285e..3b0c6ce432 100644 --- a/lib/utils/test/CMakeLists.txt +++ b/lib/utils/test/CMakeLists.txt @@ -3,6 +3,7 @@ ff_add_test_executable( utils-tests SRC_PATTERNS src/test_cow_ptr.cc + src/test_optional.cc PRIVATE_INCLUDE src/ DEPS diff --git a/lib/utils/test/common/include/test/utils/all.h b/lib/utils/test/common/include/test/utils/all.h index 308b58e630..ced1c9ce38 100644 --- a/lib/utils/test/common/include/test/utils/all.h +++ b/lib/utils/test/common/include/test/utils/all.h @@ 
-1,5 +1,2 @@ -#include "doctest.h" -#include "doctest/doctest.h" -#include "rapidcheck/doctest.h" -#include "rapidcheck/gen.h" -#include "rapidcheck/visitable.h" +#include "test/utils/doctest.h" +#include "test/utils/rapidcheck.h" diff --git a/lib/utils/test/common/include/test/utils/rapidcheck.h b/lib/utils/test/common/include/test/utils/rapidcheck.h new file mode 100644 index 0000000000..473f3f019a --- /dev/null +++ b/lib/utils/test/common/include/test/utils/rapidcheck.h @@ -0,0 +1,4 @@ +#include "rapidcheck/doctest.h" +#include "rapidcheck/gen.h" +#include "rapidcheck/some.h" +#include "rapidcheck/visitable.h" diff --git a/lib/utils/test/common/include/test/utils/rapidcheck/doctest.h b/lib/utils/test/common/include/test/utils/rapidcheck/doctest.h index c0ab94ef97..ccd47e48d1 100644 --- a/lib/utils/test/common/include/test/utils/rapidcheck/doctest.h +++ b/lib/utils/test/common/include/test/utils/rapidcheck/doctest.h @@ -4,38 +4,11 @@ #include "doctest/doctest.h" #include "rapidcheck.h" -namespace rc { +namespace FlexFlow { -/** - * Checks the given predicate by applying it to randomly generated arguments. - * - * Quotes the given description string if the predicate can be falsified. - * - * Traces a progress message to 'stdout' if the flag 'v' is true. - * - * Like the function 'rc::check', but integrates with 'doctest' to include its - * result in the statistics that are gathered for a test run. - * - * For example: - * - * TEST_CASE("addition is commutative") - * { - * wol::test::check("a+b == b+a", [](int a, int b) { return a+b == b+a; }); - * } - * - * @param d A description of the predicate being checked. - * @param t A predicate to check. - * @param v A flag requesting verbose output. - * - * @see https://github.com/emil-e/rapidcheck/blob/master/doc/properties.md - * for more on 'rc::check', on which this function is modeled. 
- * - * @see https://github.com/emil-e/rapidcheck/blob/master/doc/catch.md - * for more on the integration of 'rapidcheck' and 'catch', on which - * this implementation is based. - */ template -void dc_check(char const *d, testable &&t, bool v = false) { +void RC_SUBCASE(char const *d, testable &&t, bool v = false) { + using namespace ::rc; using namespace ::rc::detail; using namespace ::doctest::detail; @@ -59,41 +32,11 @@ void dc_check(char const *d, testable &&t, bool v = false) { } } } - -/** - * Checks the given predicate by applying it to randomly generated arguments. - * - * Quotes the given description string if the predicate can be falsified. - * - * Traces a progress message to 'stdout' if the flag 'v' is true. - * - * Like the function 'rc::check', but integrates with 'doctest' to include its - * result in the statitics that are gathered for a test run. - * - * For example: - * - * TEST_CASE("addition is commutative") - * { - * wol::test::check("a+b == b+a", [](int a, int b) { return a+b == b+a; }); - * } - * - * @param t A predicate to check. - * @param v A flag requesting verbose output. - * - * @see https://github.com/emil-e/rapidcheck/blob/master/doc/properties.md - * for more on 'rc::check', on which this function is modeled. - * - * @see https://github.com/emil-e/rapidcheck/blob/master/doc/catch.md - * for more on the integration of 'rapidcheck' and 'catch', on which - * this implementation is based. 
- */ template -inline void dc_check(testable &&t, bool v = false) { - check("", t, v); +void RC_SUBCASE(testable &&t, bool v = false) { + RC_SUBCASE("", t, v); } -#define RC_SUBCASE(NAME) rc - -} // namespace rc +} // namespace FlexFlow #endif diff --git a/lib/utils/test/common/include/test/utils/rapidcheck/some.h b/lib/utils/test/common/include/test/utils/rapidcheck/some.h new file mode 100644 index 0000000000..3db5e35052 --- /dev/null +++ b/lib/utils/test/common/include/test/utils/rapidcheck/some.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_UTILS_TEST_COMMON_INCLUDE_TEST_UTILS_RAPIDCHECK_SOME_H +#define _FLEXFLOW_LIB_UTILS_TEST_COMMON_INCLUDE_TEST_UTILS_RAPIDCHECK_SOME_H + +#include + +namespace FlexFlow { + +template +T some() { + rc::Random r{}; + return rc::gen::arbitrary()(r).value(); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/test/src/test_optional.cc b/lib/utils/test/src/test_optional.cc index 8ef9e18f18..35a54e8133 100644 --- a/lib/utils/test/src/test_optional.cc +++ b/lib/utils/test/src/test_optional.cc @@ -1,10 +1,11 @@ #include "test/utils/doctest.h" +#include "test/utils/rapidcheck.h" #include "utils/optional.h" #include TEST_SUITE(FF_TEST_SUITE) { - - TEST_CASE_TEMPLATE("RC arbitrary", T, int, double, char) { - CHECK(rc::check("generate", [](std::optional o) {})); + TEST_CASE_TEMPLATE( + "Arbitrary> with T=", T, int, double, char) { + RC_SUBCASE([](std::optional o) {}); } } diff --git a/lib/utils/test/src/test_stack_string.cc b/lib/utils/test/src/test_stack_string.cc index 1836e0824a..a044f85fe3 100644 --- a/lib/utils/test/src/test_stack_string.cc +++ b/lib/utils/test/src/test_stack_string.cc @@ -1,4 +1,5 @@ #include "test/utils/doctest.h" +#include "test/utils/rapidcheck.h" #include "utils/stack_string.h" using namespace FlexFlow; @@ -80,4 +81,9 @@ TEST_SUITE(FF_TEST_SUITE) { std::string stdStr = static_cast(str); CHECK(stdStr == "Hello"); } + + TEST_CASE("Arbitrary") { + constexpr std::size_t MAXSIZE = 10; + 
RC_SUBCASE([](stack_string const &s) {}); + } } diff --git a/lib/utils/test/src/test_stack_vector.cc b/lib/utils/test/src/test_stack_vector.cc index 141cd30e95..1af43b6993 100644 --- a/lib/utils/test/src/test_stack_vector.cc +++ b/lib/utils/test/src/test_stack_vector.cc @@ -1,7 +1,7 @@ #include "test/utils/doctest.h" +#include "test/utils/rapidcheck.h" #include "utils/stack_vector.h" #include -#include using namespace FlexFlow; @@ -78,10 +78,10 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(vector.back() == 20); } - TEST_CASE_TEMPLATE("RC arbitrary", T, int, double, char) { + TEST_CASE_TEMPLATE("Arbitrary", T, int, double, char) { constexpr std::size_t MAXSIZE = 10; - CHECK(rc::check("within bound", [](stack_vector v) { - return v.size() <= MAXSIZE; - })); + RC_SUBCASE("within bound", [&](stack_vector v) { + RC_ASSERT(v.size() <= MAXSIZE); + }); } } diff --git a/lib/utils/test/src/test_undirected_graph.cc b/lib/utils/test/src/test_undirected_graph.cc index 3616ee59aa..c30fb64341 100644 --- a/lib/utils/test/src/test_undirected_graph.cc +++ b/lib/utils/test/src/test_undirected_graph.cc @@ -35,7 +35,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE_TEMPLATE( "UndirectedGraph implementations", T, HashmapUndirectedGraph) { - rc::dc_check("Full", [&]() { + RC_SUBCASE("Full", [&]() { UndirectedGraph g = UndirectedGraph::create(); int num_nodes = *gen::inRange(1, 10); std::vector n = repeat(num_nodes, [&] { return g.add_node(); }); diff --git a/lib/utils/test/src/test_variant.cc b/lib/utils/test/src/test_variant.cc index 7cffe9fbe4..98b28a48e9 100644 --- a/lib/utils/test/src/test_variant.cc +++ b/lib/utils/test/src/test_variant.cc @@ -1,6 +1,6 @@ #include "test/utils/doctest.h" +#include "test/utils/rapidcheck.h" #include "utils/variant.h" -#include TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("widen and narrow functions") { @@ -71,9 +71,9 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(get(wider_variant) == 42); } - 
TEST_CASE("Arbitrary") { + RC_SUBCASE("valid type", [](std::variant v) { return std::holds_alternative(v) || std::holds_alternative(v); - })); + }); } }