diff --git a/lib/kernels/include/kernels/reshape_kernels.h b/lib/kernels/include/kernels/reshape_kernels.h
index 7cb30254f6..6be4012073 100644
--- a/lib/kernels/include/kernels/reshape_kernels.h
+++ b/lib/kernels/include/kernels/reshape_kernels.h
@@ -3,25 +3,28 @@
 
 #include "kernels/accessor.h"
 #include "kernels/device.h"
+#include "utils/required_core.h"
 
 namespace FlexFlow {
 
-class ReshapePerDeviceState : public PerDeviceOpState {
-public:
-  ReshapePerDeviceState(FFHandler handler);
-  DataType data_type;
+struct ReshapePerDeviceState {
+  req<DataType> data_type; // data type the kernels are dispatched on
+};
 
+FF_VISITABLE_STRUCT(ReshapePerDeviceState, data_type);
+
 namespace Kernels {
 namespace Reshape {
 
+ReshapePerDeviceState init_kernel(DataType data_type);
+
 void forward_kernel(ffStream_t stream,
-                    ReshapePerDeviceState const *m,
+                    ReshapePerDeviceState const &per_device_state,
                     GenericTensorAccessorR const &input,
                     GenericTensorAccessorW const &output);
 
 void backward_kernel(ffStream_t stream,
-                     ReshapePerDeviceState const *m,
+                     ReshapePerDeviceState const &per_device_state,
                      GenericTensorAccessorW const &input,
                      GenericTensorAccessorR const &output);
 
diff --git a/lib/kernels/src/cuda/reshape_kernels.cu b/lib/kernels/src/cuda/reshape_kernels.cu
index 15ae0c7109..e935b0d0c2 100644
--- a/lib/kernels/src/cuda/reshape_kernels.cu
+++ b/lib/kernels/src/cuda/reshape_kernels.cu
@@ -19,8 +19,9 @@
 
 namespace FlexFlow {
 
-ReshapePerDeviceState::ReshapePerDeviceState(FFHandler handler)
-    : PerDeviceOpState(handler) {}
+ReshapePerDeviceState Kernels::Reshape::init_kernel(DataType data_type) {
+  return ReshapePerDeviceState{data_type};
+} // qualified: the header declares init_kernel in Kernels::Reshape
 
 namespace Kernels {
 namespace Reshape {
@@ -41,7 +42,6 @@ struct ForwardKernel {
 template <DataType T>
 struct BackwardKernel {
   void operator()(cudaStream_t stream,
-                  ReshapePerDeviceState const *m,
                   GenericTensorAccessorW const &input,
                   GenericTensorAccessorR const &output) {
     float alpha = 1.0f;
@@ -54,17 +54,17 @@ struct BackwardKernel {
 }
 
 void forward_kernel(cudaStream_t stream,
-                    ReshapePerDeviceState const *m,
+                    ReshapePerDeviceState const &m,
                     GenericTensorAccessorR const &input,
                     GenericTensorAccessorW const &output) {
-  DataTypeDispatch1<ForwardKernel>{}(m->data_type, stream, m, input, output);
+  DataTypeDispatch1<ForwardKernel>{}(m.data_type, stream, input, output);
 }
 
 void backward_kernel(cudaStream_t stream,
-                     ReshapePerDeviceState const *m,
+                     ReshapePerDeviceState const &m,
                      GenericTensorAccessorW const &input,
                      GenericTensorAccessorR const &output) {
-  DataTypeDispatch1<BackwardKernel>{}(m->data_type, stream, m, input, output);
+  DataTypeDispatch1<BackwardKernel>{}(m.data_type, stream, input, output);
 }
 
 } // namespace Reshape
diff --git a/lib/runtime/src/ops/reshape.cc b/lib/runtime/src/ops/reshape.cc
index 71fb10bc9c..c9dc8cff8d 100644
--- a/lib/runtime/src/ops/reshape.cc
+++ b/lib/runtime/src/ops/reshape.cc
@@ -16,7 +16,6 @@
 #include "reshape.h"
 #include "kernels/reshape_kernels.h"
 #include "legion/legion_utilities.h"
-#include "utils/hash-utils.h"
 
 namespace FlexFlow {
 // declare Legion names
@@ -37,427 +36,162 @@ using Legion::TaskLauncher;
 
 using namespace FlexFlow::Kernels::Reshape;
 
-/* Params */
-bool operator==(ReshapeParams const &lhs, ReshapeParams const &rhs) {
-  return lhs.shape == rhs.shape;
-}
+enum slots { INPUT, OUTPUT, ATTRS, PROFILING, PER_DEVICE_STATE }; // slot ids
 
-bool ReshapeParams::is_valid(ParallelTensorShape const &input) const {
-  return input.is_valid();
-}
+OpTaskInvocation init(ReshapeAttrs const &attrs) {
+  OpTaskBinding binding;
 
-Tensor FFModel::reshape(const Tensor input,
-                        std::vector<int> const &shape,
-                        char const *name) {
-  Layer *reshape = new Layer(this,
-                             OP_RESHAPE,
-                             DT_FLOAT,
-                             name,
-                             1 /*inputs*/,
-                             0 /*weights*/,
-                             1 /*outputs*/,
-                             input);
-  int dims[MAX_TENSOR_DIM];
-  int numdim = shape.size();
-  for (int i = 0; i < numdim; i++) {
-    assert(shape[i] > 0);
-    dims[i] = shape[i];
-  }
-  reshape->outputs[0] = create_tensor(
-      numdim, dims, input->data_type, reshape, 0, true /*create_grad*/);
-  reshape->add_int_vector_property("shape", shape);
-  layers.push_back(reshape);
-  return reshape->outputs[0];
-}
+  binding.bind_arg(ATTRS, attrs);
 
-Op *Reshape::create_operator_from_layer(
-    FFModel &model,
-    Layer const *layer,
-    std::vector<ParallelTensor> const &inputs) {
-  std::vector<int> shape;
-  layer->get_int_vector_property("shape", shape);
-  return new Reshape(model, layer->layer_guid, inputs[0], shape, layer->name);
+  return {RESHAPE_INIT_TASK_ID, binding};
 }
 
-Reshape::Reshape(FFModel &model,
-                 LayerID const &_layer_guid,
-                 const ParallelTensor input,
-                 std::vector<int> const &_shape,
-                 char const *name)
-    : Op(model,
-         OP_RESHAPE,
-         input->data_type,
-         name,
-         1 /*inputs*/,
-         0 /*weights*/,
-         1 /*outputs*/,
-         input) {
-  layer_guid = _layer_guid;
-  shape_length = _shape.size();
-  assert(shape_length <= MAX_TENSOR_DIM);
-  for (int i = 0; i < shape_length; i++) {
-    shape_array[i] = _shape[i];
-  }
-  numOutputs = 1;
-  numWeights = 0;
-  int num_replica_dims = 0;
-  for (int i = 0; i < input->num_dims; i++) {
-    if (input->dims[i].is_replica_dim) {
-      num_replica_dims++;
-    }
-  }
-  // assert that all replica dims are leading dims
-  for (int i = 0; i < num_replica_dims; i++) {
-    assert(input->dims[input->num_dims - 1 - i].is_replica_dim);
-  }
-  int numdim = (int)_shape.size();
-  ParallelDim dims[MAX_TENSOR_DIM];
-  for (int i = 0; i < numdim; i++) {
-    dims[i].size = _shape[numdim - 1 - i];
-    dims[i].degree = 1;
-    dims[i].parallel_idx = -1;
-    dims[i].is_replica_dim = false;
-  }
-  // copy all replica dims
-  for (int i = 0; i < num_replica_dims; i++) {
-    dims[i + numdim] = input->dims[input->num_dims - 1 - i];
-  }
-  numdim += num_replica_dims;
-  for (int i = num_replica_dims; i < numdim && i < input->num_dims; i++) {
-    if (dims[numdim - 1 - i].size !=
-        input->dims[input->num_dims - 1 - i].size) {
-      break;
-    }
-    dims[numdim - 1 - i] = input->dims[input->num_dims - 1 - i];
-  }
-  outputs[0] = model.create_parallel_tensor_legion_ordering(
-      numdim, dims, input->data_type, this);
-  assert(outputs[0]->get_volume() == inputs[0]->get_volume());
-}
+OpTaskInvocation forward(ReshapeAttrs const &attrs) {
+  OpTaskBinding binding;
 
-Reshape::Reshape(FFModel &model,
-                 ReshapeParams const &params,
-                 const ParallelTensor input,
-                 char const *name)
-    : Reshape(model, params.layer_guid, input, params.shape, name) {}
-
-void Reshape::init(FFModel const &ff) {
-  assert(check_output_input_weight_same_parallel_is());
-  parallel_is = outputs[0]->parallel_is;
-  ArgumentMap argmap;
-  Context ctx = ff.config.lg_ctx;
-  Runtime *runtime = ff.config.lg_hlr;
-  set_argumentmap_for_init(ff, argmap);
-  IndexLauncher launcher(RESHAPE_INIT_TASK_ID,
-                         parallel_is,
-                         TaskArgument(this, sizeof(Reshape)),
-                         argmap,
-                         Predicate::TRUE_PRED,
-                         false /*must*/,
-                         0 /*mapper_id*/,
-                         outputs[0]->machine_view.hash());
-  launcher.add_region_requirement(RegionRequirement(inputs[0]->part,
-                                                    0 /*projection id*/,
-                                                    READ_ONLY,
-                                                    EXCLUSIVE,
-                                                    inputs[0]->region));
-  launcher.add_field(0, FID_DATA);
-  launcher.add_region_requirement(RegionRequirement(outputs[0]->part,
-                                                    0 /*projection id*/,
-                                                    WRITE_ONLY,
-                                                    EXCLUSIVE,
-                                                    outputs[0]->region));
-  launcher.add_field(1, FID_DATA);
-  FutureMap fm = runtime->execute_index_space(ctx, launcher);
-  fm.wait_all_results();
-  set_opmeta_from_futuremap(ff, fm);
+  binding.bind_arg(PER_DEVICE_STATE,
+                   per_device_op_state<ReshapePerDeviceState>());
+  binding.bind_arg(PROFILING, profiling_settings());
+
+  binding.bind(INPUT, input_tensor(0));
+  binding.bind(OUTPUT, output_tensor(0));
+  return {RESHAPE_FWD_TASK_ID, binding};
 }
 
-PerDeviceOpState *Reshape::init_task(Task const *task,
-                                     std::vector<PhysicalRegion> const &regions,
-                                     Context ctx,
-                                     Runtime *runtime) {
-  Reshape const *reshape = (Reshape *)task->args;
-  FFHandler handle = *((FFHandler const *)task->local_args);
-  ReshapeMeta *m = new ReshapeMeta(handle);
-  m->data_type = reshape->outputs[0]->data_type;
-  return m;
+OpTaskInvocation backward(ReshapeAttrs const &attrs) {
+  // The backward binding is derived mechanically from the forward one.
+  OpTaskBinding bwd_binding = infer_bwd_binding(forward(attrs).binding);
+  return {RESHAPE_BWD_TASK_ID, bwd_binding};
 }
 
-void Reshape::forward(FFModel const &ff) {
-  ArgumentMap argmap;
-  Context ctx = ff.config.lg_ctx;
-  Runtime *runtime = ff.config.lg_hlr;
-  set_argumentmap_for_forward(ff, argmap);
-  IndexLauncher launcher(RESHAPE_FWD_TASK_ID,
-                         parallel_is,
-                         TaskArgument(NULL, 0),
-                         argmap,
-                         Predicate::TRUE_PRED,
-                         false /*must*/,
-                         0 /*mapper_id*/,
-                         outputs[0]->machine_view.hash());
-  launcher.add_region_requirement(RegionRequirement(inputs[0]->part,
-                                                    0 /*projection id*/,
-                                                    READ_ONLY,
-                                                    EXCLUSIVE,
-                                                    inputs[0]->region));
-  launcher.add_field(0, FID_DATA);
-  launcher.add_region_requirement(RegionRequirement(outputs[0]->part,
-                                                    0 /*projection id*/,
-                                                    WRITE_ONLY,
-                                                    EXCLUSIVE,
-                                                    outputs[0]->region));
-  launcher.add_field(1, FID_DATA);
-  runtime->execute_index_space(ctx, launcher);
+static DeviceSpecific<ReshapePerDeviceState>
+    init_task_impl(TaskArgumentAccessor const &acc) {
+  // Only the attrs are read here: the per-device state just records the
+  // data type taken from attrs.shape.
+  auto reshape_attrs = acc.get_argument<ReshapeAttrs>(ATTRS);
+
+  return acc.create_device_specific<ReshapePerDeviceState>(
+      init_kernel(reshape_attrs.shape.data_type));
 }
 
-void Reshape::forward_task(Task const *task,
-                           std::vector<PhysicalRegion> const &regions,
-                           Context ctx,
-                           Runtime *runtime) {
-  assert(regions.size() == 2);
-  assert(task->regions.size() == 2);
-  // const Reshape* reshape = (const Reshape*) task->args;
-  ReshapeMeta const *m = *((ReshapeMeta **)task->local_args);
-  Domain in_domain = runtime->get_index_space_domain(
-      ctx, task->regions[0].region.get_index_space());
-  Domain out_domain = runtime->get_index_space_domain(
-      ctx, task->regions[1].region.get_index_space());
-  assert(in_domain.get_volume() == out_domain.get_volume());
-
-  if (m->data_type == DT_FLOAT) {
-    float const *in_ptr = helperGetTensorPointerRO<float>(
-        regions[0], task->regions[0], FID_DATA, ctx, runtime);
-    float *out_ptr = helperGetTensorPointerWO<float>(
-        regions[1], task->regions[1], FID_DATA, ctx, runtime);
-    forward_kernel_wrapper<float>(in_ptr, out_ptr, in_domain.get_volume());
-  } else if (m->data_type == DT_DOUBLE) {
-    double const *in_ptr = helperGetTensorPointerRO<double>(
-        regions[0], task->regions[0], FID_DATA, ctx, runtime);
-    double *out_ptr = helperGetTensorPointerWO<double>(
-        regions[1], task->regions[1], FID_DATA, ctx, runtime);
-    forward_kernel_wrapper<double>(in_ptr, out_ptr, in_domain.get_volume());
-  } else if (m->data_type == DT_INT32) {
-    int32_t const *in_ptr = helperGetTensorPointerRO<int32_t>(
-        regions[0], task->regions[0], FID_DATA, ctx, runtime);
-    int32_t *out_ptr = helperGetTensorPointerWO<int32_t>(
-        regions[1], task->regions[1], FID_DATA, ctx, runtime);
-    forward_kernel_wrapper<int32_t>(in_ptr, out_ptr, in_domain.get_volume());
-  } else if (m->data_type == DT_INT64) {
-    int64_t const *in_ptr = helperGetTensorPointerRO<int64_t>(
-        regions[0], task->regions[0], FID_DATA, ctx, runtime);
-    int64_t *out_ptr = helperGetTensorPointerWO<int64_t>(
-        regions[1], task->regions[1], FID_DATA, ctx, runtime);
-    forward_kernel_wrapper<int64_t>(in_ptr, out_ptr, in_domain.get_volume());
-  } else {
-    assert(false && "Unsupported data type in Reshape forward");
-  }
+static DeviceSpecific<ReshapePerDeviceState>
+    init_task(Task const *task,
+              std::vector<PhysicalRegion> const &regions,
+              Context ctx,
+              Runtime *runtime) {
+  // Wrap the raw task arguments in an accessor and defer to the impl.
+  return init_task_impl(TaskArgumentAccessor(task, regions, ctx, runtime));
 }
 
-void Reshape::backward(FFModel const &ff) {
-  ArgumentMap argmap;
-  Context ctx = ff.config.lg_ctx;
-  Runtime *runtime = ff.config.lg_hlr;
-  set_argumentmap_for_backward(ff, argmap);
-  IndexLauncher launcher(RESHAPE_BWD_TASK_ID,
-                         parallel_is,
-                         TaskArgument(NULL, 0),
-                         argmap,
-                         Predicate::TRUE_PRED,
-                         false /*must*/,
-                         0 /*mapper_id*/,
-                         outputs[0]->machine_view.hash());
-  // regions[0](I): output_grad
-  launcher.add_region_requirement(RegionRequirement(outputs[0]->part_grad,
-                                                    0 /*projection id*/,
-                                                    READ_ONLY,
-                                                    EXCLUSIVE,
-                                                    outputs[0]->region_grad));
-  launcher.add_field(0, FID_DATA);
-  // regions[3](I/O): input0_grad
-  launcher.add_region_requirement(RegionRequirement(inputs[0]->part_grad,
-                                                    0 /*projection id*/,
-                                                    READ_WRITE,
-                                                    EXCLUSIVE,
-                                                    inputs[0]->region_grad));
-  launcher.add_field(1, FID_DATA);
-  runtime->execute_index_space(ctx, launcher);
+static optional<float> forward_task_impl(TaskArgumentAccessor const &acc) {
+  auto per_device_state =
+      acc.get_argument<ReshapePerDeviceState>(PER_DEVICE_STATE);
+  ProfilingSettings profiling = acc.get_argument<ProfilingSettings>(PROFILING);
+  // Input is fetched read-only and output write-only.
+  auto input = acc.get_tensor<Permissions::RO>(INPUT);
+  auto output = acc.get_tensor<Permissions::WO>(OUTPUT);
+  // Hand the kernel and its arguments to profile() and return its result.
+  return profile(forward_kernel,
+                 profiling,
+                 "[Reshape] forward time = %.2lfms\n",
+                 per_device_state,
+                 input,
+                 output);
 }
 
-ReshapeParams Reshape::get_params() const {
-  std::vector<int> shape_vec;
-  for (size_t i = 0; i < shape_length; i++) {
-    shape_vec.push_back(shape_array[i]);
-  }
-  ReshapeParams params;
-  params.shape = shape_vec;
-  params.layer_guid = this->layer_guid;
-  return params;
+static void forward_task(Task const *task,
+                         std::vector<PhysicalRegion> const &regions,
+                         Context ctx,
+                         Runtime *runtime) {
+  // Task entry point; the optional<float> returned by the impl is discarded.
+  forward_task_impl(TaskArgumentAccessor(task, regions, ctx, runtime));
 }
 
-void Reshape::backward_task(Task const *task,
-                            std::vector<PhysicalRegion> const &regions,
-                            Context ctx,
-                            Runtime *runtime) {
-  assert(regions.size() == 2);
-  assert(task->regions.size() == 2);
-  // const Reshape* reshape = (const Reshape*) task->args;
-  ReshapeMeta const *m = *((ReshapeMeta **)task->local_args);
-  Domain out_grad_domain = runtime->get_index_space_domain(
-      ctx, task->regions[0].region.get_index_space());
-  Domain in_grad_domain = runtime->get_index_space_domain(
-      ctx, task->regions[1].region.get_index_space());
-  assert(in_grad_domain.get_volume() == out_grad_domain.get_volume());
-
-  if (m->data_type == DT_FLOAT) {
-    float const *out_grad_ptr = helperGetTensorPointerRO<float>(
-        regions[0], task->regions[0], FID_DATA, ctx, runtime);
-    float *in_grad_ptr = helperGetTensorPointerRW<float>(
-        regions[1], task->regions[1], FID_DATA, ctx, runtime);
-    backward_kernel_wrapper<float>(
-        in_grad_ptr, out_grad_ptr, in_grad_domain.get_volume());
-  } else if (m->data_type == DT_DOUBLE) {
-    double const *out_grad_ptr = helperGetTensorPointerRO<double>(
-        regions[0], task->regions[0], FID_DATA, ctx, runtime);
-    double *in_grad_ptr = helperGetTensorPointerRW<double>(
-        regions[1], task->regions[1], FID_DATA, ctx, runtime);
-    backward_kernel_wrapper<double>(
-        in_grad_ptr, out_grad_ptr, in_grad_domain.get_volume());
-  } else if (m->data_type == DT_INT32) {
-    int32_t const *out_grad_ptr = helperGetTensorPointerRO<int32_t>(
-        regions[0], task->regions[0], FID_DATA, ctx, runtime);
-    int32_t *in_grad_ptr = helperGetTensorPointerRW<int32_t>(
-        regions[1], task->regions[1], FID_DATA, ctx, runtime);
-    backward_kernel_wrapper<int32_t>(
-        in_grad_ptr, out_grad_ptr, in_grad_domain.get_volume());
-  } else if (m->data_type == DT_INT64) {
-    int64_t const *out_grad_ptr = helperGetTensorPointerRO<int64_t>(
-        regions[0], task->regions[0], FID_DATA, ctx, runtime);
-    int64_t *in_grad_ptr = helperGetTensorPointerRW<int64_t>(
-        regions[1], task->regions[1], FID_DATA, ctx, runtime);
-    backward_kernel_wrapper<int64_t>(
-        in_grad_ptr, out_grad_ptr, in_grad_domain.get_volume());
-  } else {
-    assert(false && "Unsupported data type in Reshape backward");
-  }
+static optional<float> backward_task_impl(TaskArgumentAccessor const &acc) {
+  auto per_device_state =
+      acc.get_argument<ReshapePerDeviceState>(PER_DEVICE_STATE);
+  ProfilingSettings profiling = acc.get_argument<ProfilingSettings>(PROFILING);
+  // The input gradient is fetched read-write, the output gradient read-only.
+  auto input_grad = acc.get_tensor_grad<Permissions::RW>(INPUT);
+  auto output_grad = acc.get_tensor_grad<Permissions::RO>(OUTPUT);
+  // Hand the kernel and its arguments to profile() and return its result.
+  return profile(backward_kernel,
+                 profiling,
+                 "[Reshape] backward time = %.2lfms\n",
+                 per_device_state,
+                 input_grad,
+                 output_grad);
 }
 
-bool Reshape::measure_operator_cost(Simulator *sim,
-                                    MachineView const &mv,
-                                    CostMetrics &cost_metrics) const {
-  ParallelTensorBase sub_input, sub_output;
-  if (!outputs[0]->get_sub_tensor(mv, sub_output)) {
-    return false;
-  }
-  if (!inputs[0]->get_sub_tensor(mv, sub_input)) {
-    return false;
-  }
-
-  sim->free_all();
-  float *input_ptr = (float *)sim->allocate(sub_input.get_volume(), DT_FLOAT);
-  assert(input_ptr != NULL);
-  cost_metrics.inputs_memory += cost_metrics.total_mem_diff_from(sim->offset);
-
-  float *output_ptr = (float *)sim->allocate(sub_output.get_volume(), DT_FLOAT);
-  assert(output_ptr != NULL);
-  cost_metrics.outputs_memory += cost_metrics.total_mem_diff_from(sim->offset);
-
-  assert(sub_output.get_volume() == sub_input.get_volume());
-  size_t num_elements = sub_input.get_volume();
-
-  std::function<void()> forward, backward;
-  forward = [&] {
-    forward_kernel_wrapper(input_ptr, output_ptr, num_elements);
-  };
-  if (sim->computationMode == COMP_MODE_TRAINING) {
-    float *input_grad_ptr =
-        (float *)sim->allocate(sub_input.get_volume(), DT_FLOAT);
-    assert(input_grad_ptr != NULL);
-    cost_metrics.inputs_memory += cost_metrics.total_mem_diff_from(sim->offset);
-
-    float *output_grad_ptr =
-        (float *)sim->allocate(sub_output.get_volume(), DT_FLOAT);
-    assert(output_grad_ptr != NULL);
-    cost_metrics.outputs_memory +=
-        cost_metrics.total_mem_diff_from(sim->offset);
-
-    backward = [&] {
-      backward_kernel_wrapper(input_grad_ptr, output_grad_ptr, num_elements);
-    };
-  }
-
-  inner_measure_operator_cost(sim, forward, backward, cost_metrics);
-
-  if (sim->computationMode == COMP_MODE_TRAINING) {
-    printf(
-        "[Measure Reshape] name(%s) forward_time(%.4lf) backward_time(%.4lf)\n",
-        name,
-        cost_metrics.forward_time,
-        cost_metrics.backward_time);
-  } else {
-    printf("[Measure Reshape] name(%s) forward_time(%.4lf)\n",
-           name,
-           cost_metrics.forward_time);
-  }
-  return true;
+static void backward_task(Task const *task,
+                          std::vector<PhysicalRegion> const &regions,
+                          Context ctx,
+                          Runtime *runtime) {
+  // Task entry point; the optional<float> returned by the impl is discarded.
+  backward_task_impl(TaskArgumentAccessor(task, regions, ctx, runtime));
 }
 
-void Reshape::serialize(Legion::Serializer &sez) const {
-  sez.serialize(this->shape_length);
-  for (size_t i = 0; i < this->shape_length; i++) {
-    sez.serialize(this->shape_array[i]);
-  }
-  sez.serialize(this->layer_guid.id);
+CostMetrics measure_operator_cost(SimEnvFactory const &sim_factory,
+                                  ReshapeAttrs const &attrs,
+                                  InputParallelTensorDesc const &input,
+                                  ProfilingSettings const &settings,
+                                  MachineView const &machine_view) {
+  auto env = sim_factory.new_environment(); // used by every accessor below
+  SimTaskBinding init_binding;
+  init_binding.bind_arg(ATTRS, attrs);
+  auto init_accessor =
+      env.get_init_accessor(RESHAPE_INIT_TASK_ID, init_binding);
+  auto per_device_state = init_task_impl(init_accessor);
+  // The forward binding reuses the state produced by the init impl above.
+  SimTaskBinding fwd_binding;
+  ParallelTensorShape output_shape = get_output_shape(attrs, input.shape);
+  fwd_binding.bind_arg(PER_DEVICE_STATE, per_device_state);
+  fwd_binding.bind_arg(PROFILING, settings);
+  fwd_binding.bind(INPUT, input.shape);
+  fwd_binding.bind(OUTPUT, output_shape);
+  // The backward binding is derived from the forward one.
+  SimTaskBinding bwd_binding = infer_bwd_binding(fwd_binding);
+
+  auto fwd_accessor = env.get_fwd_accessor(RESHAPE_FWD_TASK_ID, fwd_binding);
+  auto bwd_accessor = env.get_bwd_accessor(RESHAPE_BWD_TASK_ID, bwd_binding);
+  // NOTE(review): .value() assumes both impls returned a time - confirm.
+  float forward_time = forward_task_impl(fwd_accessor).value();
+  float backward_time = backward_task_impl(bwd_accessor).value();
+
+  float sync_time = default_estimate_sync_time(env);
+  return make_metrics(forward_time, backward_time, sync_time, env);
 }
 
-using PCG::Node;
-
-Node Reshape::deserialize(FFModel &ff,
-                          Legion::Deserializer &dez,
-                          ParallelTensor inputs[],
-                          int num_inputs) {
-  assert(num_inputs == 1);
-  size_t shape_length;
-  std::vector<int> shape;
-  dez.deserialize(shape_length);
-  for (size_t i = 0; i < shape_length; i++) {
-    int value;
-    dez.deserialize(value);
-    shape.push_back(value);
-  }
-  size_t id;
-  dez.deserialize(id);
-  LayerID layer_guid(id);
-
-  ReshapeParams params;
-  params.shape = shape;
-  params.layer_guid = layer_guid;
-  return ff.get_or_create_node<Reshape>(inputs[0], params);
+template <>
+void register_task<RESHAPE_INIT_TASK_ID>() {
+  OpTaskSignature init(OpTaskType::INIT);
+  // Init takes the attrs as its only argument slot.
+  init.add_arg_slot<ReshapeAttrs>(ATTRS);
+  // NOTE(review): confirm add_return_value accepts a slot id argument.
+  init.add_return_value<ReshapePerDeviceState>(PER_DEVICE_STATE);
+
+  register_task(RESHAPE_INIT_TASK_ID, "Reshape Init", init, init_task);
 }
 
-Op *Reshape::materialize(FFModel &ff,
-                         ParallelTensor inputs[],
-                         int num_inputs) const {
-  assert(num_inputs == 1);
-  std::vector<int> shape;
-  for (size_t i = 0; i < this->shape_length; i++) {
-    shape.push_back(shape_array[i]);
-  }
-  return new Reshape(ff, this->layer_guid, inputs[0], shape, this->name);
+template <>
+void register_task<RESHAPE_FWD_TASK_ID>() {
+  OpTaskSignature fwd(OpTaskType::FWD);
+  // Argument slots: profiling settings plus the state produced by init.
+  fwd.add_arg_slot<ProfilingSettings>(PROFILING);
+  fwd.add_unchecked_arg_slot<ReshapePerDeviceState>(PER_DEVICE_STATE);
+  // Tensor slots: one input and one output.
+  fwd.add_input_slot(INPUT);
+  fwd.add_output_slot(OUTPUT);
+
+  register_task(RESHAPE_FWD_TASK_ID, "Reshape Fwd", fwd, forward_task);
 }
 
-}; // namespace FlexFlow
+template <>
+void register_task<RESHAPE_BWD_TASK_ID>() {
+  OpTaskSignature bwd =
+      infer_bwd_signature(get_op_signature(RESHAPE_FWD_TASK_ID));
 
-namespace std {
-size_t hash<FlexFlow::ReshapeParams>::operator()(
-    FlexFlow::ReshapeParams const &params) const {
-  size_t key = 0;
-  hash_combine(key, params.shape.size());
-  for (int n : params.shape) {
-    hash_combine(key, n);
-  }
-  hash_combine(key, params.layer_guid.id);
-  return key;
+  register_task(RESHAPE_BWD_TASK_ID, "Reshape Bwd", bwd, backward_task);
 }
-}; // namespace std
+
+}; // namespace FlexFlow
diff --git a/lib/runtime/src/ops/reshape.h b/lib/runtime/src/ops/reshape.h
index 42bbefd9db..f044e3f057 100644
--- a/lib/runtime/src/ops/reshape.h
+++ b/lib/runtime/src/ops/reshape.h
@@ -2,8 +2,8 @@
 #define _FLEXFLOW_RESHAPE_H
 
 #include "op-attrs/ops/reshape.h"
-#include "op_task_invocation.h"
 #include "sim_environment.h"
+#include "task_spec/op_task_invocation.h"
 
 namespace FlexFlow {
 
@@ -20,7 +20,7 @@ OpTaskInvocation backward(ReshapeAttrs const &);
 
 CostMetrics measure_operator_cost(SimEnvFactory const &sim_factory,
                                   ReshapeAttrs const &attrs,
-                                  ParallelTensorShape const &input_shape,
+                                  InputParallelTensorDesc const &input,
                                   ProfilingSettings const &settings,
                                   MachineView const &machine_view);