From c36d42c5ce1d4831709e802b5190bd93064c86f4 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 5 Oct 2023 14:21:53 +0000 Subject: [PATCH 01/69] try to implement the get-output_shape in attention.cc --- lib/op-attrs/include/op-attrs/ops/attention.h | 33 ++++++----- lib/op-attrs/include/op-attrs/ops/concat.h | 2 + lib/op-attrs/src/attention.cc | 58 ++++++++++++------- lib/op-attrs/src/get_output_shapes.cc | 2 + 4 files changed, 60 insertions(+), 35 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/attention.h b/lib/op-attrs/include/op-attrs/ops/attention.h index ec3e592607..79469206a3 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention.h +++ b/lib/op-attrs/include/op-attrs/ops/attention.h @@ -7,20 +7,6 @@ namespace FlexFlow { -struct MultiHeadAttentionAttrs { - req embed_dim, num_heads, kdim, vdim; - req dropout; - req bias, add_bias_kv, add_zero_attn; -}; -FF_VISITABLE_STRUCT(MultiHeadAttentionAttrs, - embed_dim, - num_heads, - kdim, - vdim, - dropout, - bias, - add_bias_kv, - add_zero_attn); template struct MultiHeadAttentionInputs @@ -43,6 +29,24 @@ struct MultiHeadAttentionInputs TensorType value; }; +struct MultiHeadAttentionAttrs { + req embed_dim, num_heads, kdim, vdim; + req dropout; + req bias, add_bias_kv, add_zero_attn; + bool is_valid(MultiHeadAttentionInputs const &) const; +}; + +FF_VISITABLE_STRUCT(MultiHeadAttentionAttrs, + embed_dim, + num_heads, + kdim, + vdim, + dropout, + bias, + add_bias_kv, + add_zero_attn); +CHECK_VALID_OP_ATTR(MultiHeadAttentionAttrs); + int get_qProjSize(MultiHeadAttentionAttrs const &); int get_vProjSize(MultiHeadAttentionAttrs const &); int get_kProjSize(MultiHeadAttentionAttrs const &); @@ -67,6 +71,7 @@ ParallelTensorShape ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const &, MultiHeadAttentionInputs const &); + TensorShape get_output_shape(MultiHeadAttentionAttrs const &, MultiHeadAttentionInputs const &); diff --git a/lib/op-attrs/include/op-attrs/ops/concat.h 
b/lib/op-attrs/include/op-attrs/ops/concat.h index b9bd14a231..776963a2d2 100644 --- a/lib/op-attrs/include/op-attrs/ops/concat.h +++ b/lib/op-attrs/include/op-attrs/ops/concat.h @@ -10,7 +10,9 @@ namespace FlexFlow { struct ConcatAttrs { ff_dim_t axis; + bool is_valid(std::vector const & input) const; }; + FF_VISITABLE_STRUCT(ConcatAttrs, axis); CHECK_VALID_OP_ATTR(ConcatAttrs); diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index e9ae6ec803..7ccefb49ba 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/attention.h" +#include "op-attrs/parallel_tensor_shape.h" namespace FlexFlow { @@ -10,6 +11,14 @@ namespace FlexFlow { /* return is_valid; */ /* } */ +bool MultiHeadAttentionAttrs::is_valid(MultiHeadAttentionInputs const & input) const { + bool valid = true; + valid &= is_valid(input.key); + valid &= is_valid(input.query); + valid &= is_valid(input.value); + return valid; +} + int get_qProjSize(MultiHeadAttentionAttrs const &attrs) { return attrs.kdim; } @@ -52,34 +61,41 @@ TensorShape return {dims, DataType::FLOAT}; } +//these two functions are not defined in the attention.h +// ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const &attrs, +// ParallelTensorShape const &query_shape, +// ParallelTensorShape const &key_shape, +// ParallelTensorShape const &value_shape) { +// /* ParallelDim replica_dim = query_shape.at(ff_dim_t(query_shape.num_dims() - +// * 2)); */ +// /* replica_dim.size = replica_dim.degree; */ -ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const &attrs, - ParallelTensorShape const &query_shape, - ParallelTensorShape const &key_shape, - ParallelTensorShape const &value_shape) { - /* ParallelDim replica_dim = query_shape.at(ff_dim_t(query_shape.num_dims() - - * 2)); */ - /* replica_dim.size = replica_dim.degree; */ +// /* ParallelDim */ - /* ParallelDim */ +// ParallelTensorShape output_shape = query_shape; +// 
output_shape.at(ff_dim_t(output_shape.num_dims() - 1)).size = attrs.embed_dim; +// return output_shape; +// } - ParallelTensorShape output_shape = query_shape; +// TensorShape get_output_shape(MultiHeadAttentionAttrs const &attrs, +// TensorShape const &query_shape, +// TensorShape const &key_shape, +// TensorShape const &value_shape) { +// ParallelTensorShape parallel_shape = +// get_output_shape(attrs, +// static_cast(query_shape), +// static_cast(key_shape), +// static_cast(value_shape)); +// return get_tensor_shape_unsafe(parallel_shape); +// } + +ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const & attrs, + MultiHeadAttentionInputs const &inputs) { + ParallelTensorShape output_shape = inputs.query; output_shape.at(ff_dim_t(output_shape.num_dims() - 1)).size = attrs.embed_dim; return output_shape; } -TensorShape get_output_shape(MultiHeadAttentionAttrs const &attrs, - TensorShape const &query_shape, - TensorShape const &key_shape, - TensorShape const &value_shape) { - ParallelTensorShape parallel_shape = - get_output_shape(attrs, - static_cast(query_shape), - static_cast(key_shape), - static_cast(value_shape)); - return get_tensor_shape_unsafe(parallel_shape); -} - } // namespace FlexFlow // Tensor FFModel::multihead_attention(const Tensor query, diff --git a/lib/op-attrs/src/get_output_shapes.cc b/lib/op-attrs/src/get_output_shapes.cc index f44a677873..b41912d577 100644 --- a/lib/op-attrs/src/get_output_shapes.cc +++ b/lib/op-attrs/src/get_output_shapes.cc @@ -20,4 +20,6 @@ TensorShape get_output_shape(AggregateAttrs const &attrs, as_parallel(exp_preds))); } + + } // namespace FlexFlow From 959c50f04f98057d4d628141d25cdb2569a36cb2 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 7 Oct 2023 20:44:12 +0000 Subject: [PATCH 02/69] leave the implementation --- .../include/op-attrs/ops/batch_matmul.h | 6 ++++++ .../include/op-attrs/parallel_tensor_shape.h | 2 ++ lib/op-attrs/src/attention.cc | 6 +++--- lib/op-attrs/src/batch_matmul.cc | 17 
+++++++++++++++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h index c74824570c..dbcc292fd6 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h @@ -9,11 +9,17 @@ namespace FlexFlow { struct BatchMatmulAttrs { req a_seq_length_dim, b_seq_length_dim; + bool is_valid(ParallelTensorShape const &, + ParallelTensorShape const &); }; FF_VISITABLE_STRUCT(BatchMatmulAttrs, a_seq_length_dim, b_seq_length_dim); CHECK_VALID_OP_ATTR(BatchMatmulAttrs); +ParallelTensorShape get_output_shape(BatchMatmulAttrs const &, + ParallelTensorShape const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h index fd560352bb..ca980966e8 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h @@ -32,6 +32,8 @@ struct ParallelTensorShape : public use_visitable_cmp { ParallelDim const &operator[](ff_dim_t const &) const; ParallelDim &operator[](ff_dim_t const &); + bool is_valid() const; + public: ParallelTensorDims dims; DataType data_type; diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index 7ccefb49ba..c8148cd45d 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -13,9 +13,9 @@ namespace FlexFlow { bool MultiHeadAttentionAttrs::is_valid(MultiHeadAttentionInputs const & input) const { bool valid = true; - valid &= is_valid(input.key); - valid &= is_valid(input.query); - valid &= is_valid(input.value); + valid &= input.key.is_valid(); + valid &= input.query.is_valid(); + valid &= input.value.is_valid(); return valid; } diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index 1cc8c5cfda..31ca31d81b 100644 --- 
a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -2,6 +2,23 @@ namespace FlexFlow { + bool BatchMatmulAttrs::is_valid(ParallelTensorShape const & lhs, ParallelTensorShape const & rhs) { + if (!lhs.is_valid() || !rhs.is_valid()) { + return false; + } + if (lhs.num_dims() != rhs.num_dims()) { + return false; + } + return true; +} + +ParallelTensorShape get_output_shape(BatchMatmulAttrs const &, + ParallelTensorShape const &, + ParallelTensorShape const &) { + +} + + /* bool BatchMatmulAttrs::is_valid( */ /* ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) const { */ From be7e04f68b2d7f35ea9fe25a41011b6cc12939f7 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:10:31 +0000 Subject: [PATCH 03/69] implement the get output_shape for batch_matmul --- .../include/op-attrs/ops/batch_matmul.h | 1 - lib/op-attrs/src/batch_matmul.cc | 21 +++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h index dbcc292fd6..6473f923a2 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h @@ -19,7 +19,6 @@ CHECK_VALID_OP_ATTR(BatchMatmulAttrs); ParallelTensorShape get_output_shape(BatchMatmulAttrs const &, ParallelTensorShape const &, ParallelTensorShape const &); - } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index 31ca31d81b..537c08f0aa 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -1,8 +1,11 @@ #include "op-attrs/ops/batch_matmul.h" +#include "op-attrs/ff_dim.h" +#include "op-attrs/parallel_tensor_shape.h" namespace FlexFlow { - bool BatchMatmulAttrs::is_valid(ParallelTensorShape const & lhs, ParallelTensorShape const & rhs) { +//maybe we should add more check here +bool BatchMatmulAttrs::is_valid(ParallelTensorShape const & lhs, 
ParallelTensorShape const & rhs) { if (!lhs.is_valid() || !rhs.is_valid()) { return false; } @@ -12,11 +15,17 @@ namespace FlexFlow { return true; } -ParallelTensorShape get_output_shape(BatchMatmulAttrs const &, - ParallelTensorShape const &, - ParallelTensorShape const &) { - -} +//how to get the batch size? and lhs: [b, s1, k], rhs: [b, k, s1] +ParallelTensorShape get_output_shape(BatchMatmulAttrs const & attrs, + ParallelTensorShape const & lhs, + ParallelTensorShape const & rhs) { + ParallelTensorShape output_shape = lhs; + output_shape.at(ff_dim_t(0)).size = lhs.at(ff_dim_t(0)).size; + output_shape.at(ff_dim_t(1)).size = attrs.a_seq_length_dim; + output_shape.at(ff_dim_t(2)).size = attrs.b_seq_length_dim; + //TODO: Do we need to set the ParallelDim for output_shape + return output_shape; +} /* bool BatchMatmulAttrs::is_valid( */ From f64bd64b189966f649f143a01b1f1437e1ab204d Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:13:18 +0000 Subject: [PATCH 04/69] implement the batch_norm --- lib/op-attrs/include/op-attrs/ops/batch_norm.h | 3 ++- lib/op-attrs/src/batch_matmul.cc | 1 + lib/op-attrs/src/batch_norm.cc | 15 ++++++++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/batch_norm.h b/lib/op-attrs/include/op-attrs/ops/batch_norm.h index 4ec823d4ae..65ab18c33c 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_norm.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_norm.h @@ -9,10 +9,11 @@ namespace FlexFlow { struct BatchNormAttrs { req relu; + bool is_valid(ParallelTensorShape const &); }; FF_VISITABLE_STRUCT(BatchNormAttrs, relu); -ParallelTensorShape get_output_shape(BatchNormAttrs const &); +ParallelTensorShape get_output_shape(BatchNormAttrs const &, ParallelTensorShape const &); CHECK_VALID_OP_ATTR(BatchNormAttrs); diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index 537c08f0aa..170ee655d2 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ 
b/lib/op-attrs/src/batch_matmul.cc @@ -12,6 +12,7 @@ bool BatchMatmulAttrs::is_valid(ParallelTensorShape const & lhs, ParallelTensorS if (lhs.num_dims() != rhs.num_dims()) { return false; } + return true; } diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index 4e352d5f1c..a1123667d2 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -1,3 +1,16 @@ #include "op-attrs/ops/batch_norm.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + + bool BatchNormAttrs::is_valid(ParallelTensorShape const & input) { + if(!input.is_valid()) { + return false; + } + return true; + } + +ParallelTensorShape get_output_shape(BatchNormAttrs const & attrs, ParallelTensorShape const & input) { + return input; +} + +} // namespace FlexFlow From 30d42558e29fe74c40fcdde6c4250fe5ee2e2230 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:15:30 +0000 Subject: [PATCH 05/69] implement the cast --- lib/op-attrs/include/op-attrs/ops/cast.h | 4 ++++ lib/op-attrs/src/cast.cc | 18 +++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/cast.h b/lib/op-attrs/include/op-attrs/ops/cast.h index 63563f8df8..39d6fe1cc1 100644 --- a/lib/op-attrs/include/op-attrs/ops/cast.h +++ b/lib/op-attrs/include/op-attrs/ops/cast.h @@ -10,9 +10,13 @@ namespace FlexFlow { struct CastAttrs { req dtype; + bool is_valid(ParallelTensorShape const &input) const; }; FF_VISITABLE_STRUCT(CastAttrs, dtype); +ParallelTensorShape get_output_shape(CastAttrs const &, + ParallelTensorShape const &); + CHECK_VALID_OP_ATTR(CastAttrs); } // namespace FlexFlow diff --git a/lib/op-attrs/src/cast.cc b/lib/op-attrs/src/cast.cc index e4ab178a7e..e7dab4689f 100644 --- a/lib/op-attrs/src/cast.cc +++ b/lib/op-attrs/src/cast.cc @@ -2,10 +2,18 @@ namespace FlexFlow { -/* bool CastAttrs::is_valid(ParallelTensorShape const &input) const { */ -/* bool valid = input.is_valid(); */ -/* valid &= 
(input.at(input.num_dims() - 1).degree == 1); */ -/* return valid; */ -/* } */ +bool CastAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + return true; +} + +ParallelTensorShape get_output_shape(CastAttrs const &attrs, + ParallelTensorShape const &input) { + ParallelTensorShape output = input; + output.data_type = attrs.dtype; + return output; +} } // namespace FlexFlow From 1e5d742c1ef36bdf549272f88352eca4e1b9b325 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:18:24 +0000 Subject: [PATCH 06/69] implement the combine --- lib/op-attrs/include/op-attrs/ops/combine.h | 4 ++++ lib/op-attrs/src/combine.cc | 22 +++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/combine.h b/lib/op-attrs/include/op-attrs/ops/combine.h index deaba9e093..ffc04d4656 100644 --- a/lib/op-attrs/include/op-attrs/ops/combine.h +++ b/lib/op-attrs/include/op-attrs/ops/combine.h @@ -11,10 +11,14 @@ namespace FlexFlow { struct CombineAttrs { ff_dim_t combine_dim; req combine_degree; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(CombineAttrs, combine_dim, combine_degree); CHECK_VALID_OP_ATTR(CombineAttrs); +ParallelTensorShape get_output_shape(CombineAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/combine.cc b/lib/op-attrs/src/combine.cc index cdca524538..8cfe6dfb8c 100644 --- a/lib/op-attrs/src/combine.cc +++ b/lib/op-attrs/src/combine.cc @@ -3,16 +3,18 @@ namespace FlexFlow { -/* bool CombineAttrs::is_valid(ParallelTensorShape const &input) const { */ -/* return input.at(this->combine_legion_dim).degree % this->combine_degree == - * 0; */ -/* } */ +bool CombineAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + return true; +} -/* ParallelTensorShape CombineAttrs::output_shape(ParallelTensorShape const - * 
&input_shape) const { */ -/* ParallelTensorShape output = input_shape; */ -/* output.at(this->combine_legion_dim).degree /= this->combine_degree; */ -/* return output; */ -/* } */ +ParallelTensorShape get_output_shape(CombineAttrs const & attrs, + ParallelTensorShape const & input) { + ParallelTensorShape output = input_shape; + output.at(attrs.combine_dim).degree /= attrs.combine_degree; + return output; +} } // namespace FlexFlow From 5f4cf5ccaad778ed54e351d32b8bb3781655960e Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:19:18 +0000 Subject: [PATCH 07/69] add concat --- lib/op-attrs/include/op-attrs/ops/concat.h | 3 +++ lib/op-attrs/src/concat.cc | 25 +++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/concat.h b/lib/op-attrs/include/op-attrs/ops/concat.h index 776963a2d2..afe3e0dd8d 100644 --- a/lib/op-attrs/include/op-attrs/ops/concat.h +++ b/lib/op-attrs/include/op-attrs/ops/concat.h @@ -13,6 +13,9 @@ struct ConcatAttrs { bool is_valid(std::vector const & input) const; }; +ParallelTensorShape get_output_shape(ConcatAttrs const &, + std::vector const &); + FF_VISITABLE_STRUCT(ConcatAttrs, axis); CHECK_VALID_OP_ATTR(ConcatAttrs); diff --git a/lib/op-attrs/src/concat.cc b/lib/op-attrs/src/concat.cc index 065c58f365..e4b9496e69 100644 --- a/lib/op-attrs/src/concat.cc +++ b/lib/op-attrs/src/concat.cc @@ -2,13 +2,22 @@ namespace FlexFlow { -/* bool ConcatAttrs::is_valid( */ -/* std::vector const &input) const { */ -/* bool valid = true; */ -/* for (auto p : input) { */ -/* valid &= p.is_valid(); */ -/* } */ -/* return valid; */ -/* } */ +bool ConcatAttrs::is_valid( + std::vector const &input) const { + bool valid = true; + for (auto p : input) { + valid &= p.is_valid(); + } + return valid; +} + +ParallelTensorShape + get_output_shape(ConcatAttrs const &attrs, + std::vector const &inputs) { + ParallelTensorShape output = inputs[0]; + for (auto &i : inputs) { + 
output.at(attrs.axis).size += i.at(attrs.axis).size; + } +} } // namespace FlexFlow From ba8386f3d3d1c8c46fc8e1d1b8171702002643f6 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:29:01 +0000 Subject: [PATCH 08/69] try to implement the conv2d --- lib/op-attrs/include/op-attrs/ops/conv_2d.h | 4 +++ lib/op-attrs/src/conv_2d.cc | 30 +++++++-------------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d.h b/lib/op-attrs/include/op-attrs/ops/conv_2d.h index 3034dc8c62..c8491877a7 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d.h @@ -14,6 +14,7 @@ struct Conv2DAttrs { padding_w, groups; req> activation; req use_bias; + bool is_valid(TensorShape const & input) const; }; FF_VISITABLE_STRUCT(Conv2DAttrs, @@ -32,6 +33,9 @@ CHECK_VALID_OP_ATTR(Conv2DAttrs); TensorShape get_kernel_shape(Conv2DAttrs const &, TensorShape const &); TensorShape get_bias_shape(Conv2DAttrs const &, TensorShape const &); +ParallelTensorShape get_output_shape(Conv2DAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index d000d31feb..9e37031eae 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -81,27 +81,17 @@ std::vector return mappings; } -/* bool Conv2DAttrs::is_valid(ParallelTensorShape const &input_shape) const { */ -/* bool is_valid = true; */ -/* is_valid &= input_shape.is_valid(); */ -/* is_valid &= this->calculate_output_shape(input_shape).is_valid(); */ -/* is_valid &= this->calculate_kernel_shape(input_shape).is_valid(); */ -/* if (use_bias) { */ -/* is_valid &= this->calculate_bias_shape(input_shape).is_valid(); */ -/* } */ - -/* // TODO FIXME: Currently disable parallelizing the height and width - * dimension */ -/* if (input_shape.at(0).degree > 1 || input_shape.at(1).degree > 1) { */ -/* return false; */ -/* } */ - -/* return 
is_valid; */ +bool Conv2DAttrs::is_valid(TensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + return true; +} -/* } */ +//according to pytorch, the input shape: [] +ParallelTensorShape get_output_shape(Conv2DAttrs const & attrs, + ParallelTensorShape const & input) { -/* OperatorType Conv2DAttrs::op_type() const { */ -/* return OP_CONV2D; */ -/* } */ +} } // namespace FlexFlow From 5efd4ff6725a7f7a835b14241b32953df7e75530 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:36:51 +0000 Subject: [PATCH 09/69] add conv_2d --- lib/op-attrs/src/conv_2d.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index 9e37031eae..d5d2ad90c1 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -88,10 +88,20 @@ bool Conv2DAttrs::is_valid(TensorShape const &input) const { return true; } -//according to pytorch, the input shape: [] +//according to pytorch, the input shape: [b, input_channel, input_h, input_w] +//kernel shape: [output_channel, input_channel, kernel_h, kernel_w] +//we may have stide_h and padding_h +//output shape: [b, output_channel, output_h, output_w] +//output_h = (input_h + 2 * padding_h - kernel_h) / stride_h + 1 +//output_w = (input_w + 2 * padding_w - kernel_w) / stride_w + 1 ParallelTensorShape get_output_shape(Conv2DAttrs const & attrs, ParallelTensorShape const & input) { - + ParallelTensorShape output = input; + output.at(ff_dim_t(1)).size = attrs.out_channels; + output.at(ff_dim_t(2)).size = (input.at(ff_dim_t(2)).size + + 2 * attrs.padding_h - attrs.kernel_h) / attrs.stride_h + 1; + output.at(ff_dim_t(3)).size = (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) /attrs.stride_w +1; + return output; } } // namespace FlexFlow From fbdb407e5665cf0a895f2cc5ddf5892296dff7c1 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:40:59 +0000 Subject: [PATCH 10/69] add dropout 
--- lib/op-attrs/include/op-attrs/ops/dropout.h | 4 ++++ lib/op-attrs/src/dropout.cc | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 lib/op-attrs/src/dropout.cc diff --git a/lib/op-attrs/include/op-attrs/ops/dropout.h b/lib/op-attrs/include/op-attrs/ops/dropout.h index 8e0049f526..04f244f27f 100644 --- a/lib/op-attrs/include/op-attrs/ops/dropout.h +++ b/lib/op-attrs/include/op-attrs/ops/dropout.h @@ -10,10 +10,14 @@ namespace FlexFlow { struct DropoutAttrs { req rate; req seed; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(DropoutAttrs, rate, seed); CHECK_VALID_OP_ATTR(DropoutAttrs); +ParallelTensorShape get_output_shape(DropoutAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/dropout.cc b/lib/op-attrs/src/dropout.cc new file mode 100644 index 0000000000..dc60d8cf94 --- /dev/null +++ b/lib/op-attrs/src/dropout.cc @@ -0,0 +1,19 @@ +#include "dropout.h" +#include "op-attrs/get_output_shapes.h" + +namespace FlexFlow { + +bool DropoutAttrs::is_valid(ParallelTensorShape const & input) const { + if(!input.is_valid()) { + return false; + } + return true; +} + +ParallelTensorShape get_output_shape(DropoutAttrs const &attrs, + ParallelTensorShape const &input) { + ParallelTensorShape output = input; + return output; +} + +} // namespace FlexFlow \ No newline at end of file From 32aa332743d9fb58b539d9c4b75adeebcd946e84 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:45:02 +0000 Subject: [PATCH 11/69] add element binary --- .../include/op-attrs/ops/element_binary.h | 6 ++++ lib/op-attrs/src/element_binary.cc | 36 ++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/include/op-attrs/ops/element_binary.h b/lib/op-attrs/include/op-attrs/ops/element_binary.h index c4a096166d..f455333347 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_binary.h +++ 
b/lib/op-attrs/include/op-attrs/ops/element_binary.h @@ -14,6 +14,8 @@ struct ElementBinaryAttrs { req compute_type; req should_broadcast_lhs; req should_broadcast_rhs; + bool is_valid(ParallelTensorShape const & lhs, + ParallelTensorShape const & rhs) const; }; FF_VISITABLE_STRUCT(ElementBinaryAttrs, type, @@ -22,6 +24,10 @@ FF_VISITABLE_STRUCT(ElementBinaryAttrs, should_broadcast_rhs); CHECK_VALID_OP_ATTR(ElementBinaryAttrs); +ParallelTensorShape get_output_shape(ElementBinaryAttrs const &, + ParallelTensorShape const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/element_binary.cc b/lib/op-attrs/src/element_binary.cc index b713c6753f..2e0b8f8e34 100644 --- a/lib/op-attrs/src/element_binary.cc +++ b/lib/op-attrs/src/element_binary.cc @@ -1,3 +1,37 @@ #include "op-attrs/ops/element_binary.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool ElementBinaryAttrs::is_valid(ParallelTensorShape const & input1, + ParallelTensorShape const & input2) const { + if(!input1.is_valid() || !input2.is_valid()) { + return false; + } + return true; +} + +ParallelTensorShape get_output_shape(ElementBinaryAttrs const & atts, + ParallelTensorShape const & lhs, + ParallelTensorShape const & rhs) { + ParallelTensorShape output = lhs.num_dims() >= rhs.num_dims() ? 
lhs : rhs; + for (int i = 0; i < output.num_dims(); i++) { + if (i >= lhs.num_dims()) { + output.at(ff_dim_t(i)) = rhs.at(ff_dim_t(i)); + } else if (i >= rhs.num_dims()) { + output.at(ff_dim_t(i)) = lhs.at(ff_dim_t(i)); + } else if (lhs.at(ff_dim_t(i)).size == rhs.at(ff_dim_t(i)).size) { + output.at(ff_dim_t(i)) = lhs.at(ff_dim_t(i)); + } else if (lhs.at(ff_dim_t(i)).size == 1) { + output.at(ff_dim_t(i)) = rhs.at(ff_dim_t(i)); + } else if (rhs.at(ff_dim_t(i)).size == 1) { + output.at(ff_dim_t(i)) = lhs.at(ff_dim_t(i)); + } else { + assert(false && "Operands could not be broadcast together"); + exit(0); + } + } + + return output; +} + +} // namespace FlexFlow From 9a85d59a70b2d85647eae540adf96bab6fd81a25 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:46:21 +0000 Subject: [PATCH 12/69] add elemenet unary --- .../include/op-attrs/ops/element_unary.h | 4 ++++ lib/op-attrs/src/element_unary.cc | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/include/op-attrs/ops/element_unary.h b/lib/op-attrs/include/op-attrs/ops/element_unary.h index 1b72e83cb5..562c50e4ed 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_unary.h +++ b/lib/op-attrs/include/op-attrs/ops/element_unary.h @@ -18,10 +18,14 @@ CHECK_VALID_OP_ATTR(ElementScalarUnaryAttrs); struct ElementUnaryAttrs { req op; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ElementUnaryAttrs, op); CHECK_VALID_OP_ATTR(ElementUnaryAttrs); +ParallelTensorShape get_output_shape(ElementUnaryAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/element_unary.cc b/lib/op-attrs/src/element_unary.cc index 481151fafb..b59ba92529 100644 --- a/lib/op-attrs/src/element_unary.cc +++ b/lib/op-attrs/src/element_unary.cc @@ -1,3 +1,17 @@ #include "op-attrs/ops/element_unary.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool 
ElementUnaryAttrs::is_valid(ParallelTensorShape const & input) const { + if(!input.is_valid()) { + return false; + } + return true; +} + +ParallelTensorShape get_output_shape(ElementUnaryAttrs const & atts, + ParallelTensorShape const & input) { + ParallelTensorShape output = input; + return output; + +} // namespace FlexFlow From 698a72980d923551cb6ed81c82d6831436d56c33 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:53:30 +0000 Subject: [PATCH 13/69] add embedding --- lib/op-attrs/include/op-attrs/ops/embedding.h | 4 ++++ lib/op-attrs/src/embedding.cc | 21 ++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/include/op-attrs/ops/embedding.h b/lib/op-attrs/include/op-attrs/ops/embedding.h index 8b00fa22ce..c7af920d5a 100644 --- a/lib/op-attrs/include/op-attrs/ops/embedding.h +++ b/lib/op-attrs/include/op-attrs/ops/embedding.h @@ -19,10 +19,14 @@ struct EmbeddingAttrs { req num_entries, out_channels; req aggr; req data_type; + bool is_valid(ParallelTensorShape const & input) const; }; FF_VISITABLE_STRUCT(EmbeddingAttrs, num_entries, out_channels, aggr, data_type); CHECK_VALID_OP_ATTR(EmbeddingAttrs); +ParallelTensorShape get_output_shape(EmbeddingAttrs const &, + ParallelTensorShape const &); + TensorShape get_weights_shape(EmbeddingAttrs const &, TensorShape const &); } // namespace FlexFlow diff --git a/lib/op-attrs/src/embedding.cc b/lib/op-attrs/src/embedding.cc index 02cbfaa031..b782e1282c 100644 --- a/lib/op-attrs/src/embedding.cc +++ b/lib/op-attrs/src/embedding.cc @@ -1,3 +1,22 @@ #include "op-attrs/ops/embedding.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool EmbeddingAttrs::is_valid(ParallelTensorShape const & input) const { + if(!input.is_valid()) { + return false; + } + return true; +} + +//pytorch nn.Embedding +//Embedding OP: (num_embeddings, embedding_dim) (num_entries, out_channels) +//Input: (batch_size, seq_len) +//Output: (batch_size, seq_len, embedding_dim) 
+ParallelTensorShape get_output_shape(EmbeddingAttrs const & atts, + ParallelTensorShape const & input) { + ParallelTensorShape output = input; + output.at(ff_dim_t(1)).size = input.at(ff_dim_t(1)).size; + output.at(ff_dim_t(2)).size= atts.out_channels; + return output; +} // namespace FlexFlow From 72f43bcec6560e11e78e472711542be246d09a6f Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 8 Oct 2023 21:57:51 +0000 Subject: [PATCH 14/69] add flat --- lib/op-attrs/src/flat.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/op-attrs/src/flat.cc b/lib/op-attrs/src/flat.cc index 75d31beae4..65da5be1f0 100644 --- a/lib/op-attrs/src/flat.cc +++ b/lib/op-attrs/src/flat.cc @@ -14,6 +14,22 @@ namespace Output { constexpr int NUMDIM = 3, CHANNEL = 0, SAMPLE = 1, REPLICA = 2; } +//flat is like the pytorch view +//tensor = torch.randn(2, 3, 4) ,flattened_tensor = tensor.view(-1) #shape: (24) +ParallelTensorShape get_output_shape(FlatAttrs const &attrs, + ParallelTensorShape const &input) { + ParallelTensorShape output_shape(input.dims, input.data_type); + + output_shape.at(ff_dim_t(Output::CHANNEL)).size = + input.at(ff_dim_t(Input::CHANNEL)).size * + input.at(ff_dim_t(Input::HEIGHT)).size * + input.at(ff_dim_t(Input::WIDTH)).size; + output_shape.at(ff_dim_t(Output::CHANNEL)).degree = + input.at(ff_dim_t(Input::CHANNEL)).degree; + + return output_shape; +} + /* bool FlatAttrs::is_valid(ParallelTensorShape const &input) const { */ /* ParallelTensorShape output_shape = this->calculate_output_shape(input); */ From e0f05be4d92ca7f5839e516acef1cefc9f2bc859 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 13:51:34 +0000 Subject: [PATCH 15/69] leave the get_otput_shape for gather --- lib/op-attrs/include/op-attrs/ops/gather.h | 5 +++++ lib/op-attrs/src/gather.cc | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/lib/op-attrs/include/op-attrs/ops/gather.h b/lib/op-attrs/include/op-attrs/ops/gather.h index 
ca2406ef75..55b438cc15 100644 --- a/lib/op-attrs/include/op-attrs/ops/gather.h +++ b/lib/op-attrs/include/op-attrs/ops/gather.h @@ -10,10 +10,15 @@ namespace FlexFlow { struct GatherAttrs { ff_dim_t dim; + bool is_valid(ParallelTensorShape const &, ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(GatherAttrs, dim); CHECK_VALID_OP_ATTR(GatherAttrs); +std::vector get_output_shapes(GatherAttrs const &, + ParallelTensorShape const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/gather.cc b/lib/op-attrs/src/gather.cc index 4f2c13c794..66f4163a6d 100644 --- a/lib/op-attrs/src/gather.cc +++ b/lib/op-attrs/src/gather.cc @@ -2,6 +2,26 @@ namespace FlexFlow { +bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, + ParallelTensorShape const &rhs) const { + if (lhs.dims.num_dims() != rhs.dims.num_dims()) { + return false; + } + for (auto i : lhs.dims) { + if (ff_dim_t(i.size) != this->dim && + lhs.at(ff_dim_t(i.size)).size < rhs.at(ff_dim_t(i.size)).size) { + return false; + } + } + return true; +} + +std::vector get_output_shapes(GatherAttrs const & attrs, + ParallelTensorShape const & lhs, + ParallelTensorShape const & rhs ) { + +} + /* bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, * ParallelTensorShape const &rhs) const { */ /* if (lhs.num_dims() != rhs.num_dims()) { */ From 1d18f35c6066ccafc3407342528ff5cb9864d8ae Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 14:03:43 +0000 Subject: [PATCH 16/69] skip groupby --- lib/op-attrs/include/op-attrs/ops/gather.h | 4 ++-- lib/op-attrs/include/op-attrs/ops/groupby.h | 5 +++++ lib/op-attrs/src/gather.cc | 4 +++- lib/op-attrs/src/groupby.cc | 19 ++++++++++++++++++- 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/gather.h b/lib/op-attrs/include/op-attrs/ops/gather.h index 55b438cc15..44ec8b1fd7 100644 --- a/lib/op-attrs/include/op-attrs/ops/gather.h +++ 
b/lib/op-attrs/include/op-attrs/ops/gather.h @@ -15,10 +15,10 @@ struct GatherAttrs { FF_VISITABLE_STRUCT(GatherAttrs, dim); CHECK_VALID_OP_ATTR(GatherAttrs); + std::vector get_output_shapes(GatherAttrs const &, - ParallelTensorShape const &, + ParallelTensorShape const & , ParallelTensorShape const &); - } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/groupby.h b/lib/op-attrs/include/op-attrs/ops/groupby.h index 174c40242e..702cbd2a1c 100644 --- a/lib/op-attrs/include/op-attrs/ops/groupby.h +++ b/lib/op-attrs/include/op-attrs/ops/groupby.h @@ -10,10 +10,15 @@ namespace FlexFlow { struct Group_byAttrs { req n; req alpha; + bool is_valid(ParallelTensorShape const &, ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(Group_byAttrs, n, alpha); CHECK_VALID_OP_ATTR(Group_byAttrs); +ParallelTensorShape get_output_shape(Group_byAttrs const &, + ParallelTensorShape const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/gather.cc b/lib/op-attrs/src/gather.cc index 66f4163a6d..d514b439d4 100644 --- a/lib/op-attrs/src/gather.cc +++ b/lib/op-attrs/src/gather.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/gather.h" +#include "utils/exception.decl.h" namespace FlexFlow { @@ -16,10 +17,11 @@ bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, return true; } +//todo: why return a vector? 
std::vector get_output_shapes(GatherAttrs const & attrs, ParallelTensorShape const & lhs, ParallelTensorShape const & rhs ) { - + NOT_IMPLEMENTED(); } /* bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, diff --git a/lib/op-attrs/src/groupby.cc b/lib/op-attrs/src/groupby.cc index 96c9db2838..efe22e2a25 100644 --- a/lib/op-attrs/src/groupby.cc +++ b/lib/op-attrs/src/groupby.cc @@ -1,3 +1,20 @@ #include "op-attrs/ops/groupby.h" +#include "utils/exception.decl.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool Group_byAttrs::is_valid(ParallelTensorShape const &lhs, + ParallelTensorShape const &rhs) const { + if(!lhs.is_valid() || !rhs.is_valid()) { + return false; + } + NOT_IMPLEMENTED(); +} + +ParallelTensorShape get_output_shape(Group_byAttrs const & attrs, + ParallelTensorShape const & lhs, + ParallelTensorShape const & rhs) { + NOT_IMPLEMENTED(); +} + +} // namespace FlexFlow From c81d5f81179f5b1e0180ff74b2a55c2b56491bf6 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 14:10:52 +0000 Subject: [PATCH 17/69] add layer norm --- lib/op-attrs/include/op-attrs/ops/layer_norm.h | 4 ++++ lib/op-attrs/src/layer_norm.cc | 18 +++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/include/op-attrs/ops/layer_norm.h b/lib/op-attrs/include/op-attrs/ops/layer_norm.h index dab055b2c9..15b6729262 100644 --- a/lib/op-attrs/include/op-attrs/ops/layer_norm.h +++ b/lib/op-attrs/include/op-attrs/ops/layer_norm.h @@ -12,10 +12,14 @@ struct LayerNormAttrs { stack_vector axes; req elementwise_affine; req eps; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(LayerNormAttrs, axes, elementwise_affine, eps); CHECK_VALID_OP_ATTR(LayerNormAttrs); +ParallelTensorShape get_output_shape(LayerNormAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/layer_norm.cc b/lib/op-attrs/src/layer_norm.cc index ab88de3622..8a660f733c 100644 
--- a/lib/op-attrs/src/layer_norm.cc +++ b/lib/op-attrs/src/layer_norm.cc @@ -1,3 +1,19 @@ #include "op-attrs/ops/layer_norm.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool LayerNormAttrs::is_valid(ParallelTensorShape const & input) const { + if(!input.is_valid()) { + return false; + } + return true; +} + +//todo: maybe we need to set the degree of parallel_dim +ParallelTensorShape get_output_shape(LayerNormAttrs const & attrs, + ParallelTensorShape const & input) { + ParallelTensorShape output = input; + return output; +} + +} // namespace FlexFlow From d721da71c8463394939e7a1829fb26a1e7dbc066 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 14:20:00 +0000 Subject: [PATCH 18/69] add linear --- lib/op-attrs/include/op-attrs/ops/linear.h | 4 ++++ lib/op-attrs/src/linear.cc | 23 +++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/include/op-attrs/ops/linear.h b/lib/op-attrs/include/op-attrs/ops/linear.h index 3be8be2040..54a3864e8d 100644 --- a/lib/op-attrs/include/op-attrs/ops/linear.h +++ b/lib/op-attrs/include/op-attrs/ops/linear.h @@ -29,11 +29,15 @@ struct LinearAttrs { req data_type; req activation; req> regularizer; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT( LinearAttrs, out_channels, use_bias, data_type, activation, regularizer); CHECK_VALID_OP_ATTR(LinearAttrs); +ParallelTensorShape get_output_shape(LinearAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/linear.cc b/lib/op-attrs/src/linear.cc index 16a94e7f6c..3aa361c342 100644 --- a/lib/op-attrs/src/linear.cc +++ b/lib/op-attrs/src/linear.cc @@ -1,3 +1,24 @@ #include "op-attrs/ops/linear.h" +#include "op-attrs/ff_dim.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool LinearAttrs::is_valid(ParallelTensorShape const & input) const { + if(!input.is_valid()) { + return false; + } + return true; +} + 
+//pytorch: input shape:{batch_size, input_channels} +//pytorch linearattrs: should be {input_channels, output_channels} +//pytorch: output shape:{batch_size, output_channels} +//question: the Linearattrs doesn't have input_channels +ParallelTensorShape get_output_shape(LinearAttrs const & atts, + ParallelTensorShape const & input) { + ParallelTensorShape out_shape = input; + out_shape.at(ff_dim_t(0)).size = atts.out_channels; + return out_shape; +} + +} // namespace FlexFlow From 23a266e019e5e73c4fa26e119be28f0c5cd2aee8 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 14:30:31 +0000 Subject: [PATCH 19/69] add pool2d --- lib/op-attrs/include/op-attrs/ops/pool_2d.h | 4 +++ lib/op-attrs/src/pool_2d.cc | 31 +++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/lib/op-attrs/include/op-attrs/ops/pool_2d.h b/lib/op-attrs/include/op-attrs/ops/pool_2d.h index efe29b3b2e..b688be85f5 100644 --- a/lib/op-attrs/include/op-attrs/ops/pool_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/pool_2d.h @@ -17,6 +17,7 @@ struct Pool2DAttrs { req kernel_h, kernel_w, stride_h, stride_w, padding_h, padding_w; req pool_type; req activation; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(Pool2DAttrs, kernel_h, @@ -29,6 +30,9 @@ FF_VISITABLE_STRUCT(Pool2DAttrs, activation); CHECK_VALID_OP_ATTR(Pool2DAttrs); +ParallelTensorShape get_output_shape(Pool2DAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow namespace fmt { diff --git a/lib/op-attrs/src/pool_2d.cc b/lib/op-attrs/src/pool_2d.cc index 0867aeb344..8587b114a6 100644 --- a/lib/op-attrs/src/pool_2d.cc +++ b/lib/op-attrs/src/pool_2d.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/pool_2d.h" +#include "op-attrs/ff_dim.h" #include "parallel_dim_mapping_record.h" #include "parallel_dim_mapping_record_solver.h" @@ -39,6 +40,36 @@ static ParallelDimMappingSolution return solve_parallel_dim_mappings(construct_mappings(input), {input}, 0, 1); } +bool 
Pool2DAttrs::is_valid(ParallelTensorShape const & input) const { + if(!input.is_valid()) { + return false; + } + return true; +} + +//pytorch: we have two type of pool2d, maxpool2d and avgpool2d +//input shape: (batch_size, channels, input_height, input_width) +//for avgpool2d, output shape: (batch_size, channels, 1, 1) +//for maxpool2d, output shape: (batch_size, channels, output_height, output_width) +//output_height = (input_height + 2 * padding_h - kernel_h) / stride_h + 1 +//output_width = (input_width + 2 * padding_w - kernel_w) / stride_w + 1 +ParallelTensorShape get_output_shape(Pool2DAttrs const & attrs, + ParallelTensorShape const & input) { + ParallelTensorShape output_shape = input; + if(attrs.pool_type == PoolOp::AVG) { + output_shape.at(ff_dim_t(2)).size = 1; + output_shape.at(ff_dim_t(3)).size = 1; + } else if(attrs.pool_type == PoolOp::MAX) { + output_shape.at(ff_dim_t(2)).size = (input.at(ff_dim_t(2)).size + 2 * attrs.padding_h - attrs.kernel_h) / attrs.stride_h + 1; + output_shape.at(ff_dim_t(3)).size = (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) / attrs.stride_w + 1; + } else { + assert(false && "unsupported pool type"); + } + return output_shape; +} + +} + /* ParallelTensorShape Pool2DAttrs::calculate_output_shape(ParallelTensorShape * const &input) const { */ /* return solve_mappings(input).output_shapes.at(0); */ From ee9bbaabfe5c9c78bc7f0b6424960debf91db97c Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 14:37:15 +0000 Subject: [PATCH 20/69] leave the reduce --- lib/op-attrs/include/op-attrs/ops/reduce.h | 4 ++++ lib/op-attrs/src/reduce.cc | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/include/op-attrs/ops/reduce.h b/lib/op-attrs/include/op-attrs/ops/reduce.h index 193d3b0dc8..c18d4cd888 100644 --- a/lib/op-attrs/include/op-attrs/ops/reduce.h +++ b/lib/op-attrs/include/op-attrs/ops/reduce.h @@ -14,10 +14,14 @@ struct ReduceAttrs { stack_vector axes; req 
op_type; req keepdims; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReduceAttrs, axes, op_type, keepdims); CHECK_VALID_OP_ATTR(ReduceAttrs); +ParallelTensorShape get_output_shape(ReduceAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/reduce.cc b/lib/op-attrs/src/reduce.cc index 9d1770d5be..f6e4c1c829 100644 --- a/lib/op-attrs/src/reduce.cc +++ b/lib/op-attrs/src/reduce.cc @@ -1,3 +1,15 @@ #include "op-attrs/ops/reduce.h" +#include "utils/exception.decl.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool ReduceAttrs::is_valid(ParallelTensorShape const & input) const { + NOT_IMPLEMENTED() +} + +ParallelTensorShape get_output_shape(ReduceAttrs const & attrs, + ParallelTensorShape const & input) { + NOT_IMPLEMENTED() +} + +} // namespace FlexFlow From 5e354baf7e90a902ce2158acc7d06c87e6a5ac1c Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 14:42:17 +0000 Subject: [PATCH 21/69] add reduction --- lib/op-attrs/include/op-attrs/ops/reduction.h | 4 ++++ lib/op-attrs/src/reduction.cc | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/lib/op-attrs/include/op-attrs/ops/reduction.h b/lib/op-attrs/include/op-attrs/ops/reduction.h index f848f879fc..a8e7abd318 100644 --- a/lib/op-attrs/include/op-attrs/ops/reduction.h +++ b/lib/op-attrs/include/op-attrs/ops/reduction.h @@ -11,10 +11,14 @@ namespace FlexFlow { struct ReductionAttrs { ff_dim_t reduction_dim; req reduction_degree; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReductionAttrs, reduction_dim, reduction_degree); CHECK_VALID_OP_ATTR(ReductionAttrs); +ParallelTensorShape get_output_shape(ReductionAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/reduction.cc b/lib/op-attrs/src/reduction.cc index 22fc9bab6a..9196000a05 100644 --- a/lib/op-attrs/src/reduction.cc +++ 
b/lib/op-attrs/src/reduction.cc @@ -10,4 +10,19 @@ namespace FlexFlow { /* return output; */ /* } */ +bool ReductionAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + return true; +} + +ParallelTensorShape get_output_shape(ReductionAttrs const &attrs, + ParallelTensorShape const &input_shape) { + ParallelTensorShape output(input_shape.dims, input_shape.data_type); + output.at(attrs.reduction_dim).degree /= attrs.reduction_degree; + output.at(attrs.reduction_dim).size /= attrs.reduction_degree; + return output; +} + } // namespace FlexFlow From e95f195642d30641716a328140c8759250c00bbc Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 15:04:56 +0000 Subject: [PATCH 22/69] add reshape --- .../include/op-attrs/ops/repartition.h | 4 +++ lib/op-attrs/include/op-attrs/ops/replicate.h | 4 +++ lib/op-attrs/include/op-attrs/ops/reshape.h | 5 +++ lib/op-attrs/src/repartition.cc | 18 ++++++++++ lib/op-attrs/src/replicate.cc | 17 +++++++++- lib/op-attrs/src/reshape.cc | 33 ++++++++++++++++++- 6 files changed, 79 insertions(+), 2 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/repartition.h b/lib/op-attrs/include/op-attrs/ops/repartition.h index 83c4ae870b..a795017bf4 100644 --- a/lib/op-attrs/include/op-attrs/ops/repartition.h +++ b/lib/op-attrs/include/op-attrs/ops/repartition.h @@ -11,10 +11,14 @@ namespace FlexFlow { struct RepartitionAttrs { ff_dim_t repartition_dim; req repartition_degree; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(RepartitionAttrs, repartition_dim, repartition_degree); CHECK_VALID_OP_ATTR(RepartitionAttrs); +ParallelTensorShape get_output_shape(RepartitionAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/replicate.h b/lib/op-attrs/include/op-attrs/ops/replicate.h index 92e64a4120..c2a9b6abf0 100644 --- a/lib/op-attrs/include/op-attrs/ops/replicate.h +++ 
b/lib/op-attrs/include/op-attrs/ops/replicate.h @@ -11,10 +11,14 @@ namespace FlexFlow { struct ReplicateAttrs { ff_dim_t replicate_dim; req replicate_degree; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReplicateAttrs, replicate_dim, replicate_degree); CHECK_VALID_OP_ATTR(ReplicateAttrs); +ParallelTensorShape get_output_shape(ReplicateAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/reshape.h b/lib/op-attrs/include/op-attrs/ops/reshape.h index b118482a2b..cf0eb07a95 100644 --- a/lib/op-attrs/include/op-attrs/ops/reshape.h +++ b/lib/op-attrs/include/op-attrs/ops/reshape.h @@ -3,16 +3,21 @@ #include "core.h" #include "op-attrs/tensor_shape.h" +#include "op-attrs/parallel_tensor_shape.h" #include "utils/visitable.h" namespace FlexFlow { struct ReshapeAttrs { TensorShape shape; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReshapeAttrs, shape); CHECK_VALID_OP_ATTR(ReshapeAttrs); +ParallelTensorShape get_output_shape(ReshapeAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/repartition.cc b/lib/op-attrs/src/repartition.cc index 672e68b4f6..ad037b7cf6 100644 --- a/lib/op-attrs/src/repartition.cc +++ b/lib/op-attrs/src/repartition.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/repartition.h" +#include "op-attrs/parallel_dim.h" namespace FlexFlow { @@ -8,4 +9,21 @@ namespace FlexFlow { /* return (dim.size % this->repartition_degree * dim.degree == 0); */ /* } */ +bool RepartitionAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + ParallelDim dim = input.at(this->repartition_dim); + return (dim.size % this->repartition_degree * dim.degree == 0); +} + +//this may be wrong partition by n multiplies degree by n and keeps shape the same +ParallelTensorShape get_output_shape(RepartitionAttrs const &attrs, + ParallelTensorShape const 
&input_shape) { + ParallelTensorShape output(input_shape.dims, input_shape.data_type); + output.at(attrs.repartition_dim).degree *= attrs.repartition_degree; + return output; +} + + } // namespace FlexFlow diff --git a/lib/op-attrs/src/replicate.cc b/lib/op-attrs/src/replicate.cc index 73ad288d8c..26861d3624 100644 --- a/lib/op-attrs/src/replicate.cc +++ b/lib/op-attrs/src/replicate.cc @@ -1,3 +1,18 @@ #include "op-attrs/ops/replicate.h" +#include "utils/exception.decl.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool ReplicateAttrs::is_valid(ParallelTensorShape const &input) const { + NOT_IMPLEMENTED(); +} + +//replicate by n multiplies degree by n and shape by n +ParallelTensorShape get_output_shape(ReplicateAttrs const & attrs, + ParallelTensorShape const & input) { + NOT_IMPLEMENTED(); +} + + + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/reshape.cc b/lib/op-attrs/src/reshape.cc index e8349e1f26..777f2eef1b 100644 --- a/lib/op-attrs/src/reshape.cc +++ b/lib/op-attrs/src/reshape.cc @@ -1,3 +1,34 @@ #include "op-attrs/ops/reshape.h" +#include "op-attrs/ff_dim.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +//pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? and the output is [4, 6] +bool ReshapeAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + std::size_t input_volume =1; + for (int i = 0; i < input.num_dims(); i++) { + input_volume *= input.at(ff_dim_t(i)).size; + } + std::size_t attrs_volume =1; + for (int i = 0; i < this->shape.dims.num_dims(); i++) { + attrs_volume *= this->shape.at(ff_dim_t(i)); + } + return (input_volume == attrs_volume); +} + +//pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? 
and the output is [4, 6] +//currently we doesn't consider the case of -1,we can support this later +//the input:[2,3,4], attrs.shape:[4,6], the output is [4, 6] +ParallelTensorShape get_output_shape(ReshapeAttrs const & attrs, + ParallelTensorShape const & input) { + + assert(attrs.is_valid(input) && "input is not valid"); + ParallelTensorDims dims{attrs.shape.dims}; + ParallelTensorShape output{dims, input.data_type}; + return output; +} + +} // namespace FlexFlow From f053a20c902ff8a17300184bbd13b4e7a771671c Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 15:19:42 +0000 Subject: [PATCH 23/69] add reverse draft --- lib/op-attrs/include/op-attrs/ops/reverse.h | 5 +++++ lib/op-attrs/src/reduce.cc | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/include/op-attrs/ops/reverse.h b/lib/op-attrs/include/op-attrs/ops/reverse.h index 6030285f14..a0bd38b9c9 100644 --- a/lib/op-attrs/include/op-attrs/ops/reverse.h +++ b/lib/op-attrs/include/op-attrs/ops/reverse.h @@ -4,15 +4,20 @@ #include "core.h" #include "op-attrs/ff_dim.h" #include "utils/visitable.h" +#include "op-attrs/parallel_tensor_shape.h" namespace FlexFlow { struct ReverseAttrs { ff_dim_t axis; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReverseAttrs, axis); CHECK_VALID_OP_ATTR(ReverseAttrs); +ParallelTensorShape get_output_shape(ReverseAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/reduce.cc b/lib/op-attrs/src/reduce.cc index f6e4c1c829..2c42e5cfad 100644 --- a/lib/op-attrs/src/reduce.cc +++ b/lib/op-attrs/src/reduce.cc @@ -1,5 +1,5 @@ #include "op-attrs/ops/reduce.h" -#include "utils/exception.decl.h" +#include "utils/exceptions.h" namespace FlexFlow { From 6f6f61e21cab4a2b30617ce0a22a2b4dbaa24e72 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Mon, 9 Oct 2023 15:21:44 +0000 Subject: [PATCH 24/69] add layer norm valid check --- lib/op-attrs/src/layer_norm.cc | 3 +++ 1 file 
changed, 3 insertions(+) diff --git a/lib/op-attrs/src/layer_norm.cc b/lib/op-attrs/src/layer_norm.cc index 8a660f733c..4e706cbd28 100644 --- a/lib/op-attrs/src/layer_norm.cc +++ b/lib/op-attrs/src/layer_norm.cc @@ -6,6 +6,9 @@ bool LayerNormAttrs::is_valid(ParallelTensorShape const & input) const { if(!input.is_valid()) { return false; } + if(input.num_dims() < 2) { + return false; + } return true; } From d47198e8c527965e33cfe90e95da262b7a16ff2a Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 11 Oct 2023 13:07:33 +0000 Subject: [PATCH 25/69] add replicate --- lib/op-attrs/src/replicate.cc | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/op-attrs/src/replicate.cc b/lib/op-attrs/src/replicate.cc index 26861d3624..b5e3e81d7e 100644 --- a/lib/op-attrs/src/replicate.cc +++ b/lib/op-attrs/src/replicate.cc @@ -1,16 +1,34 @@ #include "op-attrs/ops/replicate.h" +#include "op-attrs/parallel_dim.h" #include "utils/exception.decl.h" namespace FlexFlow { bool ReplicateAttrs::is_valid(ParallelTensorShape const &input) const { - NOT_IMPLEMENTED(); + if(!input.is_valid()) { + return false; + } + if(this->replicate_dim >= input.num_dims() || this->replicate_degree <= 0) { + return false; + } + + return true; } //replicate by n multiplies degree by n and shape by n +//seems it is like pytorch's repeat +//original_tensor = torch.tensor([1, 2, 3]) torch.Size([3]) +///replicated_tensor = original_tensor.repeat(3) torch.Size([9]) + +//original_tensor = torch.randn(2, 3, 4) torch.Size([2, 3, 4]) +//repeated_tensor = original_tensor.repeat(3, 1, 1) torch.Size([6, 3, 4]) + ParallelTensorShape get_output_shape(ReplicateAttrs const & attrs, ParallelTensorShape const & input) { - NOT_IMPLEMENTED(); + assert(attrs.is_valid(input)); + ParallelTensorShape output = input; + output.at(attrs.replicate_dim).size *= attrs.replicate_degree; + return output; } From ea0297e5856d5a0c043a1ad4e6eaed3ed889a3f3 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: 
Wed, 11 Oct 2023 13:19:34 +0000 Subject: [PATCH 26/69] add softmax --- lib/op-attrs/include/op-attrs/ops/softmax.h | 4 ++++ lib/op-attrs/src/softmax.cc | 21 ++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/include/op-attrs/ops/softmax.h b/lib/op-attrs/include/op-attrs/ops/softmax.h index 9a776737f5..8e7a00e661 100644 --- a/lib/op-attrs/include/op-attrs/ops/softmax.h +++ b/lib/op-attrs/include/op-attrs/ops/softmax.h @@ -10,10 +10,14 @@ namespace FlexFlow { struct SoftmaxAttrs { ff_dim_t dim; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(SoftmaxAttrs, dim); CHECK_VALID_OP_ATTR(SoftmaxAttrs); +ParallelTensorShape get_output_shape(SoftmaxAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/softmax.cc b/lib/op-attrs/src/softmax.cc index 9f95da4fb7..f5795cc037 100644 --- a/lib/op-attrs/src/softmax.cc +++ b/lib/op-attrs/src/softmax.cc @@ -1,3 +1,22 @@ #include "op-attrs/ops/softmax.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool SoftmaxAttrs::is_valid(ParallelTensorShape const &input) const { + if(!input.is_valid()) { + return false; + } + if(input.num_dims() < 2) { + return false; + } + return true; +} + +ParallelTensorShape get_output_shape(SoftmaxAttrs const & attrs, + ParallelTensorShape const & input) { + assert(attrs.is_valid(input)); + ParallelTensorShape output = input; + return output; +} + +} // namespace FlexFlow From 125a9ad5b98c07e0421bdc5e32e2942686002758 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 11 Oct 2023 14:08:12 +0000 Subject: [PATCH 27/69] add split --- lib/op-attrs/include/op-attrs/ops/split.h | 3 ++ lib/op-attrs/src/split.cc | 34 ++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/include/op-attrs/ops/split.h b/lib/op-attrs/include/op-attrs/ops/split.h index fa66bc46f5..02b5e3b45e 100644 --- 
a/lib/op-attrs/include/op-attrs/ops/split.h +++ b/lib/op-attrs/include/op-attrs/ops/split.h @@ -11,9 +11,12 @@ namespace FlexFlow { struct SplitAttrs { req> splits; ff_dim_t axis; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(SplitAttrs, splits, axis); CHECK_VALID_OP_ATTR(SplitAttrs); +std::vector get_output_shapes(SplitAttrs const &, + ParallelTensorShape const &); } // namespace FlexFlow diff --git a/lib/op-attrs/src/split.cc b/lib/op-attrs/src/split.cc index acda8f3262..6ba0b711d1 100644 --- a/lib/op-attrs/src/split.cc +++ b/lib/op-attrs/src/split.cc @@ -1,3 +1,35 @@ #include "op-attrs/ops/split.h" +#include "op-attrs/ff_dim.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool SplitAttrs::is_valid(ParallelTensorShape const & input) const { + if(!input.is_valid()) { + return false; + } + std::size_t dims_sum = 0; + + for(std::size_t i = 0; i < this->splits.size(); ++i) { + dims_sum += splits[i]; + } + + if(dims_sum != input.at(ff_dim_t(axis)).size) { + return false; + } + return true; +} + + +std::vector get_output_shapes(SplitAttrs const & attrs, + ParallelTensorShape const & input) { + + assert(attrs.is_valid(input)); + std::vector outputs; + for(std::size_t i = 0 ; i < attrs.splits.size(); ++i) { + outputs.emplace_back(input); + outputs.back().at(ff_dim_t(attrs.axis)).size = attrs.splits[i]; + } + return outputs; +} + +} // namespace FlexFlow From 61e09c650dd6ed6e114b40e7cda321fc09c58aff Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 11 Oct 2023 14:24:58 +0000 Subject: [PATCH 28/69] add topk --- lib/op-attrs/include/op-attrs/ops/split.h | 3 +-- lib/op-attrs/include/op-attrs/ops/topk.h | 9 ++++++++- lib/op-attrs/src/topk.cc | 24 ++++++++++++++++++++++- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/split.h b/lib/op-attrs/include/op-attrs/ops/split.h index 02b5e3b45e..94a648382f 100644 --- a/lib/op-attrs/include/op-attrs/ops/split.h +++ 
b/lib/op-attrs/include/op-attrs/ops/split.h @@ -7,7 +7,6 @@ #include "utils/visitable.h" namespace FlexFlow { - struct SplitAttrs { req> splits; ff_dim_t axis; @@ -17,7 +16,7 @@ FF_VISITABLE_STRUCT(SplitAttrs, splits, axis); CHECK_VALID_OP_ATTR(SplitAttrs); std::vector get_output_shapes(SplitAttrs const &, ParallelTensorShape const &); - + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/topk.h b/lib/op-attrs/include/op-attrs/ops/topk.h index 413855913c..0db94fcaf4 100644 --- a/lib/op-attrs/include/op-attrs/ops/topk.h +++ b/lib/op-attrs/include/op-attrs/ops/topk.h @@ -7,13 +7,20 @@ namespace FlexFlow { +//I think we should add axis +//pytorch code: torch.topk(input_tensor, k, largest=True, sorted=True, dim=dim) struct TopKAttrs { req k; req sorted; + req axis; + bool is_valid(ParallelTensorShape const &) const; }; -FF_VISITABLE_STRUCT(TopKAttrs, k, sorted); +FF_VISITABLE_STRUCT(TopKAttrs, k, sorted,axis); CHECK_VALID_OP_ATTR(TopKAttrs); +ParallelTensorShape get_output_shape(TopKAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/topk.cc b/lib/op-attrs/src/topk.cc index 9d701e4868..00c2d97902 100644 --- a/lib/op-attrs/src/topk.cc +++ b/lib/op-attrs/src/topk.cc @@ -1,3 +1,25 @@ #include "op-attrs/ops/topk.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool TopKAttrs::is_valid(ParallelTensorShape const & input) const { + if(!input.is_valid()) { + return false; + } + + if(k > input.at(ff_dim_t(axis)).size) { + return false; + } + return true; +} + + +ParallelTensorShape get_output_shape(TopKAttrs const & attrs, + ParallelTensorShape const & input) { + assert(attrs.is_valid(input)); + ParallelTensorShape output = input; + output.at(ff_dim_t(attrs.axis)).size = attrs.k; + return output; +} + +} // namespace FlexFlow From f3d65246d8094623668f8ba7dd6c50f80f72b3e5 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 11 Oct 2023 14:33:54 +0000 Subject: [PATCH 
29/69] add transpose --- lib/op-attrs/include/op-attrs/ops/attention.h | 1 - .../include/op-attrs/ops/batch_matmul.h | 3 +- .../include/op-attrs/ops/batch_norm.h | 3 +- lib/op-attrs/include/op-attrs/ops/concat.h | 2 +- lib/op-attrs/include/op-attrs/ops/conv_2d.h | 2 +- .../include/op-attrs/ops/element_binary.h | 4 +- lib/op-attrs/include/op-attrs/ops/embedding.h | 2 +- lib/op-attrs/include/op-attrs/ops/gather.h | 3 +- lib/op-attrs/include/op-attrs/ops/reshape.h | 4 +- lib/op-attrs/include/op-attrs/ops/reverse.h | 2 +- lib/op-attrs/include/op-attrs/ops/split.h | 2 +- lib/op-attrs/include/op-attrs/ops/topk.h | 6 +- lib/op-attrs/include/op-attrs/ops/transpose.h | 4 ++ lib/op-attrs/src/attention.cc | 30 +++++----- lib/op-attrs/src/batch_matmul.cc | 38 ++++++------- lib/op-attrs/src/batch_norm.cc | 19 ++++--- lib/op-attrs/src/cast.cc | 8 +-- lib/op-attrs/src/combine.cc | 14 ++--- lib/op-attrs/src/conv_2d.cc | 27 +++++---- lib/op-attrs/src/dropout.cc | 14 ++--- lib/op-attrs/src/element_binary.cc | 18 +++--- lib/op-attrs/src/element_unary.cc | 14 ++--- lib/op-attrs/src/embedding.cc | 24 ++++---- lib/op-attrs/src/flat.cc | 5 +- lib/op-attrs/src/gather.cc | 9 +-- lib/op-attrs/src/get_output_shapes.cc | 2 - lib/op-attrs/src/groupby.cc | 16 +++--- lib/op-attrs/src/layer_norm.cc | 26 ++++----- lib/op-attrs/src/linear.cc | 28 +++++----- lib/op-attrs/src/pool_2d.cc | 55 +++++++++++-------- lib/op-attrs/src/reduce.cc | 11 ++-- lib/op-attrs/src/repartition.cc | 4 +- lib/op-attrs/src/replicate.cc | 22 ++++---- lib/op-attrs/src/reshape.cc | 39 ++++++------- lib/op-attrs/src/reverse.cc | 23 ++++++++ lib/op-attrs/src/softmax.cc | 24 ++++---- lib/op-attrs/src/split.cc | 50 ++++++++--------- lib/op-attrs/src/topk.cc | 29 +++++----- lib/op-attrs/src/transpose.cc | 40 +++++++++++++- 39 files changed, 352 insertions(+), 275 deletions(-) create mode 100644 lib/op-attrs/src/reverse.cc diff --git a/lib/op-attrs/include/op-attrs/ops/attention.h b/lib/op-attrs/include/op-attrs/ops/attention.h 
index 79469206a3..670e4018cc 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention.h +++ b/lib/op-attrs/include/op-attrs/ops/attention.h @@ -7,7 +7,6 @@ namespace FlexFlow { - template struct MultiHeadAttentionInputs : public use_visitable_cmp> { diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h index 6473f923a2..f64b2fd8fb 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h @@ -9,8 +9,7 @@ namespace FlexFlow { struct BatchMatmulAttrs { req a_seq_length_dim, b_seq_length_dim; - bool is_valid(ParallelTensorShape const &, - ParallelTensorShape const &); + bool is_valid(ParallelTensorShape const &, ParallelTensorShape const &); }; FF_VISITABLE_STRUCT(BatchMatmulAttrs, a_seq_length_dim, b_seq_length_dim); diff --git a/lib/op-attrs/include/op-attrs/ops/batch_norm.h b/lib/op-attrs/include/op-attrs/ops/batch_norm.h index 65ab18c33c..c35d7bcd41 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_norm.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_norm.h @@ -13,7 +13,8 @@ struct BatchNormAttrs { }; FF_VISITABLE_STRUCT(BatchNormAttrs, relu); -ParallelTensorShape get_output_shape(BatchNormAttrs const &, ParallelTensorShape const &); +ParallelTensorShape get_output_shape(BatchNormAttrs const &, + ParallelTensorShape const &); CHECK_VALID_OP_ATTR(BatchNormAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/concat.h b/lib/op-attrs/include/op-attrs/ops/concat.h index afe3e0dd8d..84def59066 100644 --- a/lib/op-attrs/include/op-attrs/ops/concat.h +++ b/lib/op-attrs/include/op-attrs/ops/concat.h @@ -10,7 +10,7 @@ namespace FlexFlow { struct ConcatAttrs { ff_dim_t axis; - bool is_valid(std::vector const & input) const; + bool is_valid(std::vector const &input) const; }; ParallelTensorShape get_output_shape(ConcatAttrs const &, diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d.h b/lib/op-attrs/include/op-attrs/ops/conv_2d.h index 
c8491877a7..31290d153e 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d.h @@ -14,7 +14,7 @@ struct Conv2DAttrs { padding_w, groups; req> activation; req use_bias; - bool is_valid(TensorShape const & input) const; + bool is_valid(TensorShape const &input) const; }; FF_VISITABLE_STRUCT(Conv2DAttrs, diff --git a/lib/op-attrs/include/op-attrs/ops/element_binary.h b/lib/op-attrs/include/op-attrs/ops/element_binary.h index f455333347..c068fcc45c 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_binary.h +++ b/lib/op-attrs/include/op-attrs/ops/element_binary.h @@ -14,8 +14,8 @@ struct ElementBinaryAttrs { req compute_type; req should_broadcast_lhs; req should_broadcast_rhs; - bool is_valid(ParallelTensorShape const & lhs, - ParallelTensorShape const & rhs) const; + bool is_valid(ParallelTensorShape const &lhs, + ParallelTensorShape const &rhs) const; }; FF_VISITABLE_STRUCT(ElementBinaryAttrs, type, diff --git a/lib/op-attrs/include/op-attrs/ops/embedding.h b/lib/op-attrs/include/op-attrs/ops/embedding.h index c7af920d5a..506b8a6186 100644 --- a/lib/op-attrs/include/op-attrs/ops/embedding.h +++ b/lib/op-attrs/include/op-attrs/ops/embedding.h @@ -19,7 +19,7 @@ struct EmbeddingAttrs { req num_entries, out_channels; req aggr; req data_type; - bool is_valid(ParallelTensorShape const & input) const; + bool is_valid(ParallelTensorShape const &input) const; }; FF_VISITABLE_STRUCT(EmbeddingAttrs, num_entries, out_channels, aggr, data_type); CHECK_VALID_OP_ATTR(EmbeddingAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/gather.h b/lib/op-attrs/include/op-attrs/ops/gather.h index 44ec8b1fd7..1789edf649 100644 --- a/lib/op-attrs/include/op-attrs/ops/gather.h +++ b/lib/op-attrs/include/op-attrs/ops/gather.h @@ -15,9 +15,8 @@ struct GatherAttrs { FF_VISITABLE_STRUCT(GatherAttrs, dim); CHECK_VALID_OP_ATTR(GatherAttrs); - std::vector get_output_shapes(GatherAttrs const &, - ParallelTensorShape const & , + 
ParallelTensorShape const &, ParallelTensorShape const &); } // namespace FlexFlow diff --git a/lib/op-attrs/include/op-attrs/ops/reshape.h b/lib/op-attrs/include/op-attrs/ops/reshape.h index cf0eb07a95..7fbe573c93 100644 --- a/lib/op-attrs/include/op-attrs/ops/reshape.h +++ b/lib/op-attrs/include/op-attrs/ops/reshape.h @@ -2,15 +2,15 @@ #define _FLEXFLOW_RESHAPE_ATTRS_H #include "core.h" -#include "op-attrs/tensor_shape.h" #include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/tensor_shape.h" #include "utils/visitable.h" namespace FlexFlow { struct ReshapeAttrs { TensorShape shape; - bool is_valid(ParallelTensorShape const &) const; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReshapeAttrs, shape); CHECK_VALID_OP_ATTR(ReshapeAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/reverse.h b/lib/op-attrs/include/op-attrs/ops/reverse.h index a0bd38b9c9..0c8657c6ec 100644 --- a/lib/op-attrs/include/op-attrs/ops/reverse.h +++ b/lib/op-attrs/include/op-attrs/ops/reverse.h @@ -3,8 +3,8 @@ #include "core.h" #include "op-attrs/ff_dim.h" -#include "utils/visitable.h" #include "op-attrs/parallel_tensor_shape.h" +#include "utils/visitable.h" namespace FlexFlow { diff --git a/lib/op-attrs/include/op-attrs/ops/split.h b/lib/op-attrs/include/op-attrs/ops/split.h index 94a648382f..e2abeb2581 100644 --- a/lib/op-attrs/include/op-attrs/ops/split.h +++ b/lib/op-attrs/include/op-attrs/ops/split.h @@ -16,7 +16,7 @@ FF_VISITABLE_STRUCT(SplitAttrs, splits, axis); CHECK_VALID_OP_ATTR(SplitAttrs); std::vector get_output_shapes(SplitAttrs const &, ParallelTensorShape const &); - + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/topk.h b/lib/op-attrs/include/op-attrs/ops/topk.h index 0db94fcaf4..914ac1afc2 100644 --- a/lib/op-attrs/include/op-attrs/ops/topk.h +++ b/lib/op-attrs/include/op-attrs/ops/topk.h @@ -7,15 +7,15 @@ namespace FlexFlow { -//I think we should add axis -//pytorch code: torch.topk(input_tensor, k, 
largest=True, sorted=True, dim=dim) +// I think we should add axis +// pytorch code: torch.topk(input_tensor, k, largest=True, sorted=True, dim=dim) struct TopKAttrs { req k; req sorted; req axis; bool is_valid(ParallelTensorShape const &) const; }; -FF_VISITABLE_STRUCT(TopKAttrs, k, sorted,axis); +FF_VISITABLE_STRUCT(TopKAttrs, k, sorted, axis); CHECK_VALID_OP_ATTR(TopKAttrs); ParallelTensorShape get_output_shape(TopKAttrs const &, diff --git a/lib/op-attrs/include/op-attrs/ops/transpose.h b/lib/op-attrs/include/op-attrs/ops/transpose.h index 87db435979..461aa0aacb 100644 --- a/lib/op-attrs/include/op-attrs/ops/transpose.h +++ b/lib/op-attrs/include/op-attrs/ops/transpose.h @@ -10,10 +10,14 @@ namespace FlexFlow { struct TransposeAttrs { req> perm; + bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(TransposeAttrs, perm); CHECK_VALID_OP_ATTR(TransposeAttrs); +ParallelTensorShape get_output_shape(TransposeAttrs const &, + ParallelTensorShape const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index c8148cd45d..a6ae56ddfd 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -11,7 +11,8 @@ namespace FlexFlow { /* return is_valid; */ /* } */ -bool MultiHeadAttentionAttrs::is_valid(MultiHeadAttentionInputs const & input) const { +bool MultiHeadAttentionAttrs::is_valid( + MultiHeadAttentionInputs const &input) const { bool valid = true; valid &= input.key.is_valid(); valid &= input.query.is_valid(); @@ -61,20 +62,22 @@ TensorShape return {dims, DataType::FLOAT}; } -//these two functions are not defined in the attention.h -// ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const &attrs, -// ParallelTensorShape const &query_shape, -// ParallelTensorShape const &key_shape, -// ParallelTensorShape const &value_shape) { -// /* ParallelDim replica_dim = query_shape.at(ff_dim_t(query_shape.num_dims() - -// * 2)); */ -// /* replica_dim.size = 
replica_dim.degree; */ +// these two functions are not defined in the attention.h +// ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const &attrs, +// ParallelTensorShape const &query_shape, +// ParallelTensorShape const &key_shape, +// ParallelTensorShape const &value_shape) +// { +// /* ParallelDim replica_dim = +// query_shape.at(ff_dim_t(query_shape.num_dims() - +// * 2)); */ +// /* replica_dim.size = replica_dim.degree; */ // /* ParallelDim */ // ParallelTensorShape output_shape = query_shape; -// output_shape.at(ff_dim_t(output_shape.num_dims() - 1)).size = attrs.embed_dim; -// return output_shape; +// output_shape.at(ff_dim_t(output_shape.num_dims() - 1)).size = +// attrs.embed_dim; return output_shape; // } // TensorShape get_output_shape(MultiHeadAttentionAttrs const &attrs, @@ -89,8 +92,9 @@ TensorShape // return get_tensor_shape_unsafe(parallel_shape); // } -ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const & attrs, - MultiHeadAttentionInputs const &inputs) { +ParallelTensorShape get_output_shape( + MultiHeadAttentionAttrs const &attrs, + MultiHeadAttentionInputs const &inputs) { ParallelTensorShape output_shape = inputs.query; output_shape.at(ff_dim_t(output_shape.num_dims() - 1)).size = attrs.embed_dim; return output_shape; diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index 170ee655d2..f30869e035 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -4,30 +4,30 @@ namespace FlexFlow { -//maybe we should add more check here -bool BatchMatmulAttrs::is_valid(ParallelTensorShape const & lhs, ParallelTensorShape const & rhs) { - if (!lhs.is_valid() || !rhs.is_valid()) { - return false; - } - if (lhs.num_dims() != rhs.num_dims()) { - return false; - } - - return true; +// maybe we should add more check here +bool BatchMatmulAttrs::is_valid(ParallelTensorShape const &lhs, + ParallelTensorShape const &rhs) { + if (!lhs.is_valid() || !rhs.is_valid()) { + return 
false; + } + if (lhs.num_dims() != rhs.num_dims()) { + return false; + } + + return true; } -//how to get the batch size? and lhs: [b, s1, k], rhs: [b, k, s1] -ParallelTensorShape get_output_shape(BatchMatmulAttrs const & attrs, - ParallelTensorShape const & lhs, - ParallelTensorShape const & rhs) { - ParallelTensorShape output_shape = lhs; +// how to get the batch size? and lhs: [b, s1, k], rhs: [b, k, s1] +ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, + ParallelTensorShape const &lhs, + ParallelTensorShape const &rhs) { + ParallelTensorShape output_shape = lhs; output_shape.at(ff_dim_t(0)).size = lhs.at(ff_dim_t(0)).size; output_shape.at(ff_dim_t(1)).size = attrs.a_seq_length_dim; output_shape.at(ff_dim_t(2)).size = attrs.b_seq_length_dim; - //TODO: Do we need to set the ParallelDim for output_shape - return output_shape; -} - + // TODO: Do we need to set the ParallelDim for output_shape + return output_shape; +} /* bool BatchMatmulAttrs::is_valid( */ /* ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) const { diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index a1123667d2..9b15913d1f 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -1,16 +1,17 @@ #include "op-attrs/ops/batch_norm.h" namespace FlexFlow { - - bool BatchNormAttrs::is_valid(ParallelTensorShape const & input) { - if(!input.is_valid()) { - return false; - } - return true; - } -ParallelTensorShape get_output_shape(BatchNormAttrs const & attrs, ParallelTensorShape const & input) { - return input; +bool BatchNormAttrs::is_valid(ParallelTensorShape const &input) { + if (!input.is_valid()) { + return false; + } + return true; +} + +ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, + ParallelTensorShape const &input) { + return input; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/cast.cc b/lib/op-attrs/src/cast.cc index e7dab4689f..d3a6961a2e 100644 --- a/lib/op-attrs/src/cast.cc 
+++ b/lib/op-attrs/src/cast.cc @@ -3,10 +3,10 @@ namespace FlexFlow { bool CastAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - return true; + if (!input.is_valid()) { + return false; + } + return true; } ParallelTensorShape get_output_shape(CastAttrs const &attrs, diff --git a/lib/op-attrs/src/combine.cc b/lib/op-attrs/src/combine.cc index 8cfe6dfb8c..7814442926 100644 --- a/lib/op-attrs/src/combine.cc +++ b/lib/op-attrs/src/combine.cc @@ -4,17 +4,17 @@ namespace FlexFlow { bool CombineAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - return true; + if (!input.is_valid()) { + return false; + } + return true; } -ParallelTensorShape get_output_shape(CombineAttrs const & attrs, - ParallelTensorShape const & input) { +ParallelTensorShape get_output_shape(CombineAttrs const &attrs, + ParallelTensorShape const &input) { ParallelTensorShape output = input_shape; output.at(attrs.combine_dim).degree /= attrs.combine_degree; - return output; + return output; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index d5d2ad90c1..42085ca2b1 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -88,19 +88,24 @@ bool Conv2DAttrs::is_valid(TensorShape const &input) const { return true; } -//according to pytorch, the input shape: [b, input_channel, input_h, input_w] -//kernel shape: [output_channel, input_channel, kernel_h, kernel_w] -//we may have stide_h and padding_h -//output shape: [b, output_channel, output_h, output_w] -//output_h = (input_h + 2 * padding_h - kernel_h) / stride_h + 1 -//output_w = (input_w + 2 * padding_w - kernel_w) / stride_w + 1 -ParallelTensorShape get_output_shape(Conv2DAttrs const & attrs, - ParallelTensorShape const & input) { +// according to pytorch, the input shape: [b, input_channel, input_h, input_w] +// kernel shape: [output_channel, input_channel, kernel_h, 
kernel_w] +// we may have stide_h and padding_h +// output shape: [b, output_channel, output_h, output_w] +// output_h = (input_h + 2 * padding_h - kernel_h) / stride_h + 1 +// output_w = (input_w + 2 * padding_w - kernel_w) / stride_w + 1 +ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, + ParallelTensorShape const &input) { ParallelTensorShape output = input; output.at(ff_dim_t(1)).size = attrs.out_channels; - output.at(ff_dim_t(2)).size = (input.at(ff_dim_t(2)).size + - 2 * attrs.padding_h - attrs.kernel_h) / attrs.stride_h + 1; - output.at(ff_dim_t(3)).size = (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) /attrs.stride_w +1; + output.at(ff_dim_t(2)).size = + (input.at(ff_dim_t(2)).size + 2 * attrs.padding_h - attrs.kernel_h) / + attrs.stride_h + + 1; + output.at(ff_dim_t(3)).size = + (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) / + attrs.stride_w + + 1; return output; } diff --git a/lib/op-attrs/src/dropout.cc b/lib/op-attrs/src/dropout.cc index dc60d8cf94..bccfdb10a2 100644 --- a/lib/op-attrs/src/dropout.cc +++ b/lib/op-attrs/src/dropout.cc @@ -1,13 +1,13 @@ #include "dropout.h" #include "op-attrs/get_output_shapes.h" -namespace FlexFlow { +namespace FlexFlow { -bool DropoutAttrs::is_valid(ParallelTensorShape const & input) const { - if(!input.is_valid()) { - return false; - } - return true; +bool DropoutAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + return true; } ParallelTensorShape get_output_shape(DropoutAttrs const &attrs, @@ -16,4 +16,4 @@ ParallelTensorShape get_output_shape(DropoutAttrs const &attrs, return output; } -} // namespace FlexFlow \ No newline at end of file +} // namespace FlexFlow diff --git a/lib/op-attrs/src/element_binary.cc b/lib/op-attrs/src/element_binary.cc index 2e0b8f8e34..4b20ee25a9 100644 --- a/lib/op-attrs/src/element_binary.cc +++ b/lib/op-attrs/src/element_binary.cc @@ -2,17 +2,17 @@ namespace FlexFlow { -bool 
ElementBinaryAttrs::is_valid(ParallelTensorShape const & input1, - ParallelTensorShape const & input2) const { - if(!input1.is_valid() || !input2.is_valid()) { - return false; - } - return true; +bool ElementBinaryAttrs::is_valid(ParallelTensorShape const &input1, + ParallelTensorShape const &input2) const { + if (!input1.is_valid() || !input2.is_valid()) { + return false; + } + return true; } -ParallelTensorShape get_output_shape(ElementBinaryAttrs const & atts, - ParallelTensorShape const & lhs, - ParallelTensorShape const & rhs) { +ParallelTensorShape get_output_shape(ElementBinaryAttrs const &atts, + ParallelTensorShape const &lhs, + ParallelTensorShape const &rhs) { ParallelTensorShape output = lhs.num_dims() >= rhs.num_dims() ? lhs : rhs; for (int i = 0; i < output.num_dims(); i++) { if (i >= lhs.num_dims()) { diff --git a/lib/op-attrs/src/element_unary.cc b/lib/op-attrs/src/element_unary.cc index b59ba92529..1fd11abe05 100644 --- a/lib/op-attrs/src/element_unary.cc +++ b/lib/op-attrs/src/element_unary.cc @@ -2,15 +2,15 @@ namespace FlexFlow { -bool ElementUnaryAttrs::is_valid(ParallelTensorShape const & input) const { - if(!input.is_valid()) { - return false; - } - return true; +bool ElementUnaryAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + return true; } -ParallelTensorShape get_output_shape(ElementUnaryAttrs const & atts, - ParallelTensorShape const & input) { +ParallelTensorShape get_output_shape(ElementUnaryAttrs const &atts, + ParallelTensorShape const &input) { ParallelTensorShape output = input; return output; diff --git a/lib/op-attrs/src/embedding.cc b/lib/op-attrs/src/embedding.cc index b782e1282c..dca6e393ef 100644 --- a/lib/op-attrs/src/embedding.cc +++ b/lib/op-attrs/src/embedding.cc @@ -2,21 +2,21 @@ namespace FlexFlow { -bool EmbeddingAttrs::is_valid(ParallelTensorShape const & input) const { - if(!input.is_valid()) { - return false; - } - return true; +bool 
EmbeddingAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + return true; } -//pytorch nn.Embedding -//Embedding OP: (num_embeddings, embedding_dim) (num_entries, out_channels) -//Input: (batch_size, seq_len) -//Output: (batch_size, seq_len, embedding_dim) -ParallelTensorShape get_output_shape(EmbeddingAttrs const & atts, - ParallelTensorShape const & input) { +// pytorch nn.Embedding +// Embedding OP: (num_embeddings, embedding_dim) (num_entries, out_channels) +// Input: (batch_size, seq_len) +// Output: (batch_size, seq_len, embedding_dim) +ParallelTensorShape get_output_shape(EmbeddingAttrs const &atts, + ParallelTensorShape const &input) { ParallelTensorShape output = input; output.at(ff_dim_t(1)).size = input.at(ff_dim_t(1)).size; - output.at(ff_dim_t(2)).size= atts.out_channels; + output.at(ff_dim_t(2)).size = atts.out_channels; return output; } // namespace FlexFlow diff --git a/lib/op-attrs/src/flat.cc b/lib/op-attrs/src/flat.cc index 65da5be1f0..ae351328b7 100644 --- a/lib/op-attrs/src/flat.cc +++ b/lib/op-attrs/src/flat.cc @@ -14,8 +14,9 @@ namespace Output { constexpr int NUMDIM = 3, CHANNEL = 0, SAMPLE = 1, REPLICA = 2; } -//flat is like the pytorch view -//tensor = torch.randn(2, 3, 4) ,flattened_tensor = tensor.view(-1) #shape: (24) +// flat is like the pytorch view +// tensor = torch.randn(2, 3, 4) ,flattened_tensor = tensor.view(-1) #shape: +// (24) ParallelTensorShape get_output_shape(FlatAttrs const &attrs, ParallelTensorShape const &input) { ParallelTensorShape output_shape(input.dims, input.data_type); diff --git a/lib/op-attrs/src/gather.cc b/lib/op-attrs/src/gather.cc index d514b439d4..25bfe8e516 100644 --- a/lib/op-attrs/src/gather.cc +++ b/lib/op-attrs/src/gather.cc @@ -17,10 +17,11 @@ bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, return true; } -//todo: why return a vector? 
-std::vector get_output_shapes(GatherAttrs const & attrs, - ParallelTensorShape const & lhs, - ParallelTensorShape const & rhs ) { +// todo: why return a vector? +std::vector + get_output_shapes(GatherAttrs const &attrs, + ParallelTensorShape const &lhs, + ParallelTensorShape const &rhs) { NOT_IMPLEMENTED(); } diff --git a/lib/op-attrs/src/get_output_shapes.cc b/lib/op-attrs/src/get_output_shapes.cc index b41912d577..f44a677873 100644 --- a/lib/op-attrs/src/get_output_shapes.cc +++ b/lib/op-attrs/src/get_output_shapes.cc @@ -20,6 +20,4 @@ TensorShape get_output_shape(AggregateAttrs const &attrs, as_parallel(exp_preds))); } - - } // namespace FlexFlow diff --git a/lib/op-attrs/src/groupby.cc b/lib/op-attrs/src/groupby.cc index efe22e2a25..9315b85c39 100644 --- a/lib/op-attrs/src/groupby.cc +++ b/lib/op-attrs/src/groupby.cc @@ -5,16 +5,16 @@ namespace FlexFlow { bool Group_byAttrs::is_valid(ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) const { - if(!lhs.is_valid() || !rhs.is_valid()) { - return false; - } - NOT_IMPLEMENTED(); + if (!lhs.is_valid() || !rhs.is_valid()) { + return false; + } + NOT_IMPLEMENTED(); } -ParallelTensorShape get_output_shape(Group_byAttrs const & attrs, - ParallelTensorShape const & lhs, - ParallelTensorShape const & rhs) { - NOT_IMPLEMENTED(); +ParallelTensorShape get_output_shape(Group_byAttrs const &attrs, + ParallelTensorShape const &lhs, + ParallelTensorShape const &rhs) { + NOT_IMPLEMENTED(); } } // namespace FlexFlow diff --git a/lib/op-attrs/src/layer_norm.cc b/lib/op-attrs/src/layer_norm.cc index 4e706cbd28..081252847a 100644 --- a/lib/op-attrs/src/layer_norm.cc +++ b/lib/op-attrs/src/layer_norm.cc @@ -2,21 +2,21 @@ namespace FlexFlow { -bool LayerNormAttrs::is_valid(ParallelTensorShape const & input) const { - if(!input.is_valid()) { - return false; - } - if(input.num_dims() < 2) { - return false; - } - return true; +bool LayerNormAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + 
return false; + } + if (input.num_dims() < 2) { + return false; + } + return true; } -//todo: maybe we need to set the degree of parallel_dim -ParallelTensorShape get_output_shape(LayerNormAttrs const & attrs, - ParallelTensorShape const & input) { - ParallelTensorShape output = input; - return output; +// todo: maybe we need to set the degree of parallel_dim +ParallelTensorShape get_output_shape(LayerNormAttrs const &attrs, + ParallelTensorShape const &input) { + ParallelTensorShape output = input; + return output; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/linear.cc b/lib/op-attrs/src/linear.cc index 3aa361c342..bae30a8ebd 100644 --- a/lib/op-attrs/src/linear.cc +++ b/lib/op-attrs/src/linear.cc @@ -3,22 +3,22 @@ namespace FlexFlow { -bool LinearAttrs::is_valid(ParallelTensorShape const & input) const { - if(!input.is_valid()) { - return false; - } - return true; +bool LinearAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + return true; } -//pytorch: input shape:{batch_size, input_channels} -//pytorch linearattrs: should be {input_channels, output_channels} -//pytorch: output shape:{batch_size, output_channels} -//question: the Linearattrs doesn't have input_channels -ParallelTensorShape get_output_shape(LinearAttrs const & atts, - ParallelTensorShape const & input) { - ParallelTensorShape out_shape = input; - out_shape.at(ff_dim_t(0)).size = atts.out_channels; - return out_shape; +// pytorch: input shape:{batch_size, input_channels} +// pytorch linearattrs: should be {input_channels, output_channels} +// pytorch: output shape:{batch_size, output_channels} +// question: the Linearattrs doesn't have input_channels +ParallelTensorShape get_output_shape(LinearAttrs const &atts, + ParallelTensorShape const &input) { + ParallelTensorShape out_shape = input; + out_shape.at(ff_dim_t(0)).size = atts.out_channels; + return out_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/pool_2d.cc 
b/lib/op-attrs/src/pool_2d.cc index 8587b114a6..6d58210b6a 100644 --- a/lib/op-attrs/src/pool_2d.cc +++ b/lib/op-attrs/src/pool_2d.cc @@ -40,32 +40,39 @@ static ParallelDimMappingSolution return solve_parallel_dim_mappings(construct_mappings(input), {input}, 0, 1); } -bool Pool2DAttrs::is_valid(ParallelTensorShape const & input) const { - if(!input.is_valid()) { - return false; - } - return true; +bool Pool2DAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + return true; } -//pytorch: we have two type of pool2d, maxpool2d and avgpool2d -//input shape: (batch_size, channels, input_height, input_width) -//for avgpool2d, output shape: (batch_size, channels, 1, 1) -//for maxpool2d, output shape: (batch_size, channels, output_height, output_width) -//output_height = (input_height + 2 * padding_h - kernel_h) / stride_h + 1 -//output_width = (input_width + 2 * padding_w - kernel_w) / stride_w + 1 -ParallelTensorShape get_output_shape(Pool2DAttrs const & attrs, - ParallelTensorShape const & input) { - ParallelTensorShape output_shape = input; - if(attrs.pool_type == PoolOp::AVG) { - output_shape.at(ff_dim_t(2)).size = 1; - output_shape.at(ff_dim_t(3)).size = 1; - } else if(attrs.pool_type == PoolOp::MAX) { - output_shape.at(ff_dim_t(2)).size = (input.at(ff_dim_t(2)).size + 2 * attrs.padding_h - attrs.kernel_h) / attrs.stride_h + 1; - output_shape.at(ff_dim_t(3)).size = (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) / attrs.stride_w + 1; - } else { - assert(false && "unsupported pool type"); - } - return output_shape; +// pytorch: we have two type of pool2d, maxpool2d and avgpool2d +// input shape: (batch_size, channels, input_height, input_width) +// for avgpool2d, output shape: (batch_size, channels, 1, 1) +// for maxpool2d, output shape: (batch_size, channels, output_height, +// output_width) output_height = (input_height + 2 * padding_h - kernel_h) / +// stride_h + 1 output_width = (input_width 
+ 2 * padding_w - kernel_w) / +// stride_w + 1 +ParallelTensorShape get_output_shape(Pool2DAttrs const &attrs, + ParallelTensorShape const &input) { + ParallelTensorShape output_shape = input; + if (attrs.pool_type == PoolOp::AVG) { + output_shape.at(ff_dim_t(2)).size = 1; + output_shape.at(ff_dim_t(3)).size = 1; + } else if (attrs.pool_type == PoolOp::MAX) { + output_shape.at(ff_dim_t(2)).size = + (input.at(ff_dim_t(2)).size + 2 * attrs.padding_h - attrs.kernel_h) / + attrs.stride_h + + 1; + output_shape.at(ff_dim_t(3)).size = + (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) / + attrs.stride_w + + 1; + } else { + assert(false && "unsupported pool type"); + } + return output_shape; } } diff --git a/lib/op-attrs/src/reduce.cc b/lib/op-attrs/src/reduce.cc index 2c42e5cfad..3deb33e680 100644 --- a/lib/op-attrs/src/reduce.cc +++ b/lib/op-attrs/src/reduce.cc @@ -3,13 +3,12 @@ namespace FlexFlow { -bool ReduceAttrs::is_valid(ParallelTensorShape const & input) const { - NOT_IMPLEMENTED() -} +bool ReduceAttrs::is_valid(ParallelTensorShape const &input) const { + NOT_IMPLEMENTED()} -ParallelTensorShape get_output_shape(ReduceAttrs const & attrs, - ParallelTensorShape const & input) { - NOT_IMPLEMENTED() +ParallelTensorShape get_output_shape(ReduceAttrs const &attrs, + ParallelTensorShape const &input) { + NOT_IMPLEMENTED() } } // namespace FlexFlow diff --git a/lib/op-attrs/src/repartition.cc b/lib/op-attrs/src/repartition.cc index ad037b7cf6..b5a0280d85 100644 --- a/lib/op-attrs/src/repartition.cc +++ b/lib/op-attrs/src/repartition.cc @@ -17,7 +17,8 @@ bool RepartitionAttrs::is_valid(ParallelTensorShape const &input) const { return (dim.size % this->repartition_degree * dim.degree == 0); } -//this may be wrong partition by n multiplies degree by n and keeps shape the same +// this may be wrong partition by n multiplies degree by n and keeps shape the +// same ParallelTensorShape get_output_shape(RepartitionAttrs const &attrs, ParallelTensorShape const 
&input_shape) { ParallelTensorShape output(input_shape.dims, input_shape.data_type); @@ -25,5 +26,4 @@ ParallelTensorShape get_output_shape(RepartitionAttrs const &attrs, return output; } - } // namespace FlexFlow diff --git a/lib/op-attrs/src/replicate.cc b/lib/op-attrs/src/replicate.cc index b5e3e81d7e..2086ab41bd 100644 --- a/lib/op-attrs/src/replicate.cc +++ b/lib/op-attrs/src/replicate.cc @@ -5,32 +5,30 @@ namespace FlexFlow { bool ReplicateAttrs::is_valid(ParallelTensorShape const &input) const { - if(!input.is_valid()) { + if (!input.is_valid()) { return false; } - if(this->replicate_dim >= input.num_dims() || this->replicate_degree <= 0) { + if (this->replicate_dim >= input.num_dims() || this->replicate_degree <= 0) { return false; } return true; } -//replicate by n multiplies degree by n and shape by n -//seems it is like pytorch's repeat -//original_tensor = torch.tensor([1, 2, 3]) torch.Size([3]) -///replicated_tensor = original_tensor.repeat(3) torch.Size([9]) +// replicate by n multiplies degree by n and shape by n +// seems it is like pytorch's repeat +// original_tensor = torch.tensor([1, 2, 3]) torch.Size([3]) +/// replicated_tensor = original_tensor.repeat(3) torch.Size([9]) -//original_tensor = torch.randn(2, 3, 4) torch.Size([2, 3, 4]) -//repeated_tensor = original_tensor.repeat(3, 1, 1) torch.Size([6, 3, 4]) +// original_tensor = torch.randn(2, 3, 4) torch.Size([2, 3, 4]) +// repeated_tensor = original_tensor.repeat(3, 1, 1) torch.Size([6, 3, 4]) -ParallelTensorShape get_output_shape(ReplicateAttrs const & attrs, - ParallelTensorShape const & input) { +ParallelTensorShape get_output_shape(ReplicateAttrs const &attrs, + ParallelTensorShape const &input) { assert(attrs.is_valid(input)); ParallelTensorShape output = input; output.at(attrs.replicate_dim).size *= attrs.replicate_degree; return output; } - - } // namespace FlexFlow diff --git a/lib/op-attrs/src/reshape.cc b/lib/op-attrs/src/reshape.cc index 777f2eef1b..e100efeadb 100644 --- 
a/lib/op-attrs/src/reshape.cc +++ b/lib/op-attrs/src/reshape.cc @@ -3,32 +3,33 @@ namespace FlexFlow { -//pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? and the output is [4, 6] +// pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? and the +// output is [4, 6] bool ReshapeAttrs::is_valid(ParallelTensorShape const &input) const { if (!input.is_valid()) { return false; } - std::size_t input_volume =1; - for (int i = 0; i < input.num_dims(); i++) { - input_volume *= input.at(ff_dim_t(i)).size; - } - std::size_t attrs_volume =1; - for (int i = 0; i < this->shape.dims.num_dims(); i++) { - attrs_volume *= this->shape.at(ff_dim_t(i)); - } - return (input_volume == attrs_volume); + std::size_t input_volume = 1; + for (int i = 0; i < input.num_dims(); i++) { + input_volume *= input.at(ff_dim_t(i)).size; + } + std::size_t attrs_volume = 1; + for (int i = 0; i < this->shape.dims.num_dims(); i++) { + attrs_volume *= this->shape.at(ff_dim_t(i)); + } + return (input_volume == attrs_volume); } -//pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? and the output is [4, 6] -//currently we doesn't consider the case of -1,we can support this later -//the input:[2,3,4], attrs.shape:[4,6], the output is [4, 6] -ParallelTensorShape get_output_shape(ReshapeAttrs const & attrs, - ParallelTensorShape const & input) { +// pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? 
and the +// output is [4, 6] currently we doesn't consider the case of -1,we can support +// this later the input:[2,3,4], attrs.shape:[4,6], the output is [4, 6] +ParallelTensorShape get_output_shape(ReshapeAttrs const &attrs, + ParallelTensorShape const &input) { - assert(attrs.is_valid(input) && "input is not valid"); - ParallelTensorDims dims{attrs.shape.dims}; - ParallelTensorShape output{dims, input.data_type}; - return output; + assert(attrs.is_valid(input) && "input is not valid"); + ParallelTensorDims dims{attrs.shape.dims}; + ParallelTensorShape output{dims, input.data_type}; + return output; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/reverse.cc b/lib/op-attrs/src/reverse.cc new file mode 100644 index 0000000000..0af3806778 --- /dev/null +++ b/lib/op-attrs/src/reverse.cc @@ -0,0 +1,23 @@ +#include "op-attrs/ops/reverse.h" +#include "op-attrs/ff_dim.h" + +namespace FlexFlow { + +bool ReverseAttrs::is_valid(ParallelTensorShape const & input) const { + if(input.is_valid() ==false) { + return false; + } + if(this->axis < 0 || this->axis >= input.num_dims()) { + return false; + } + return true; +} + +ParallelTensorShape get_output_shape(ReverseAttrs const & attrs, + ParallelTensorShape const & input) { + ParallelTensorShape output = input; + return output; +} + + +}; \ No newline at end of file diff --git a/lib/op-attrs/src/softmax.cc b/lib/op-attrs/src/softmax.cc index f5795cc037..91d6555681 100644 --- a/lib/op-attrs/src/softmax.cc +++ b/lib/op-attrs/src/softmax.cc @@ -3,20 +3,20 @@ namespace FlexFlow { bool SoftmaxAttrs::is_valid(ParallelTensorShape const &input) const { - if(!input.is_valid()) { - return false; - } - if(input.num_dims() < 2) { - return false; - } - return true; + if (!input.is_valid()) { + return false; + } + if (input.num_dims() < 2) { + return false; + } + return true; } -ParallelTensorShape get_output_shape(SoftmaxAttrs const & attrs, - ParallelTensorShape const & input) { - assert(attrs.is_valid(input)); - 
ParallelTensorShape output = input; - return output; +ParallelTensorShape get_output_shape(SoftmaxAttrs const &attrs, + ParallelTensorShape const &input) { + assert(attrs.is_valid(input)); + ParallelTensorShape output = input; + return output; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/split.cc b/lib/op-attrs/src/split.cc index 6ba0b711d1..1c14f1c370 100644 --- a/lib/op-attrs/src/split.cc +++ b/lib/op-attrs/src/split.cc @@ -3,33 +3,33 @@ namespace FlexFlow { -bool SplitAttrs::is_valid(ParallelTensorShape const & input) const { - if(!input.is_valid()) { - return false; - } - std::size_t dims_sum = 0; - - for(std::size_t i = 0; i < this->splits.size(); ++i) { - dims_sum += splits[i]; - } - - if(dims_sum != input.at(ff_dim_t(axis)).size) { - return false; - } - return true; +bool SplitAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + std::size_t dims_sum = 0; + + for (std::size_t i = 0; i < this->splits.size(); ++i) { + dims_sum += splits[i]; + } + + if (dims_sum != input.at(ff_dim_t(axis)).size) { + return false; + } + return true; } - -std::vector get_output_shapes(SplitAttrs const & attrs, - ParallelTensorShape const & input) { - - assert(attrs.is_valid(input)); - std::vector outputs; - for(std::size_t i = 0 ; i < attrs.splits.size(); ++i) { - outputs.emplace_back(input); - outputs.back().at(ff_dim_t(attrs.axis)).size = attrs.splits[i]; - } - return outputs; +std::vector + get_output_shapes(SplitAttrs const &attrs, + ParallelTensorShape const &input) { + + assert(attrs.is_valid(input)); + std::vector outputs; + for (std::size_t i = 0; i < attrs.splits.size(); ++i) { + outputs.emplace_back(input); + outputs.back().at(ff_dim_t(attrs.axis)).size = attrs.splits[i]; + } + return outputs; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/topk.cc b/lib/op-attrs/src/topk.cc index 00c2d97902..06c43b3eba 100644 --- a/lib/op-attrs/src/topk.cc +++ b/lib/op-attrs/src/topk.cc @@ -2,24 +2,23 @@ namespace 
FlexFlow { -bool TopKAttrs::is_valid(ParallelTensorShape const & input) const { - if(!input.is_valid()) { - return false; - } +bool TopKAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } - if(k > input.at(ff_dim_t(axis)).size) { - return false; - } - return true; + if (k > input.at(ff_dim_t(axis)).size) { + return false; + } + return true; } - -ParallelTensorShape get_output_shape(TopKAttrs const & attrs, - ParallelTensorShape const & input) { - assert(attrs.is_valid(input)); - ParallelTensorShape output = input; - output.at(ff_dim_t(attrs.axis)).size = attrs.k; - return output; +ParallelTensorShape get_output_shape(TopKAttrs const &attrs, + ParallelTensorShape const &input) { + assert(attrs.is_valid(input)); + ParallelTensorShape output = input; + output.at(ff_dim_t(attrs.axis)).size = attrs.k; + return output; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/transpose.cc b/lib/op-attrs/src/transpose.cc index ad4a84a3d5..97140c6b49 100644 --- a/lib/op-attrs/src/transpose.cc +++ b/lib/op-attrs/src/transpose.cc @@ -1,3 +1,41 @@ #include "op-attrs/ops/transpose.h" +#include "op-attrs/ff_dim.h" +#include "utils/exception.decl.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +bool TransposeAttrs::is_valid(ParallelTensorShape const &input) const { + if (!input.is_valid()) { + return false; + } + // in pytorch, we use choose two dim for transpose, so I think the size of + // perm should be 2 + if (perm.size() != 2) { + return false; + } + + auto dim0 = perm[0]; + auto dim1 = perm[1]; + if (dim0 < 0 || dim1 < 0 || dim0 >= input.num_dims() || + dim1 >= input.num_dims()) { + return false; + } + + return true; +} + +// assume we have [x, y, z, l], perms is [0,2] we return [z, y, x, l] +ParallelTensorShape get_output_shape(TransposeAttrs const &attrs, + ParallelTensorShape const &input) { + ParallelTensorShape output = input; + auto dim0 = attrs.perm[0]; + auto dim1 = attrs.perm[1]; + int temp 
= input.at(ff_dim_t(dim0)).size; + output.at(ff_dim_t(dim0)).size = input.at(ff_dim_t(dim1)).size; + output.at(ff_dim_t(dim1)).size = temp; + return output; +} + +} + +} // namespace FlexFlow From 590dac5b72273cc8cc5b66badec10941a606d7de Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 11 Oct 2023 15:29:11 +0000 Subject: [PATCH 30/69] update the batch matmul --- .../src/cuda/aggregate_spec_kernels.cu | 152 +++++++++--------- lib/op-attrs/src/batch_matmul.cc | 16 +- 2 files changed, 90 insertions(+), 78 deletions(-) diff --git a/lib/kernels/src/cuda/aggregate_spec_kernels.cu b/lib/kernels/src/cuda/aggregate_spec_kernels.cu index 8a39b7f558..d46dc64567 100644 --- a/lib/kernels/src/cuda/aggregate_spec_kernels.cu +++ b/lib/kernels/src/cuda/aggregate_spec_kernels.cu @@ -30,82 +30,6 @@ AggregateSpecPerDeviceState::~AggregateSpecPerDeviceState(void) { namespace Kernels { namespace AggregateSpec { -void forward_kernel(cudaStream_t stream, - AggregateSpecPerDeviceState const *m, - float **exp_preds, - int const *acc_gate_assign_ptr, - float *acc_output_ptr, - int n, - int const k, - int rows, - int const batch_size, - int out_dim) { - - checkCUDA(cublasSetStream(m->handle.blas, stream)); - checkCUDNN(cudnnSetStream(m->handle.dnn, stream)); - - // call forward kernel - cudaMemcpy(m->dev_region_ptrs, - exp_preds, - n * sizeof(float *), - cudaMemcpyHostToDevice); - - aggspec_forward_kernel<<>>(m->dev_region_ptrs, - acc_gate_assign_ptr, - acc_output_ptr, - n, - k, - rows, - batch_size, - out_dim); -} - -void backward_kernel(cudaStream_t stream, - AggregateSpecPerDeviceState const *m, - float **exp_grads, - int const *acc_gate_assign_ptr, - int const *acc_true_gate_assign_ptr, - float const *acc_gate_pred_ptr, - float *acc_full_gate_grad_ptr, - float const *acc_output_grad_ptr, - int n, - int const k, - int rows, - float lambda_bal, - int const batch_size, - int out_dim) { - - checkCUDA(cublasSetStream(m->handle.blas, stream)); - checkCUDNN(cudnnSetStream(m->handle.dnn, 
stream)); - - // call backward kernel - cudaMemcpy(m->dev_region_ptrs, - exp_grads, - n * sizeof(float *), - cudaMemcpyHostToDevice); - - aggspec_backward_kernel<<>>(m->dev_region_ptrs, - acc_gate_assign_ptr, - acc_true_gate_assign_ptr, - acc_gate_pred_ptr, - acc_full_gate_grad_ptr, - acc_output_grad_ptr, - n, - k, - rows, - lambda_bal, - batch_size, - out_dim); -} - __global__ void aggspec_forward_kernel(float **exp_preds, int const *exp_assign, @@ -297,6 +221,82 @@ __global__ void out_dim); } +void forward_kernel(cudaStream_t stream, + AggregateSpecPerDeviceState const *m, + float **exp_preds, + int const *acc_gate_assign_ptr, + float *acc_output_ptr, + int n, + int const k, + int rows, + int const batch_size, + int out_dim) { + + checkCUDA(cublasSetStream(m->handle.blas, stream)); + checkCUDNN(cudnnSetStream(m->handle.dnn, stream)); + + // call forward kernel + cudaMemcpy(m->dev_region_ptrs, + exp_preds, + n * sizeof(float *), + cudaMemcpyHostToDevice); + + aggspec_forward_kernel<<>>(m->dev_region_ptrs, + acc_gate_assign_ptr, + acc_output_ptr, + n, + k, + rows, + batch_size, + out_dim); +} + +void backward_kernel(cudaStream_t stream, + AggregateSpecPerDeviceState const *m, + float **exp_grads, + int const *acc_gate_assign_ptr, + int const *acc_true_gate_assign_ptr, + float const *acc_gate_pred_ptr, + float *acc_full_gate_grad_ptr, + float const *acc_output_grad_ptr, + int n, + int const k, + int rows, + float lambda_bal, + int const batch_size, + int out_dim) { + + checkCUDA(cublasSetStream(m->handle.blas, stream)); + checkCUDNN(cudnnSetStream(m->handle.dnn, stream)); + + // call backward kernel + cudaMemcpy(m->dev_region_ptrs, + exp_grads, + n * sizeof(float *), + cudaMemcpyHostToDevice); + + aggspec_backward_kernel<<>>(m->dev_region_ptrs, + acc_gate_assign_ptr, + acc_true_gate_assign_ptr, + acc_gate_pred_ptr, + acc_full_gate_grad_ptr, + acc_output_grad_ptr, + n, + k, + rows, + lambda_bal, + batch_size, + out_dim); +} + } // namespace AggregateSpec } // 
namespace Kernels } // namespace FlexFlow diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index f30869e035..e8842c7722 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -10,14 +10,26 @@ bool BatchMatmulAttrs::is_valid(ParallelTensorShape const &lhs, if (!lhs.is_valid() || !rhs.is_valid()) { return false; } - if (lhs.num_dims() != rhs.num_dims()) { + + if(lhs.at(ff_dim_t(0)).size != rhs.at(ff_dim_t(0)).size) { + return false; + } + if(lhs.at(ff_dim_t(2)).size != rhs.at(ff_dim_t(1)).size) { + return false; + } + if(lhs.at(ff_dim_t(1)).size != a_seq_length_dim) { + return false; + } + + if(rhs.at(ff_dim_t(2)).size != b_seq_length_dim) { return false; } return true; } -// how to get the batch size? and lhs: [b, s1, k], rhs: [b, k, s1] +// how to get the batch size? and lhs: [b, n, m], rhs: [b, m, p] +//output: [b, n, p] //n == s1, m == s2 ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) { From 69c13bae6d85b71e9595152230718b361439db60 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 11 Oct 2023 15:37:00 +0000 Subject: [PATCH 31/69] add valid check for conv_2d --- lib/op-attrs/include/op-attrs/ops/conv_2d.h | 2 +- lib/op-attrs/src/batch_norm.cc | 4 +++- lib/op-attrs/src/combine.cc | 2 +- lib/op-attrs/src/concat.cc | 3 +++ lib/op-attrs/src/conv_2d.cc | 6 +++++- 5 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d.h b/lib/op-attrs/include/op-attrs/ops/conv_2d.h index 31290d153e..51da1ac91c 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d.h @@ -14,7 +14,7 @@ struct Conv2DAttrs { padding_w, groups; req> activation; req use_bias; - bool is_valid(TensorShape const &input) const; + bool is_valid(ParallelTensorShape const &input) const; }; FF_VISITABLE_STRUCT(Conv2DAttrs, diff --git a/lib/op-attrs/src/batch_norm.cc 
b/lib/op-attrs/src/batch_norm.cc index 9b15913d1f..ed5da8286a 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -11,7 +11,9 @@ bool BatchNormAttrs::is_valid(ParallelTensorShape const &input) { ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, ParallelTensorShape const &input) { - return input; + ParallelTensorShape output_shape = input; + + return output_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/combine.cc b/lib/op-attrs/src/combine.cc index 7814442926..5309e5a620 100644 --- a/lib/op-attrs/src/combine.cc +++ b/lib/op-attrs/src/combine.cc @@ -12,7 +12,7 @@ bool CombineAttrs::is_valid(ParallelTensorShape const &input) const { ParallelTensorShape get_output_shape(CombineAttrs const &attrs, ParallelTensorShape const &input) { - ParallelTensorShape output = input_shape; + ParallelTensorShape output = input; output.at(attrs.combine_dim).degree /= attrs.combine_degree; return output; } diff --git a/lib/op-attrs/src/concat.cc b/lib/op-attrs/src/concat.cc index e4b9496e69..34e19cb423 100644 --- a/lib/op-attrs/src/concat.cc +++ b/lib/op-attrs/src/concat.cc @@ -7,6 +7,9 @@ bool ConcatAttrs::is_valid( bool valid = true; for (auto p : input) { valid &= p.is_valid(); + if(axis >= p.num_dims(())){ + return false; + } } return valid; } diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index 42085ca2b1..facba4d661 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -81,10 +81,14 @@ std::vector return mappings; } -bool Conv2DAttrs::is_valid(TensorShape const &input) const { +bool Conv2DAttrs::is_valid(ParallelTensorShape const &input) const { if (!input.is_valid()) { return false; } + if(input.num_dims() != 4) { + return false; + } + return true; } From b56d9c0c49a4daaafb0be09f7329ea56a7082a2f Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 11 Oct 2023 15:49:56 +0000 Subject: [PATCH 32/69] format the code and get_output_shape draft version0.1 --- 
.../include/op-attrs/ops/element_binary.h | 2 +- lib/op-attrs/src/batch_matmul.cc | 12 ++++----- lib/op-attrs/src/batch_norm.cc | 2 +- lib/op-attrs/src/concat.cc | 2 +- lib/op-attrs/src/conv_2d.cc | 4 +-- lib/op-attrs/src/element_unary.cc | 1 + lib/op-attrs/src/reverse.cc | 27 +++++++++---------- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/element_binary.h b/lib/op-attrs/include/op-attrs/ops/element_binary.h index c068fcc45c..7b731bf40f 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_binary.h +++ b/lib/op-attrs/include/op-attrs/ops/element_binary.h @@ -2,9 +2,9 @@ #define _FLEXFLOW_ELEMENT_BINARY_ATTRS_H #include "core.h" -#include "op-attrs/datatype.h" #include "op-attrs/op.h" #include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/©" #include "utils/visitable.h" namespace FlexFlow { diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index e8842c7722..b460f844f6 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -10,18 +10,18 @@ bool BatchMatmulAttrs::is_valid(ParallelTensorShape const &lhs, if (!lhs.is_valid() || !rhs.is_valid()) { return false; } - - if(lhs.at(ff_dim_t(0)).size != rhs.at(ff_dim_t(0)).size) { + + if (lhs.at(ff_dim_t(0)).size != rhs.at(ff_dim_t(0)).size) { return false; } - if(lhs.at(ff_dim_t(2)).size != rhs.at(ff_dim_t(1)).size) { + if (lhs.at(ff_dim_t(2)).size != rhs.at(ff_dim_t(1)).size) { return false; } - if(lhs.at(ff_dim_t(1)).size != a_seq_length_dim) { + if (lhs.at(ff_dim_t(1)).size != a_seq_length_dim) { return false; } - if(rhs.at(ff_dim_t(2)).size != b_seq_length_dim) { + if (rhs.at(ff_dim_t(2)).size != b_seq_length_dim) { return false; } @@ -29,7 +29,7 @@ bool BatchMatmulAttrs::is_valid(ParallelTensorShape const &lhs, } // how to get the batch size? 
and lhs: [b, n, m], rhs: [b, m, p] -//output: [b, n, p] //n == s1, m == s2 +// output: [b, n, p] //n == s1, m == s2 ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) { diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index ed5da8286a..526871fc46 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -12,7 +12,7 @@ bool BatchNormAttrs::is_valid(ParallelTensorShape const &input) { ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, ParallelTensorShape const &input) { ParallelTensorShape output_shape = input; - + return output_shape; } diff --git a/lib/op-attrs/src/concat.cc b/lib/op-attrs/src/concat.cc index 34e19cb423..39c06d07cc 100644 --- a/lib/op-attrs/src/concat.cc +++ b/lib/op-attrs/src/concat.cc @@ -7,7 +7,7 @@ bool ConcatAttrs::is_valid( bool valid = true; for (auto p : input) { valid &= p.is_valid(); - if(axis >= p.num_dims(())){ + if (axis >= p.num_dims(())) { return false; } } diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index facba4d661..ed89b380df 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -85,10 +85,10 @@ bool Conv2DAttrs::is_valid(ParallelTensorShape const &input) const { if (!input.is_valid()) { return false; } - if(input.num_dims() != 4) { + if (input.num_dims() != 4) { return false; } - + return true; } diff --git a/lib/op-attrs/src/element_unary.cc b/lib/op-attrs/src/element_unary.cc index 1fd11abe05..36e58ff263 100644 --- a/lib/op-attrs/src/element_unary.cc +++ b/lib/op-attrs/src/element_unary.cc @@ -13,5 +13,6 @@ ParallelTensorShape get_output_shape(ElementUnaryAttrs const &atts, ParallelTensorShape const &input) { ParallelTensorShape output = input; return output; +} } // namespace FlexFlow diff --git a/lib/op-attrs/src/reverse.cc b/lib/op-attrs/src/reverse.cc index 0af3806778..a09d43ae61 100644 --- a/lib/op-attrs/src/reverse.cc +++ 
b/lib/op-attrs/src/reverse.cc @@ -3,21 +3,20 @@ namespace FlexFlow { -bool ReverseAttrs::is_valid(ParallelTensorShape const & input) const { - if(input.is_valid() ==false) { - return false; - } - if(this->axis < 0 || this->axis >= input.num_dims()) { - return false; - } - return true; +bool ReverseAttrs::is_valid(ParallelTensorShape const &input) const { + if (input.is_valid() == false) { + return false; + } + if (this->axis < 0 || this->axis >= input.num_dims()) { + return false; + } + return true; } -ParallelTensorShape get_output_shape(ReverseAttrs const & attrs, - ParallelTensorShape const & input) { - ParallelTensorShape output = input; - return output; +ParallelTensorShape get_output_shape(ReverseAttrs const &attrs, + ParallelTensorShape const &input) { + ParallelTensorShape output = input; + return output; } - -}; \ No newline at end of file +}; // namespace FlexFlow From 1f8d85d2f2f37e7274465cf9f9f18caa1bead45d Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 00:36:17 +0000 Subject: [PATCH 33/69] leave attention to implement --- lib/op-attrs/include/op-attrs/ops/attention.h | 38 +++++++++---------- lib/op-attrs/src/attention.cc | 38 ++++++++++++++----- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/attention.h b/lib/op-attrs/include/op-attrs/ops/attention.h index 670e4018cc..69636d1fa7 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention.h +++ b/lib/op-attrs/include/op-attrs/ops/attention.h @@ -7,6 +7,23 @@ namespace FlexFlow { +struct MultiHeadAttentionAttrs { + req embed_dim, num_heads, kdim, vdim; + req dropout; + req bias, add_bias_kv, add_zero_attn; +}; + +FF_VISITABLE_STRUCT(MultiHeadAttentionAttrs, + embed_dim, + num_heads, + kdim, + vdim, + dropout, + bias, + add_bias_kv, + add_zero_attn); + + template struct MultiHeadAttentionInputs : public use_visitable_cmp> { @@ -28,23 +45,7 @@ struct MultiHeadAttentionInputs TensorType value; }; -struct MultiHeadAttentionAttrs { - req 
embed_dim, num_heads, kdim, vdim; - req dropout; - req bias, add_bias_kv, add_zero_attn; - bool is_valid(MultiHeadAttentionInputs const &) const; -}; - -FF_VISITABLE_STRUCT(MultiHeadAttentionAttrs, - embed_dim, - num_heads, - kdim, - vdim, - dropout, - bias, - add_bias_kv, - add_zero_attn); -CHECK_VALID_OP_ATTR(MultiHeadAttentionAttrs); +bool is_valid(MultiHeadAttentionAttrs const &, MultiHeadAttentionInputs const &input); int get_qProjSize(MultiHeadAttentionAttrs const &); int get_vProjSize(MultiHeadAttentionAttrs const &); @@ -70,11 +71,10 @@ ParallelTensorShape ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const &, MultiHeadAttentionInputs const &); - TensorShape get_output_shape(MultiHeadAttentionAttrs const &, MultiHeadAttentionInputs const &); CHECK_VALID_OP_ATTR(MultiHeadAttentionAttrs); } // namespace FlexFlow -#endif +#endif \ No newline at end of file diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index a6ae56ddfd..b212cdea8e 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -1,5 +1,7 @@ #include "op-attrs/ops/attention.h" #include "op-attrs/parallel_tensor_shape.h" +#include "utils/exceptions.h" +#include "kernels/legion_dim.h" namespace FlexFlow { @@ -11,15 +13,6 @@ namespace FlexFlow { /* return is_valid; */ /* } */ -bool MultiHeadAttentionAttrs::is_valid( - MultiHeadAttentionInputs const &input) const { - bool valid = true; - valid &= input.key.is_valid(); - valid &= input.query.is_valid(); - valid &= input.value.is_valid(); - return valid; -} - int get_qProjSize(MultiHeadAttentionAttrs const &attrs) { return attrs.kdim; } @@ -92,14 +85,39 @@ TensorShape // return get_tensor_shape_unsafe(parallel_shape); // } +//according to the pytorch https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html, +//query: [target_size_seq_len, batch_size, embed_dim], we consider the batch size +//key: (seq_len, batch_size, embed_dim) +//value: (seq_len, batch_size, embed_dim) 
+// multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) +//output: (target_size_seq_len, batch_size, embed_dim) + ParallelTensorShape get_output_shape( MultiHeadAttentionAttrs const &attrs, MultiHeadAttentionInputs const &inputs) { ParallelTensorShape output_shape = inputs.query; - output_shape.at(ff_dim_t(output_shape.num_dims() - 1)).size = attrs.embed_dim; + return output_shape; } +bool is_valid(MultiHeadAttentionAttrs const & attrs, MultiHeadAttentionInputs const &input) { + bool valid = true; + if(input.query.num_dims() != 3 || input.key.num_dims() != 3 || input.value.num_dims() != 3) { + return false; + } + //ff_dim_t = num_dims - legion_dim_t - 1 + if(input.query.at(legion_dim_t(0)).size != attrs.embed_dim) { + return false; + } + if(input.key.at(legion_dim_t(0)).size != attrs.embed_dim) { + return false; + } + if(input.value.at(legion_dim_t(0)).size != attrs.embed_dim) { + return false; + } + return true; +} + } // namespace FlexFlow // Tensor FFModel::multihead_attention(const Tensor query, From c9625645698f653344688a3c160d27d755f382cc Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 00:56:51 +0000 Subject: [PATCH 34/69] add batch_matmul --- .../include/op-attrs/ops/batch_matmul.h | 4 +- lib/op-attrs/src/attention.cc | 4 +- lib/op-attrs/src/batch_matmul.cc | 39 ++++++++++++++----- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h index f64b2fd8fb..1aa2fb7f59 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h @@ -9,12 +9,14 @@ namespace FlexFlow { struct BatchMatmulAttrs { req a_seq_length_dim, b_seq_length_dim; - bool is_valid(ParallelTensorShape const &, ParallelTensorShape const &); }; FF_VISITABLE_STRUCT(BatchMatmulAttrs, a_seq_length_dim, b_seq_length_dim); CHECK_VALID_OP_ATTR(BatchMatmulAttrs); +bool is_valid(BatchMatmulAttrs const &, 
ParallelTensorShape const &, ParallelTensorShape const &); + + ParallelTensorShape get_output_shape(BatchMatmulAttrs const &, ParallelTensorShape const &, ParallelTensorShape const &); diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index b212cdea8e..2ed20b3016 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -1,5 +1,6 @@ #include "op-attrs/ops/attention.h" #include "op-attrs/parallel_tensor_shape.h" +#include "utils/exception.decl.h" #include "utils/exceptions.h" #include "kernels/legion_dim.h" @@ -96,8 +97,7 @@ ParallelTensorShape get_output_shape( MultiHeadAttentionAttrs const &attrs, MultiHeadAttentionInputs const &inputs) { ParallelTensorShape output_shape = inputs.query; - - return output_shape; + NOT_IMPLEMENTED(); } bool is_valid(MultiHeadAttentionAttrs const & attrs, MultiHeadAttentionInputs const &input) { diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index b460f844f6..1f73da081d 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -5,23 +5,22 @@ namespace FlexFlow { // maybe we should add more check here -bool BatchMatmulAttrs::is_valid(ParallelTensorShape const &lhs, +//// how to get the batch size? 
and lhs: [b, n, m], rhs: [b, m, p] +// output: [b, n, p] //n == s1, m == s2 +//[n/] +bool is_valid(BatchMatmulAttrs const & attrs, ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) { - if (!lhs.is_valid() || !rhs.is_valid()) { - return false; - } - if (lhs.at(ff_dim_t(0)).size != rhs.at(ff_dim_t(0)).size) { return false; } if (lhs.at(ff_dim_t(2)).size != rhs.at(ff_dim_t(1)).size) { return false; } - if (lhs.at(ff_dim_t(1)).size != a_seq_length_dim) { + if (lhs.at(ff_dim_t(1)).size != attrs.a_seq_length_dim) { return false; } - if (rhs.at(ff_dim_t(2)).size != b_seq_length_dim) { + if (rhs.at(ff_dim_t(2)).size != attrs.b_seq_length_dim) { return false; } @@ -30,14 +29,34 @@ bool BatchMatmulAttrs::is_valid(ParallelTensorShape const &lhs, // how to get the batch size? and lhs: [b, n, m], rhs: [b, m, p] // output: [b, n, p] //n == s1, m == s2 +//[b, n/2, m], [b, m, p/2] -> [b, n/2, p/2] +//[b, n, m/2], [b, m/2, p] -> [b, n, p/2] ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) { ParallelTensorShape output_shape = lhs; output_shape.at(ff_dim_t(0)).size = lhs.at(ff_dim_t(0)).size; - output_shape.at(ff_dim_t(1)).size = attrs.a_seq_length_dim; - output_shape.at(ff_dim_t(2)).size = attrs.b_seq_length_dim; - // TODO: Do we need to set the ParallelDim for output_shape + //degree is 1 + //[b, n, m], rhs: [b, m, p] -> [b, n, p] + if(lhs.at(ff_dim_t(1)).degree == 1 && rhs.at(ff_dim_t(2)).degree == 1) { + output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; + output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size; + output_shape.at(ff_dim_t(0)).is_replica_dim= false; + } else if(lhs.at(ff_dim_t(1)).degree>1 && rhs.at(ff_dim_t(2)).degree == 1) { //[b, n/x, m], [b, m, p/x] => [b, n/x, p/x] + output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size/lhs.at(ff_dim_t(1)).degree; + output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size/rhs.at(ff_dim_t(2)).degree; + 
output_shape.at(ff_dim_t(0)).is_replica_dim= true; + } else if(lhs.at(ff_dim_t(1)).degree == 1 && rhs.at(ff_dim_t(2)).degree > 1) { //[b, n, m/x], [b, m/x, p] => [b, n, p/x] + output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; + output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size/rhs.at(ff_dim_t(2)).degree; + output_shape.at(ff_dim_t(0)).is_replica_dim= true; + } else if(lhs.at(ff_dim_t(1)).degree > 1 && rhs.at(ff_dim_t(2)).degree > 1) { //[b, n/x, m/y], [b, m/y, p/z] => [b, n/x, p/z] + output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size/lhs.at(ff_dim_t(1)).degree; + output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size/rhs.at(ff_dim_t(2)).degree; + output_shape.at(ff_dim_t(0)).is_replica_dim= true; + } else { + assert(false && "not supported in BatchMatmulAttrs get_output_shape"); + } return output_shape; } From ab7efc8e3d0f71af54510471235db8b2c163c9b4 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 00:59:13 +0000 Subject: [PATCH 35/69] add batch_matmul --- lib/op-attrs/include/op-attrs/ops/attention.h | 6 +-- .../include/op-attrs/ops/batch_matmul.h | 5 ++- lib/op-attrs/src/attention.cc | 31 +++++++------ lib/op-attrs/src/batch_matmul.cc | 44 ++++++++++++------- 4 files changed, 51 insertions(+), 35 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/attention.h b/lib/op-attrs/include/op-attrs/ops/attention.h index 69636d1fa7..7d332ddc0e 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention.h +++ b/lib/op-attrs/include/op-attrs/ops/attention.h @@ -23,7 +23,6 @@ FF_VISITABLE_STRUCT(MultiHeadAttentionAttrs, add_bias_kv, add_zero_attn); - template struct MultiHeadAttentionInputs : public use_visitable_cmp> { @@ -45,7 +44,8 @@ struct MultiHeadAttentionInputs TensorType value; }; -bool is_valid(MultiHeadAttentionAttrs const &, MultiHeadAttentionInputs const &input); +bool is_valid(MultiHeadAttentionAttrs const &, + MultiHeadAttentionInputs const &input); int get_qProjSize(MultiHeadAttentionAttrs const &); 
int get_vProjSize(MultiHeadAttentionAttrs const &); @@ -77,4 +77,4 @@ TensorShape get_output_shape(MultiHeadAttentionAttrs const &, CHECK_VALID_OP_ATTR(MultiHeadAttentionAttrs); } // namespace FlexFlow -#endif \ No newline at end of file +#endif diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h index 1aa2fb7f59..8b545b46f3 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h @@ -14,8 +14,9 @@ FF_VISITABLE_STRUCT(BatchMatmulAttrs, a_seq_length_dim, b_seq_length_dim); CHECK_VALID_OP_ATTR(BatchMatmulAttrs); -bool is_valid(BatchMatmulAttrs const &, ParallelTensorShape const &, ParallelTensorShape const &); - +bool is_valid(BatchMatmulAttrs const &, + ParallelTensorShape const &, + ParallelTensorShape const &); ParallelTensorShape get_output_shape(BatchMatmulAttrs const &, ParallelTensorShape const &, diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index 2ed20b3016..2d189d7472 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -1,8 +1,8 @@ #include "op-attrs/ops/attention.h" +#include "kernels/legion_dim.h" #include "op-attrs/parallel_tensor_shape.h" #include "utils/exception.decl.h" #include "utils/exceptions.h" -#include "kernels/legion_dim.h" namespace FlexFlow { @@ -86,12 +86,13 @@ TensorShape // return get_tensor_shape_unsafe(parallel_shape); // } -//according to the pytorch https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html, -//query: [target_size_seq_len, batch_size, embed_dim], we consider the batch size -//key: (seq_len, batch_size, embed_dim) -//value: (seq_len, batch_size, embed_dim) -// multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) -//output: (target_size_seq_len, batch_size, embed_dim) +// according to the pytorch +// https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html, +// query: [target_size_seq_len, batch_size, 
embed_dim], we consider the batch +// size key: (seq_len, batch_size, embed_dim) value: (seq_len, batch_size, +// embed_dim) +// multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) +// output: (target_size_seq_len, batch_size, embed_dim) ParallelTensorShape get_output_shape( MultiHeadAttentionAttrs const &attrs, @@ -100,22 +101,24 @@ ParallelTensorShape get_output_shape( NOT_IMPLEMENTED(); } -bool is_valid(MultiHeadAttentionAttrs const & attrs, MultiHeadAttentionInputs const &input) { +bool is_valid(MultiHeadAttentionAttrs const &attrs, + MultiHeadAttentionInputs const &input) { bool valid = true; - if(input.query.num_dims() != 3 || input.key.num_dims() != 3 || input.value.num_dims() != 3) { + if (input.query.num_dims() != 3 || input.key.num_dims() != 3 || + input.value.num_dims() != 3) { return false; } - //ff_dim_t = num_dims - legion_dim_t - 1 - if(input.query.at(legion_dim_t(0)).size != attrs.embed_dim) { + // ff_dim_t = num_dims - legion_dim_t - 1 + if (input.query.at(legion_dim_t(0)).size != attrs.embed_dim) { return false; } - if(input.key.at(legion_dim_t(0)).size != attrs.embed_dim) { + if (input.key.at(legion_dim_t(0)).size != attrs.embed_dim) { return false; } - if(input.value.at(legion_dim_t(0)).size != attrs.embed_dim) { + if (input.value.at(legion_dim_t(0)).size != attrs.embed_dim) { return false; } - return true; + return true; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index 1f73da081d..27aa81e003 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -8,8 +8,9 @@ namespace FlexFlow { //// how to get the batch size? 
and lhs: [b, n, m], rhs: [b, m, p] // output: [b, n, p] //n == s1, m == s2 //[n/] -bool is_valid(BatchMatmulAttrs const & attrs, ParallelTensorShape const &lhs, - ParallelTensorShape const &rhs) { +bool is_valid(BatchMatmulAttrs const &attrs, + ParallelTensorShape const &lhs, + ParallelTensorShape const &rhs) { if (lhs.at(ff_dim_t(0)).size != rhs.at(ff_dim_t(0)).size) { return false; } @@ -36,24 +37,35 @@ ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, ParallelTensorShape const &rhs) { ParallelTensorShape output_shape = lhs; output_shape.at(ff_dim_t(0)).size = lhs.at(ff_dim_t(0)).size; - //degree is 1 + // degree is 1 //[b, n, m], rhs: [b, m, p] -> [b, n, p] - if(lhs.at(ff_dim_t(1)).degree == 1 && rhs.at(ff_dim_t(2)).degree == 1) { + if (lhs.at(ff_dim_t(1)).degree == 1 && rhs.at(ff_dim_t(2)).degree == 1) { output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size; - output_shape.at(ff_dim_t(0)).is_replica_dim= false; - } else if(lhs.at(ff_dim_t(1)).degree>1 && rhs.at(ff_dim_t(2)).degree == 1) { //[b, n/x, m], [b, m, p/x] => [b, n/x, p/x] - output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size/lhs.at(ff_dim_t(1)).degree; - output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size/rhs.at(ff_dim_t(2)).degree; - output_shape.at(ff_dim_t(0)).is_replica_dim= true; - } else if(lhs.at(ff_dim_t(1)).degree == 1 && rhs.at(ff_dim_t(2)).degree > 1) { //[b, n, m/x], [b, m/x, p] => [b, n, p/x] + output_shape.at(ff_dim_t(0)).is_replica_dim = false; + } else if (lhs.at(ff_dim_t(1)).degree > 1 && + rhs.at(ff_dim_t(2)).degree == + 1) { //[b, n/x, m], [b, m, p/x] => [b, n/x, p/x] + output_shape.at(ff_dim_t(1)).size = + lhs.at(ff_dim_t(1)).size / lhs.at(ff_dim_t(1)).degree; + output_shape.at(ff_dim_t(2)).size = + rhs.at(ff_dim_t(2)).size / rhs.at(ff_dim_t(2)).degree; + output_shape.at(ff_dim_t(0)).is_replica_dim = true; + } else if (lhs.at(ff_dim_t(1)).degree == 1 && + 
rhs.at(ff_dim_t(2)).degree > + 1) { //[b, n, m/x], [b, m/x, p] => [b, n, p/x] output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; - output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size/rhs.at(ff_dim_t(2)).degree; - output_shape.at(ff_dim_t(0)).is_replica_dim= true; - } else if(lhs.at(ff_dim_t(1)).degree > 1 && rhs.at(ff_dim_t(2)).degree > 1) { //[b, n/x, m/y], [b, m/y, p/z] => [b, n/x, p/z] - output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size/lhs.at(ff_dim_t(1)).degree; - output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size/rhs.at(ff_dim_t(2)).degree; - output_shape.at(ff_dim_t(0)).is_replica_dim= true; + output_shape.at(ff_dim_t(2)).size = + rhs.at(ff_dim_t(2)).size / rhs.at(ff_dim_t(2)).degree; + output_shape.at(ff_dim_t(0)).is_replica_dim = true; + } else if (lhs.at(ff_dim_t(1)).degree > 1 && + rhs.at(ff_dim_t(2)).degree > + 1) { //[b, n/x, m/y], [b, m/y, p/z] => [b, n/x, p/z] + output_shape.at(ff_dim_t(1)).size = + lhs.at(ff_dim_t(1)).size / lhs.at(ff_dim_t(1)).degree; + output_shape.at(ff_dim_t(2)).size = + rhs.at(ff_dim_t(2)).size / rhs.at(ff_dim_t(2)).degree; + output_shape.at(ff_dim_t(0)).is_replica_dim = true; } else { assert(false && "not supported in BatchMatmulAttrs get_output_shape"); } From 8c7395ddd38743ef2a6ccff8a2aaacd2a6dc510b Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 01:04:45 +0000 Subject: [PATCH 36/69] add batch norm --- lib/op-attrs/include/op-attrs/ops/batch_norm.h | 3 ++- lib/op-attrs/src/batch_norm.cc | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/batch_norm.h b/lib/op-attrs/include/op-attrs/ops/batch_norm.h index c35d7bcd41..6f170c92f8 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_norm.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_norm.h @@ -9,10 +9,11 @@ namespace FlexFlow { struct BatchNormAttrs { req relu; - bool is_valid(ParallelTensorShape const &); }; FF_VISITABLE_STRUCT(BatchNormAttrs, relu); +bool 
is_valid(BatchNormAttrs const &, ParallelTensorShape const &); + ParallelTensorShape get_output_shape(BatchNormAttrs const &, ParallelTensorShape const &); diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index 526871fc46..752bf5e06c 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -2,17 +2,18 @@ namespace FlexFlow { -bool BatchNormAttrs::is_valid(ParallelTensorShape const &input) { - if (!input.is_valid()) { +bool is_valid(BatchNormAttrs const &attrs, ParallelTensorShape const &input) { + if (input.num_dims() != 4) { return false; } return true; } +// input: [b, c, h, w] +// output: [b, c, h, w] ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, ParallelTensorShape const &input) { ParallelTensorShape output_shape = input; - return output_shape; } From 1f1703c2dbbe91fc0f371a2858c2fe61cf436993 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 01:28:56 +0000 Subject: [PATCH 37/69] refine the batch_matmul --- lib/op-attrs/include/op-attrs/ops/cast.h | 1 - lib/op-attrs/include/op-attrs/ops/combine.h | 1 - lib/op-attrs/src/batch_matmul.cc | 66 +++++++++++++-------- lib/op-attrs/src/batch_norm.cc | 7 --- lib/op-attrs/src/cast.cc | 7 --- lib/op-attrs/src/combine.cc | 7 --- lib/op-attrs/src/concat.cc | 19 +++--- 7 files changed, 48 insertions(+), 60 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/cast.h b/lib/op-attrs/include/op-attrs/ops/cast.h index 39d6fe1cc1..403fcc21a6 100644 --- a/lib/op-attrs/include/op-attrs/ops/cast.h +++ b/lib/op-attrs/include/op-attrs/ops/cast.h @@ -10,7 +10,6 @@ namespace FlexFlow { struct CastAttrs { req dtype; - bool is_valid(ParallelTensorShape const &input) const; }; FF_VISITABLE_STRUCT(CastAttrs, dtype); diff --git a/lib/op-attrs/include/op-attrs/ops/combine.h b/lib/op-attrs/include/op-attrs/ops/combine.h index ffc04d4656..49bea57a38 100644 --- a/lib/op-attrs/include/op-attrs/ops/combine.h +++ 
b/lib/op-attrs/include/op-attrs/ops/combine.h @@ -11,7 +11,6 @@ namespace FlexFlow { struct CombineAttrs { ff_dim_t combine_dim; req combine_degree; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(CombineAttrs, combine_dim, combine_degree); CHECK_VALID_OP_ATTR(CombineAttrs); diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index 27aa81e003..209a064803 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -1,33 +1,11 @@ #include "op-attrs/ops/batch_matmul.h" #include "op-attrs/ff_dim.h" #include "op-attrs/parallel_tensor_shape.h" +#include "utils/exception.decl.h" +#include "utils/exception.h" namespace FlexFlow { -// maybe we should add more check here -//// how to get the batch size? and lhs: [b, n, m], rhs: [b, m, p] -// output: [b, n, p] //n == s1, m == s2 -//[n/] -bool is_valid(BatchMatmulAttrs const &attrs, - ParallelTensorShape const &lhs, - ParallelTensorShape const &rhs) { - if (lhs.at(ff_dim_t(0)).size != rhs.at(ff_dim_t(0)).size) { - return false; - } - if (lhs.at(ff_dim_t(2)).size != rhs.at(ff_dim_t(1)).size) { - return false; - } - if (lhs.at(ff_dim_t(1)).size != attrs.a_seq_length_dim) { - return false; - } - - if (rhs.at(ff_dim_t(2)).size != attrs.b_seq_length_dim) { - return false; - } - - return true; -} - // how to get the batch size? 
and lhs: [b, n, m], rhs: [b, m, p] // output: [b, n, p] //n == s1, m == s2 //[b, n/2, m], [b, m, p/2] -> [b, n/2, p/2] @@ -36,16 +14,42 @@ ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) { ParallelTensorShape output_shape = lhs; + + // check if the input is valid + if (!lhs.is_valid() || !rhs.is_valid()) { + throw mk_runtime_error( + "BatchMatmulAttrs::get_output_shape: input is invalid") + } + + if (lhs.at(ff_dim_t(0)).size != rhs.at(ff_dim_t(0)).size) { + throw mk_runtime_error( + "BatchMatmulAttrs::get_output_shape: batch size is not equal") + } + output_shape.at(ff_dim_t(0)).size = lhs.at(ff_dim_t(0)).size; // degree is 1 //[b, n, m], rhs: [b, m, p] -> [b, n, p] if (lhs.at(ff_dim_t(1)).degree == 1 && rhs.at(ff_dim_t(2)).degree == 1) { + // check if the input is valid + if (lhs.at(ff_dim_t(2)).size != rhs.at(ff_dim_t(1)).size || + lhs.at(ff_dim_t(1)).size != attrs.a_seq_length_dim || + rhs.at(ff_dim_t(2)).size != attrs.b_seq_length_dim) { + throw mk_runtime_error("BatchMatmulAttrs::get_output_shape: lhs and rhs " + "are not match when degree is 1"); + } + output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size; output_shape.at(ff_dim_t(0)).is_replica_dim = false; } else if (lhs.at(ff_dim_t(1)).degree > 1 && rhs.at(ff_dim_t(2)).degree == 1) { //[b, n/x, m], [b, m, p/x] => [b, n/x, p/x] + + if (lhs.at(ff_dim_t(1)).degree != rhs.at(ff_dim_t(2)).degree) { + throw mk_runtime_error("BatchMatmulAttrs::get_output_shape: lhs.degree " + ">1 and rhs.degree == 1, but degree is not equal"); + } + output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size / lhs.at(ff_dim_t(1)).degree; output_shape.at(ff_dim_t(2)).size = @@ -54,6 +58,11 @@ ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, } else if (lhs.at(ff_dim_t(1)).degree == 1 && rhs.at(ff_dim_t(2)).degree > 1) { //[b, n, m/x], [b, m/x, p] => [b, n, p/x] 
+ if (lhs.at(ff_dim_t(2)).degree != rhs.at(ff_dim_t(1)).degree) { + throw mk_runtime_error( + "BatchMatmulAttrs::get_output_shape: lhs.degree == 1 and rhs.degree " + "> 1, but degree is not equal"); + } output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size / rhs.at(ff_dim_t(2)).degree; @@ -61,13 +70,20 @@ ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, } else if (lhs.at(ff_dim_t(1)).degree > 1 && rhs.at(ff_dim_t(2)).degree > 1) { //[b, n/x, m/y], [b, m/y, p/z] => [b, n/x, p/z] + + if (lhs.at(ff_dim_t(1)).degree != rhs.at(ff_dim_t(2)).degree) { + throw mk_runtime_error("BatchMatmulAttrs::get_output_shape: lhs.degree > " + "1 and rhs.degree > 1, but degree is not equal"); + } + output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size / lhs.at(ff_dim_t(1)).degree; output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size / rhs.at(ff_dim_t(2)).degree; output_shape.at(ff_dim_t(0)).is_replica_dim = true; } else { - assert(false && "not supported in BatchMatmulAttrs get_output_shape"); + throw mk_runtime_error("BatchMatmulAttrs::get_output_shape: not supported " + "in BatchMatmulAttrs get_output_shape"); } return output_shape; } diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index 752bf5e06c..e787cf741f 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -2,13 +2,6 @@ namespace FlexFlow { -bool is_valid(BatchNormAttrs const &attrs, ParallelTensorShape const &input) { - if (input.num_dims() != 4) { - return false; - } - return true; -} - // input: [b, c, h, w] // output: [b, c, h, w] ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, diff --git a/lib/op-attrs/src/cast.cc b/lib/op-attrs/src/cast.cc index d3a6961a2e..a743d77f59 100644 --- a/lib/op-attrs/src/cast.cc +++ b/lib/op-attrs/src/cast.cc @@ -2,13 +2,6 @@ namespace FlexFlow { -bool CastAttrs::is_valid(ParallelTensorShape const &input) const { - if 
(!input.is_valid()) { - return false; - } - return true; -} - ParallelTensorShape get_output_shape(CastAttrs const &attrs, ParallelTensorShape const &input) { ParallelTensorShape output = input; diff --git a/lib/op-attrs/src/combine.cc b/lib/op-attrs/src/combine.cc index 5309e5a620..48fc6c8720 100644 --- a/lib/op-attrs/src/combine.cc +++ b/lib/op-attrs/src/combine.cc @@ -3,13 +3,6 @@ namespace FlexFlow { -bool CombineAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - return true; -} - ParallelTensorShape get_output_shape(CombineAttrs const &attrs, ParallelTensorShape const &input) { ParallelTensorShape output = input; diff --git a/lib/op-attrs/src/concat.cc b/lib/op-attrs/src/concat.cc index 39c06d07cc..2f65ac5623 100644 --- a/lib/op-attrs/src/concat.cc +++ b/lib/op-attrs/src/concat.cc @@ -1,23 +1,18 @@ #include "op-attrs/ops/concat.h" +#include "utils/exception.h" namespace FlexFlow { -bool ConcatAttrs::is_valid( - std::vector const &input) const { - bool valid = true; - for (auto p : input) { - valid &= p.is_valid(); - if (axis >= p.num_dims(())) { - return false; - } - } - return valid; -} - ParallelTensorShape get_output_shape(ConcatAttrs const &attrs, std::vector const &inputs) { ParallelTensorShape output = inputs[0]; + for (auto &i : inputs) { + if (attrs.axis >= i.num_dims() || i.is_valid() == false) { + throw mk_runtime_error("ConcatAttrs::get_output_shape: axis is out of " + "range or input is invalid"); + } + } for (auto &i : inputs) { output.at(attrs.axis).size += i.at(attrs.axis).size; } From 9406a0b310dd426ca3a0b874366dcae4c6a5799f Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 12:54:52 +0000 Subject: [PATCH 38/69] refine the batch_matmul --- .../include/op-attrs/ops/batch_matmul.h | 4 - .../include/op-attrs/ops/batch_norm.h | 2 - lib/op-attrs/include/op-attrs/ops/conv_2d.h | 1 - lib/op-attrs/src/batch_matmul.cc | 78 +++++++------------ lib/op-attrs/src/batch_norm.cc | 5 ++ 
lib/op-attrs/src/cast.cc | 4 + lib/op-attrs/src/conv_2d.cc | 5 ++ 7 files changed, 40 insertions(+), 59 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h index 8b545b46f3..c9d81c98e4 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h @@ -14,10 +14,6 @@ FF_VISITABLE_STRUCT(BatchMatmulAttrs, a_seq_length_dim, b_seq_length_dim); CHECK_VALID_OP_ATTR(BatchMatmulAttrs); -bool is_valid(BatchMatmulAttrs const &, - ParallelTensorShape const &, - ParallelTensorShape const &); - ParallelTensorShape get_output_shape(BatchMatmulAttrs const &, ParallelTensorShape const &, ParallelTensorShape const &); diff --git a/lib/op-attrs/include/op-attrs/ops/batch_norm.h b/lib/op-attrs/include/op-attrs/ops/batch_norm.h index 6f170c92f8..29b76d96e9 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_norm.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_norm.h @@ -12,8 +12,6 @@ struct BatchNormAttrs { }; FF_VISITABLE_STRUCT(BatchNormAttrs, relu); -bool is_valid(BatchNormAttrs const &, ParallelTensorShape const &); - ParallelTensorShape get_output_shape(BatchNormAttrs const &, ParallelTensorShape const &); diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d.h b/lib/op-attrs/include/op-attrs/ops/conv_2d.h index 51da1ac91c..79233eb8fc 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d.h @@ -14,7 +14,6 @@ struct Conv2DAttrs { padding_w, groups; req> activation; req use_bias; - bool is_valid(ParallelTensorShape const &input) const; }; FF_VISITABLE_STRUCT(Conv2DAttrs, diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index 209a064803..c4ff074302 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -1,7 +1,6 @@ #include "op-attrs/ops/batch_matmul.h" #include "op-attrs/ff_dim.h" #include "op-attrs/parallel_tensor_shape.h" -#include 
"utils/exception.decl.h" #include "utils/exception.h" namespace FlexFlow { @@ -23,64 +22,39 @@ ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, if (lhs.at(ff_dim_t(0)).size != rhs.at(ff_dim_t(0)).size) { throw mk_runtime_error( - "BatchMatmulAttrs::get_output_shape: batch size is not equal") + "BatchMatmulAttrs::get_output_shape: batch size is not equal"); } + if (lhs.at(ff_dim_t(2)).size != rhs.at(ff_dim_t(1)).size || + lhs.at(ff_dim_t(1)).size != attrs.a_seq_length_dim || + rhs.at(ff_dim_t(2)).size != attrs.b_seq_length_dim) { + throw mk_runtime_error( + "BatchMatmulAttrs::get_output_shape: third demension of lhs and second " + "dementions of rhs are not match"); + } + output_shape.at(ff_dim_t(0)).size = lhs.at(ff_dim_t(0)).size; // batch size + output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; + output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size; - output_shape.at(ff_dim_t(0)).size = lhs.at(ff_dim_t(0)).size; - // degree is 1 - //[b, n, m], rhs: [b, m, p] -> [b, n, p] - if (lhs.at(ff_dim_t(1)).degree == 1 && rhs.at(ff_dim_t(2)).degree == 1) { - // check if the input is valid - if (lhs.at(ff_dim_t(2)).size != rhs.at(ff_dim_t(1)).size || - lhs.at(ff_dim_t(1)).size != attrs.a_seq_length_dim || - rhs.at(ff_dim_t(2)).size != attrs.b_seq_length_dim) { - throw mk_runtime_error("BatchMatmulAttrs::get_output_shape: lhs and rhs " - "are not match when degree is 1"); - } - - output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; - output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size; + if (lhs.at(ff_dim_t(1)).degree == 1 && lhs.at(ff_dim_t(2)).degree == 1) { + // case 0: degree is 1, [b, n, m], rhs: [b, m, p] -> [b, n, p] output_shape.at(ff_dim_t(0)).is_replica_dim = false; - } else if (lhs.at(ff_dim_t(1)).degree > 1 && - rhs.at(ff_dim_t(2)).degree == - 1) { //[b, n/x, m], [b, m, p/x] => [b, n/x, p/x] - - if (lhs.at(ff_dim_t(1)).degree != rhs.at(ff_dim_t(2)).degree) { - throw 
mk_runtime_error("BatchMatmulAttrs::get_output_shape: lhs.degree " - ">1 and rhs.degree == 1, but degree is not equal"); - } - - output_shape.at(ff_dim_t(1)).size = - lhs.at(ff_dim_t(1)).size / lhs.at(ff_dim_t(1)).degree; - output_shape.at(ff_dim_t(2)).size = - rhs.at(ff_dim_t(2)).size / rhs.at(ff_dim_t(2)).degree; - output_shape.at(ff_dim_t(0)).is_replica_dim = true; } else if (lhs.at(ff_dim_t(1)).degree == 1 && - rhs.at(ff_dim_t(2)).degree > - 1) { //[b, n, m/x], [b, m/x, p] => [b, n, p/x] - if (lhs.at(ff_dim_t(2)).degree != rhs.at(ff_dim_t(1)).degree) { - throw mk_runtime_error( - "BatchMatmulAttrs::get_output_shape: lhs.degree == 1 and rhs.degree " - "> 1, but degree is not equal"); - } - output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; - output_shape.at(ff_dim_t(2)).size = - rhs.at(ff_dim_t(2)).size / rhs.at(ff_dim_t(2)).degree; + lhs.at(ff_dim_t(2)).degree > + 1) { // case 1: [b, n, m/x], [b, m/x, p] => [b, n, y] output_shape.at(ff_dim_t(0)).is_replica_dim = true; + output_shape.at(ff_dim_t(1)).degree = lhs.at(ff_dim_t(1)).degree; } else if (lhs.at(ff_dim_t(1)).degree > 1 && - rhs.at(ff_dim_t(2)).degree > - 1) { //[b, n/x, m/y], [b, m/y, p/z] => [b, n/x, p/z] - - if (lhs.at(ff_dim_t(1)).degree != rhs.at(ff_dim_t(2)).degree) { - throw mk_runtime_error("BatchMatmulAttrs::get_output_shape: lhs.degree > " - "1 and rhs.degree > 1, but degree is not equal"); - } - - output_shape.at(ff_dim_t(1)).size = - lhs.at(ff_dim_t(1)).size / lhs.at(ff_dim_t(1)).degree; - output_shape.at(ff_dim_t(2)).size = - rhs.at(ff_dim_t(2)).size / rhs.at(ff_dim_t(2)).degree; + lhs.at(ff_dim_t(2)).degree == + 1) { // case 2: [b, n/x, m] [b m p/x] => [b n/x p/x] + output_shape.at(ff_dim_t(0)).is_replica_dim = true; + output_shape.at(ff_dim_t(1)).degree = rhs.at(ff_dim_t(1)).degree; + output_shape.at(ff_dim_t(2)).degree = rhs.at(ff_dim_t(2)).degree; + } else if (lhs.at(ff_dim_t(1)).degree > 1 && + lhs.at(ff_dim_t(2)).degree > + 1) { // case 3: [b n/x m/y] [b m/y p/x]=> [b n/x 
p/x] output_shape.at(ff_dim_t(0)).is_replica_dim = true; + output_shape.at(ff_dim_t(1)).degree = lhs.at(ff_dim_t(1)).degree; + output_shape.at(ff_dim_t(2)).degree = rhs.at(ff_dim_t(2)).degree; } else { throw mk_runtime_error("BatchMatmulAttrs::get_output_shape: not supported " "in BatchMatmulAttrs get_output_shape"); diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index e787cf741f..be11ac0e13 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/batch_norm.h" +#include "utils/exception.h" namespace FlexFlow { @@ -6,6 +7,10 @@ namespace FlexFlow { // output: [b, c, h, w] ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, ParallelTensorShape const &input) { + if (!input.is_valid() || input.num_dims() != 4) { + throw mk_runtime_error( + "BatchNormAttrs::get_output_shape: input is invalid"); + } ParallelTensorShape output_shape = input; return output_shape; } diff --git a/lib/op-attrs/src/cast.cc b/lib/op-attrs/src/cast.cc index a743d77f59..7c679439ad 100644 --- a/lib/op-attrs/src/cast.cc +++ b/lib/op-attrs/src/cast.cc @@ -1,9 +1,13 @@ #include "op-attrs/ops/cast.h" +#include "utils/exception.h" namespace FlexFlow { ParallelTensorShape get_output_shape(CastAttrs const &attrs, ParallelTensorShape const &input) { + if (!input.is_valid()) { + throw mk_runtime_error("CastAttrs::get_output_shape: input is invalid"); + } ParallelTensorShape output = input; output.data_type = attrs.dtype; return output; diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index ed89b380df..ab541cbe94 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -1,6 +1,7 @@ #include "op-attrs/ops/conv_2d.h" #include "parallel_dim_mapping_record.h" #include "parallel_dim_mapping_record_solver.h" +#include "utils/exception.h" #include "utils/vector.h" namespace FlexFlow { @@ -101,6 +102,10 @@ bool Conv2DAttrs::is_valid(ParallelTensorShape const &input) 
const { ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, ParallelTensorShape const &input) { ParallelTensorShape output = input; + if (input.num_dims() != 4) { + throw mk_runtime_error("Conv2DAttrs::get_output_shape: input is invalid"); + } + output.at(ff_dim_t(1)).size = attrs.out_channels; output.at(ff_dim_t(2)).size = (input.at(ff_dim_t(2)).size + 2 * attrs.padding_h - attrs.kernel_h) / From 9a84d50055b04f17a24a15011a2c93a99f8676e2 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 13:02:51 +0000 Subject: [PATCH 39/69] refine the conv2d --- lib/op-attrs/src/conv_2d.cc | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index ab541cbe94..867b4d1540 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/conv_2d.h" +#include "op-attrs/ff_dim.h" #include "parallel_dim_mapping_record.h" #include "parallel_dim_mapping_record_solver.h" #include "utils/exception.h" @@ -106,6 +107,12 @@ ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, throw mk_runtime_error("Conv2DAttrs::get_output_shape: input is invalid"); } + if (attrs.kernel_h > input.at(ff_dim_t(2)).size || + attrs.kernel_w > input.at(ff_dim_t(3)).size) { + throw mk_runtime_error( + "Conv2DAttrs::get_output_shape: kernel size is larger than input size"); + } + output.at(ff_dim_t(1)).size = attrs.out_channels; output.at(ff_dim_t(2)).size = (input.at(ff_dim_t(2)).size + 2 * attrs.padding_h - attrs.kernel_h) / @@ -115,6 +122,28 @@ ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) / attrs.stride_w + 1; + if (input.at(ff_dim_t(2)).size == 1 && input.at(ff_dim_t(3)).size == 1) { + // case 1 input degree is 1, like 1GPU + output.at(ff_dim_t(0)).is_replica_dim = false; + } else if (input.at(ff_dim_t(2)).size > 1 && + input.at(ff_dim_t(3)).size == 1) { + // case 
2: [b, input_channel, input_h/x, input_w], [output_channel, + // input_channel, kernel_h, kernel_w] => [b, output_channel, output_h/x, + // output_w] + output.at(ff_dim_t(0)).is_replica_dim = true; + output.at(ff_dim_t(2)).degree = input.at(ff_dim_t(2)).degree; + output.at(ff_dim_t(3)).degree = input.at(ff_dim_t(3)).degree; + } else if (input.at(ff_dim_t(2)).size == 1 && + input.at(ff_dim_t(3)).size > 1) { + // case 3: [b, input_channel, input_h, input_w / x] [output_channel, + // input_channel, kernel_h, kernel_w / x] => [b, output_channel, output_h, + // output_w / x] + output.at(ff_dim_t(0)).is_replica_dim = true; + output.at(ff_dim_t(3)).degree = input.at(ff_dim_t(3)).degree; + } else { + throw mk_runtime_error("Conv2DAttrs::get_output_shape: not supported in " + "Conv2DAttrs get_output_shape"); + } return output; } From 3208f5b1fb16526dd6132788381ca389a3f1baf0 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 13:04:59 +0000 Subject: [PATCH 40/69] delete the invalid --- lib/op-attrs/include/op-attrs/ops/dropout.h | 1 - lib/op-attrs/include/op-attrs/ops/element_binary.h | 2 -- lib/op-attrs/src/conv_2d.cc | 11 ----------- lib/op-attrs/src/dropout.cc | 7 ------- lib/op-attrs/src/element_binary.cc | 7 ------- 5 files changed, 28 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/dropout.h b/lib/op-attrs/include/op-attrs/ops/dropout.h index 04f244f27f..edf6db9ea8 100644 --- a/lib/op-attrs/include/op-attrs/ops/dropout.h +++ b/lib/op-attrs/include/op-attrs/ops/dropout.h @@ -10,7 +10,6 @@ namespace FlexFlow { struct DropoutAttrs { req rate; req seed; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(DropoutAttrs, rate, seed); CHECK_VALID_OP_ATTR(DropoutAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/element_binary.h b/lib/op-attrs/include/op-attrs/ops/element_binary.h index 7b731bf40f..9a2e4dc22a 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_binary.h +++ b/lib/op-attrs/include/op-attrs/ops/element_binary.h 
@@ -14,8 +14,6 @@ struct ElementBinaryAttrs { req compute_type; req should_broadcast_lhs; req should_broadcast_rhs; - bool is_valid(ParallelTensorShape const &lhs, - ParallelTensorShape const &rhs) const; }; FF_VISITABLE_STRUCT(ElementBinaryAttrs, type, diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index 867b4d1540..08e8315952 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -83,17 +83,6 @@ std::vector return mappings; } -bool Conv2DAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - if (input.num_dims() != 4) { - return false; - } - - return true; -} - // according to pytorch, the input shape: [b, input_channel, input_h, input_w] // kernel shape: [output_channel, input_channel, kernel_h, kernel_w] // we may have stide_h and padding_h diff --git a/lib/op-attrs/src/dropout.cc b/lib/op-attrs/src/dropout.cc index bccfdb10a2..ab763b8a7f 100644 --- a/lib/op-attrs/src/dropout.cc +++ b/lib/op-attrs/src/dropout.cc @@ -3,13 +3,6 @@ namespace FlexFlow { -bool DropoutAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - return true; -} - ParallelTensorShape get_output_shape(DropoutAttrs const &attrs, ParallelTensorShape const &input) { ParallelTensorShape output = input; diff --git a/lib/op-attrs/src/element_binary.cc b/lib/op-attrs/src/element_binary.cc index 4b20ee25a9..f591d88719 100644 --- a/lib/op-attrs/src/element_binary.cc +++ b/lib/op-attrs/src/element_binary.cc @@ -2,13 +2,6 @@ namespace FlexFlow { -bool ElementBinaryAttrs::is_valid(ParallelTensorShape const &input1, - ParallelTensorShape const &input2) const { - if (!input1.is_valid() || !input2.is_valid()) { - return false; - } - return true; -} ParallelTensorShape get_output_shape(ElementBinaryAttrs const &atts, ParallelTensorShape const &lhs, From 8a6b29ea07309167d2a3e13f58b1b8e3515bd0ee Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 
14:24:24 +0000 Subject: [PATCH 41/69] add gather --- .../include/op-attrs/ops/element_unary.h | 1 - lib/op-attrs/include/op-attrs/ops/embedding.h | 1 - lib/op-attrs/include/op-attrs/ops/gather.h | 1 - lib/op-attrs/src/element_binary.cc | 1 - lib/op-attrs/src/element_unary.cc | 7 ------ lib/op-attrs/src/embedding.cc | 9 ++------ lib/op-attrs/src/gather.cc | 23 ++++++++++++++++--- 7 files changed, 22 insertions(+), 21 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/element_unary.h b/lib/op-attrs/include/op-attrs/ops/element_unary.h index 562c50e4ed..d0dbc3661c 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_unary.h +++ b/lib/op-attrs/include/op-attrs/ops/element_unary.h @@ -18,7 +18,6 @@ CHECK_VALID_OP_ATTR(ElementScalarUnaryAttrs); struct ElementUnaryAttrs { req op; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ElementUnaryAttrs, op); CHECK_VALID_OP_ATTR(ElementUnaryAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/embedding.h b/lib/op-attrs/include/op-attrs/ops/embedding.h index 506b8a6186..52d22fe836 100644 --- a/lib/op-attrs/include/op-attrs/ops/embedding.h +++ b/lib/op-attrs/include/op-attrs/ops/embedding.h @@ -19,7 +19,6 @@ struct EmbeddingAttrs { req num_entries, out_channels; req aggr; req data_type; - bool is_valid(ParallelTensorShape const &input) const; }; FF_VISITABLE_STRUCT(EmbeddingAttrs, num_entries, out_channels, aggr, data_type); CHECK_VALID_OP_ATTR(EmbeddingAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/gather.h b/lib/op-attrs/include/op-attrs/ops/gather.h index 1789edf649..852dc9cd5e 100644 --- a/lib/op-attrs/include/op-attrs/ops/gather.h +++ b/lib/op-attrs/include/op-attrs/ops/gather.h @@ -10,7 +10,6 @@ namespace FlexFlow { struct GatherAttrs { ff_dim_t dim; - bool is_valid(ParallelTensorShape const &, ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(GatherAttrs, dim); CHECK_VALID_OP_ATTR(GatherAttrs); diff --git a/lib/op-attrs/src/element_binary.cc 
b/lib/op-attrs/src/element_binary.cc index f591d88719..c61be195c0 100644 --- a/lib/op-attrs/src/element_binary.cc +++ b/lib/op-attrs/src/element_binary.cc @@ -2,7 +2,6 @@ namespace FlexFlow { - ParallelTensorShape get_output_shape(ElementBinaryAttrs const &atts, ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) { diff --git a/lib/op-attrs/src/element_unary.cc b/lib/op-attrs/src/element_unary.cc index 36e58ff263..b9028ac3b8 100644 --- a/lib/op-attrs/src/element_unary.cc +++ b/lib/op-attrs/src/element_unary.cc @@ -2,13 +2,6 @@ namespace FlexFlow { -bool ElementUnaryAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - return true; -} - ParallelTensorShape get_output_shape(ElementUnaryAttrs const &atts, ParallelTensorShape const &input) { ParallelTensorShape output = input; diff --git a/lib/op-attrs/src/embedding.cc b/lib/op-attrs/src/embedding.cc index dca6e393ef..6598146092 100644 --- a/lib/op-attrs/src/embedding.cc +++ b/lib/op-attrs/src/embedding.cc @@ -2,13 +2,6 @@ namespace FlexFlow { -bool EmbeddingAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - return true; -} - // pytorch nn.Embedding // Embedding OP: (num_embeddings, embedding_dim) (num_entries, out_channels) // Input: (batch_size, seq_len) @@ -20,3 +13,5 @@ ParallelTensorShape get_output_shape(EmbeddingAttrs const &atts, output.at(ff_dim_t(2)).size = atts.out_channels; return output; } // namespace FlexFlow + +} \ No newline at end of file diff --git a/lib/op-attrs/src/gather.cc b/lib/op-attrs/src/gather.cc index 25bfe8e516..5cdca14955 100644 --- a/lib/op-attrs/src/gather.cc +++ b/lib/op-attrs/src/gather.cc @@ -1,5 +1,6 @@ #include "op-attrs/ops/gather.h" #include "utils/exception.decl.h" +#include "utils/exceptions.h" namespace FlexFlow { @@ -17,12 +18,28 @@ bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, return true; } 
+//https://pytorch.org/docs/stable/generated/torch.gather.html // todo: why return a vector? std::vector get_output_shapes(GatherAttrs const &attrs, - ParallelTensorShape const &lhs, - ParallelTensorShape const &rhs) { - NOT_IMPLEMENTED(); + ParallelTensorShape const & input, + ParallelTensorShape const &index) { + if(input.num_dims() != index.num_dims()) { + throw mk_runtime_error("Gather: input and index must have the same number of dimensions"); + } + + for(int i = 0; i < input.num_dims(); i++) { + if(i != attrs.dim && input.at(ff_dim_t(i)).size <= index.at(ff_dim_t(i)).size) { + throw mk_runtime_error("Gather: index.size(d) <= input.size(d) for all dimensions d != dim"); + } + } + + ParallelTensorShape output = input; + + std::vector results; + //NOTE(lambda):why return a vector? + results.push_back(output); + return results; } /* bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, From a8c75ece898844407e56113af36d25daf4a08594 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 14:47:50 +0000 Subject: [PATCH 42/69] add groupy --- lib/op-attrs/include/op-attrs/ops/groupby.h | 1 - .../include/op-attrs/ops/layer_norm.h | 1 - lib/op-attrs/src/embedding.cc | 3 +- lib/op-attrs/src/gather.cc | 21 +++++++----- lib/op-attrs/src/groupby.cc | 34 +++++++++++++------ 5 files changed, 38 insertions(+), 22 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/groupby.h b/lib/op-attrs/include/op-attrs/ops/groupby.h index 702cbd2a1c..d2c1033b31 100644 --- a/lib/op-attrs/include/op-attrs/ops/groupby.h +++ b/lib/op-attrs/include/op-attrs/ops/groupby.h @@ -10,7 +10,6 @@ namespace FlexFlow { struct Group_byAttrs { req n; req alpha; - bool is_valid(ParallelTensorShape const &, ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(Group_byAttrs, n, alpha); CHECK_VALID_OP_ATTR(Group_byAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/layer_norm.h b/lib/op-attrs/include/op-attrs/ops/layer_norm.h index 15b6729262..f279b0650c 100644 --- 
a/lib/op-attrs/include/op-attrs/ops/layer_norm.h +++ b/lib/op-attrs/include/op-attrs/ops/layer_norm.h @@ -12,7 +12,6 @@ struct LayerNormAttrs { stack_vector axes; req elementwise_affine; req eps; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(LayerNormAttrs, axes, elementwise_affine, eps); CHECK_VALID_OP_ATTR(LayerNormAttrs); diff --git a/lib/op-attrs/src/embedding.cc b/lib/op-attrs/src/embedding.cc index 6598146092..fa8f313457 100644 --- a/lib/op-attrs/src/embedding.cc +++ b/lib/op-attrs/src/embedding.cc @@ -11,7 +11,8 @@ ParallelTensorShape get_output_shape(EmbeddingAttrs const &atts, ParallelTensorShape output = input; output.at(ff_dim_t(1)).size = input.at(ff_dim_t(1)).size; output.at(ff_dim_t(2)).size = atts.out_channels; + // output degree is same as input degree return output; } // namespace FlexFlow -} \ No newline at end of file +} // namespace FlexFlow diff --git a/lib/op-attrs/src/gather.cc b/lib/op-attrs/src/gather.cc index 5cdca14955..ed7e5abd7b 100644 --- a/lib/op-attrs/src/gather.cc +++ b/lib/op-attrs/src/gather.cc @@ -18,26 +18,29 @@ bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, return true; } -//https://pytorch.org/docs/stable/generated/torch.gather.html -// todo: why return a vector? +// https://pytorch.org/docs/stable/generated/torch.gather.html +// todo: why return a vector? 
std::vector get_output_shapes(GatherAttrs const &attrs, - ParallelTensorShape const & input, + ParallelTensorShape const &input, ParallelTensorShape const &index) { - if(input.num_dims() != index.num_dims()) { - throw mk_runtime_error("Gather: input and index must have the same number of dimensions"); + if (input.num_dims() != index.num_dims()) { + throw mk_runtime_error( + "Gather: input and index must have the same number of dimensions"); } - for(int i = 0; i < input.num_dims(); i++) { - if(i != attrs.dim && input.at(ff_dim_t(i)).size <= index.at(ff_dim_t(i)).size) { - throw mk_runtime_error("Gather: index.size(d) <= input.size(d) for all dimensions d != dim"); + for (int i = 0; i < input.num_dims(); i++) { + if (i != attrs.dim && + input.at(ff_dim_t(i)).size <= index.at(ff_dim_t(i)).size) { + throw mk_runtime_error( + "Gather: index.size(d) <= input.size(d) for all dimensions d != dim"); } } ParallelTensorShape output = input; std::vector results; - //NOTE(lambda):why return a vector? + // NOTE(lambda):why return a vector? 
results.push_back(output); return results; } diff --git a/lib/op-attrs/src/groupby.cc b/lib/op-attrs/src/groupby.cc index 9315b85c39..acae02d584 100644 --- a/lib/op-attrs/src/groupby.cc +++ b/lib/op-attrs/src/groupby.cc @@ -3,18 +3,32 @@ namespace FlexFlow { -bool Group_byAttrs::is_valid(ParallelTensorShape const &lhs, - ParallelTensorShape const &rhs) const { - if (!lhs.is_valid() || !rhs.is_valid()) { - return false; - } - NOT_IMPLEMENTED(); -} +/* +import torch +data = torch.tensor([10, 20, 30, 40, 50, 60, 70, 80]) +# group index tensor group_indices +group_indices = torch.tensor([0, 1, 0, 2, 1, 2, 0, 1]) + +# groupby operator +unique_indices, unique_inverse_indices = torch.unique(group_indices, +return_inverse=True) print(f"unique_indices: {unique_indices} and +unique_inverse_indices: {unique_inverse_indices}") grouped_data = [] for i in +unique_indices: # use unique_inverse_indices group_data = +data[unique_inverse_indices == i] grouped_data.append(group_data) for i, group +in enumerate(grouped_data): print(f"Group {i}: {group}") +*/ ParallelTensorShape get_output_shape(Group_byAttrs const &attrs, - ParallelTensorShape const &lhs, - ParallelTensorShape const &rhs) { - NOT_IMPLEMENTED(); + ParallelTensorShape const &input, + ParallelTensorShape const &index) { + if (input.num_dims() != index.num_dims()) { + throw mk_runtime_error( + "Group_by: input and index must have the same number of dimensions"); + } + + ParallelTensorShape output = input; + // degree of output is same as input's + return output; } } // namespace FlexFlow From bb615bd5c37ce04789766b29f01e6240c45721c2 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 14:49:16 +0000 Subject: [PATCH 43/69] implement the layer_norm --- lib/op-attrs/src/groupby.cc | 2 +- lib/op-attrs/src/layer_norm.cc | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/lib/op-attrs/src/groupby.cc b/lib/op-attrs/src/groupby.cc index acae02d584..09babdb20d 100644 --- 
a/lib/op-attrs/src/groupby.cc +++ b/lib/op-attrs/src/groupby.cc @@ -1,5 +1,5 @@ #include "op-attrs/ops/groupby.h" -#include "utils/exception.decl.h" +#include "utils/exceptions.h" namespace FlexFlow { diff --git a/lib/op-attrs/src/layer_norm.cc b/lib/op-attrs/src/layer_norm.cc index 081252847a..58160b528f 100644 --- a/lib/op-attrs/src/layer_norm.cc +++ b/lib/op-attrs/src/layer_norm.cc @@ -1,21 +1,16 @@ #include "op-attrs/ops/layer_norm.h" +#include "utils/exceptions.h" namespace FlexFlow { -bool LayerNormAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - if (input.num_dims() < 2) { - return false; - } - return true; -} - // todo: maybe we need to set the degree of parallel_dim ParallelTensorShape get_output_shape(LayerNormAttrs const &attrs, ParallelTensorShape const &input) { + if (input.num_dims() < 2) { + throw mk_runtime_error("LayerNorm: input must have at least 2 dimensions"); + } ParallelTensorShape output = input; + // output degree is same as input degree return output; } From bc823f4506d955c6b3183cd3695dacf7dc808982 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 15:01:32 +0000 Subject: [PATCH 44/69] add linear --- lib/op-attrs/include/op-attrs/ops/linear.h | 1 - lib/op-attrs/src/linear.cc | 43 ++++++++++++++++++---- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/linear.h b/lib/op-attrs/include/op-attrs/ops/linear.h index 54a3864e8d..e696bb9fd0 100644 --- a/lib/op-attrs/include/op-attrs/ops/linear.h +++ b/lib/op-attrs/include/op-attrs/ops/linear.h @@ -29,7 +29,6 @@ struct LinearAttrs { req data_type; req activation; req> regularizer; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT( LinearAttrs, out_channels, use_bias, data_type, activation, regularizer); diff --git a/lib/op-attrs/src/linear.cc b/lib/op-attrs/src/linear.cc index bae30a8ebd..ef7832773a 100644 --- a/lib/op-attrs/src/linear.cc +++ 
b/lib/op-attrs/src/linear.cc @@ -1,23 +1,50 @@ #include "op-attrs/ops/linear.h" #include "op-attrs/ff_dim.h" +#include "utils/exception.decl.h" +#include "utils/exception.h" namespace FlexFlow { -bool LinearAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - return true; -} - +//https://pytorch.org/docs/stable/generated/torch.nn.Linear.html +//torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None) // pytorch: input shape:{batch_size, input_channels} // pytorch linearattrs: should be {input_channels, output_channels} // pytorch: output shape:{batch_size, output_channels} // question: the Linearattrs doesn't have input_channels ParallelTensorShape get_output_shape(LinearAttrs const &atts, ParallelTensorShape const &input) { + ParallelTensorShape out_shape = input; - out_shape.at(ff_dim_t(0)).size = atts.out_channels; + if(input.num_dims() != 2) { + throw mk_runtime_error("LinearAttrs: input shape should be 2D"); + } + + out_shape.at(ff_dim_t(1)).size = atts.out_channels; + //linear shoud consider the degree + //case 1: input:[N, K], weight:[K, M], degree is 1 + if(input.at(ff_dim_t(0)).degree == 1 && input.at(ff_dim_t(1)).degree == 1 ) { + out_shape.at(ff_dim_t(0)).degree = 1; + for(int i = 0; i < input.num_dims(); i++) { + out_shape.at(ff_dim_t(i)).is_replica_dim = false; + out_shape.at(ff_dim_t(i)).degree = 1; + } + } else if(input.at(ff_dim_t(0)).degree == 1 && input.at(ff_dim_t(1)).degree > 1) { + //case 2: input [N, k/x], weight [k/x, M], output [N, M], degree is x + out_shape.at(ff_dim_t(1)).degree = input.at(ff_dim_t(1)).degree; + out_shape.at(ff_dim_t(1)).is_replica_dim = true; + } else if(input.at(ff_dim_t(0)).degree > 1 && input.at(ff_dim_t(1)).degree == 1) { + //case 3: input [N/X, K], weight [K, M/X], output [N/X, M], degree is X + out_shape.at(ff_dim_t(0)).degree = input.at(ff_dim_t(0)).degree; + out_shape.at(ff_dim_t(0)).is_replica_dim = true; + } else 
if(input.at(ff_dim_t(0)).degree > 1 && input.at(ff_dim_t(1)).degree > 1) { + //case 4: input [N/X, K/Y], weight [K/Y, M/X], output [N/X, M/X], degree is X + for(int i = 0; i < input.num_dims(); i++) { + out_shape.at(ff_dim_t(i)).is_replica_dim = true; + out_shape.at(ff_dim_t(i)).degree = input.at(ff_dim_t(i)).degree; + } + } else { + throw mk_runtime_error("LinearAttrs: degree is not supported"); + } return out_shape; } From f2a50e39b65a4445b21674d6fc06673b33e840f6 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 15:14:27 +0000 Subject: [PATCH 45/69] add pool2d --- lib/op-attrs/src/linear.cc | 44 ++++++++++++++------------- lib/op-attrs/src/pool_2d.cc | 59 ++++++++++++++++++++++++++++++++----- 2 files changed, 75 insertions(+), 28 deletions(-) diff --git a/lib/op-attrs/src/linear.cc b/lib/op-attrs/src/linear.cc index ef7832773a..3bb8e0f3ae 100644 --- a/lib/op-attrs/src/linear.cc +++ b/lib/op-attrs/src/linear.cc @@ -1,44 +1,48 @@ #include "op-attrs/ops/linear.h" #include "op-attrs/ff_dim.h" -#include "utils/exception.decl.h" #include "utils/exception.h" namespace FlexFlow { -//https://pytorch.org/docs/stable/generated/torch.nn.Linear.html -//torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None) -// pytorch: input shape:{batch_size, input_channels} -// pytorch linearattrs: should be {input_channels, output_channels} -// pytorch: output shape:{batch_size, output_channels} -// question: the Linearattrs doesn't have input_channels +// https://pytorch.org/docs/stable/generated/torch.nn.Linear.html +// torch.nn.Linear(in_features, out_features, bias=True, device=None, +// dtype=None) +// pytorch: input shape:{batch_size, input_channels} +// pytorch linearattrs: should be {input_channels, output_channels} +// pytorch: output shape:{batch_size, output_channels} +// question: the Linearattrs doesn't have input_channels ParallelTensorShape get_output_shape(LinearAttrs const &atts, ParallelTensorShape const &input) { - + 
ParallelTensorShape out_shape = input; - if(input.num_dims() != 2) { + if (input.num_dims() != 2) { throw mk_runtime_error("LinearAttrs: input shape should be 2D"); } out_shape.at(ff_dim_t(1)).size = atts.out_channels; - //linear shoud consider the degree - //case 1: input:[N, K], weight:[K, M], degree is 1 - if(input.at(ff_dim_t(0)).degree == 1 && input.at(ff_dim_t(1)).degree == 1 ) { + // linear shoud consider the degree + // case 1: input:[N, K], weight:[K, M], degree is 1 + if (input.at(ff_dim_t(0)).degree == 1 && input.at(ff_dim_t(1)).degree == 1) { out_shape.at(ff_dim_t(0)).degree = 1; - for(int i = 0; i < input.num_dims(); i++) { + for (int i = 0; i < input.num_dims(); i++) { out_shape.at(ff_dim_t(i)).is_replica_dim = false; out_shape.at(ff_dim_t(i)).degree = 1; } - } else if(input.at(ff_dim_t(0)).degree == 1 && input.at(ff_dim_t(1)).degree > 1) { - //case 2: input [N, k/x], weight [k/x, M], output [N, M], degree is x + } else if (input.at(ff_dim_t(0)).degree == 1 && + input.at(ff_dim_t(1)).degree > 1) { + // case 2: input [N, k/x], weight [k/x, M], output [N, M], degree is x out_shape.at(ff_dim_t(1)).degree = input.at(ff_dim_t(1)).degree; out_shape.at(ff_dim_t(1)).is_replica_dim = true; - } else if(input.at(ff_dim_t(0)).degree > 1 && input.at(ff_dim_t(1)).degree == 1) { - //case 3: input [N/X, K], weight [K, M/X], output [N/X, M], degree is X + } else if (input.at(ff_dim_t(0)).degree > 1 && + input.at(ff_dim_t(1)).degree == 1) { + // case 3: input [N/X, K], weight [K, M/X], output [N/X, M], degree is X out_shape.at(ff_dim_t(0)).degree = input.at(ff_dim_t(0)).degree; out_shape.at(ff_dim_t(0)).is_replica_dim = true; - } else if(input.at(ff_dim_t(0)).degree > 1 && input.at(ff_dim_t(1)).degree > 1) { - //case 4: input [N/X, K/Y], weight [K/Y, M/X], output [N/X, M/X], degree is X - for(int i = 0; i < input.num_dims(); i++) { + } else if (input.at(ff_dim_t(0)).degree > 1 && + input.at(ff_dim_t(1)).degree > 1) { + // case 4: input [N/X, K/Y], weight [K/Y, M/X], 
output [N/X, M/X], degree is + // X + for (int i = 0; i < input.num_dims(); i++) { out_shape.at(ff_dim_t(i)).is_replica_dim = true; out_shape.at(ff_dim_t(i)).degree = input.at(ff_dim_t(i)).degree; } diff --git a/lib/op-attrs/src/pool_2d.cc b/lib/op-attrs/src/pool_2d.cc index 6d58210b6a..65754de6e3 100644 --- a/lib/op-attrs/src/pool_2d.cc +++ b/lib/op-attrs/src/pool_2d.cc @@ -2,6 +2,7 @@ #include "op-attrs/ff_dim.h" #include "parallel_dim_mapping_record.h" #include "parallel_dim_mapping_record_solver.h" +#include "utils/exception.h" namespace FlexFlow { @@ -47,15 +48,21 @@ bool Pool2DAttrs::is_valid(ParallelTensorShape const &input) const { return true; } -// pytorch: we have two type of pool2d, maxpool2d and avgpool2d -// input shape: (batch_size, channels, input_height, input_width) -// for avgpool2d, output shape: (batch_size, channels, 1, 1) -// for maxpool2d, output shape: (batch_size, channels, output_height, -// output_width) output_height = (input_height + 2 * padding_h - kernel_h) / -// stride_h + 1 output_width = (input_width + 2 * padding_w - kernel_w) / -// stride_w + 1 +// https://pytorch.org/docs/stable/generated/torch.nn.AvgPool2d.html +// https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html +// pytorch: we have two type of pool2d, maxpool2d and avgpool2d +// input shape: (batch_size, channels, input_height, input_width) +// for avgpool2d, output shape: (batch_size, channels, 1, 1) +// for maxpool2d, output shape: (batch_size, channels, output_height, +// output_width) output_height = (input_height + 2 * padding_h - kernel_h) / +// stride_h + 1 output_width = (input_width + 2 * padding_w - kernel_w) / +// stride_w + 1 ParallelTensorShape get_output_shape(Pool2DAttrs const &attrs, ParallelTensorShape const &input) { + + if (input.num_dims() != 4) { + throw mk_runtime_error("Pool2DAttrs: input shape should be 4D"); + } ParallelTensorShape output_shape = input; if (attrs.pool_type == PoolOp::AVG) { output_shape.at(ff_dim_t(2)).size = 1; @@ 
-70,8 +77,44 @@ ParallelTensorShape get_output_shape(Pool2DAttrs const &attrs, attrs.stride_w + 1; } else { - assert(false && "unsupported pool type"); + throw mk_runtime_error("Pool2DAttrs: pool type is not supported"); + } + + // case 1: input:[N, C, H, W], output:[N, C, 1, 1], degree is 1 for avgpool2d + // input: [N, C, H, W], output: [N, C, output_height, output_width], degree is 1 for maxpool2d + if (input.at(ff_dim_t(2)).degree == 1 && input.at(ff_dim_t(3)).degree == 1) { + for (int i = 2; i < input.num_dims(); i++) { + output_shape.at(ff_dim_t(i)).is_replica_dim = false; + output_shape.at(ff_dim_t(i)).degree = 1; + } + } else if (input.at(ff_dim_t(2)).degree > 1 && + input.at(ff_dim_t(3)).degree == 1) { + // case 2: input [N, C, H/X, W] output [N, C, 1, 1], degree is X + // input [N, C, H/X, W] output [N, C, output_height/x, output_width], degree is X + output_shape.at(ff_dim_t(2)).degree = input.at(ff_dim_t(2)).degree; + output_shape.at(ff_dim_t(2)).is_replica_dim = true; + output_shape.at(ff_dim_t(3)).degree = 1; + output_shape.at(ff_dim_t(3)).is_replica_dim = false; + } else if (input.at(ff_dim_t(2)).degree == 1 && + input.at(ff_dim_t(3)).degree > 1) { + // case 3: input [N, C, H, W/X] output [N, C, 1, 1], degree is X + // input [N, C, H, W/X] output [N, C, output_height, output_width/x], degree is X + output_shape.at(ff_dim_t(2)).degree = 1; + output_shape.at(ff_dim_t(2)).is_replica_dim = false; + output_shape.at(ff_dim_t(3)).degree = input.at(ff_dim_t(3)).degree; + output_shape.at(ff_dim_t(3)).is_replica_dim = true; + } else if (input.at(ff_dim_t(2)).degree > 1 && + input.at(ff_dim_t(3)).degree > 1) { + // case 4: input [N, C, H/X, W/Y] output [N, C, 1, 1], degree is X and Y for + // avgpool2d input [N, C, H/X, W/Y] output [N, C, output_height/x, output_width/y], degree is X and Y for maxpool2d + for (int i = 2; i < input.num_dims(); i++) { + output_shape.at(ff_dim_t(i)).is_replica_dim = true; + output_shape.at(ff_dim_t(i)).degree = 
input.at(ff_dim_t(i)).degree; + } + } else { + throw mk_runtime_error("Pool2DAttrs: degree is not supported"); } + return output_shape; } From 2408b37046c975500c0407dd7ba2784f736f41cd Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 15:17:09 +0000 Subject: [PATCH 46/69] add repartition --- lib/op-attrs/include/op-attrs/ops/reduce.h | 1 - lib/op-attrs/include/op-attrs/ops/reduction.h | 1 - .../include/op-attrs/ops/repartition.h | 1 - lib/op-attrs/src/reduction.cc | 7 ----- lib/op-attrs/src/repartition.cc | 26 ++++++------------- 5 files changed, 8 insertions(+), 28 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/reduce.h b/lib/op-attrs/include/op-attrs/ops/reduce.h index c18d4cd888..96827a83cc 100644 --- a/lib/op-attrs/include/op-attrs/ops/reduce.h +++ b/lib/op-attrs/include/op-attrs/ops/reduce.h @@ -14,7 +14,6 @@ struct ReduceAttrs { stack_vector axes; req op_type; req keepdims; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReduceAttrs, axes, op_type, keepdims); CHECK_VALID_OP_ATTR(ReduceAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/reduction.h b/lib/op-attrs/include/op-attrs/ops/reduction.h index a8e7abd318..70f268c97d 100644 --- a/lib/op-attrs/include/op-attrs/ops/reduction.h +++ b/lib/op-attrs/include/op-attrs/ops/reduction.h @@ -11,7 +11,6 @@ namespace FlexFlow { struct ReductionAttrs { ff_dim_t reduction_dim; req reduction_degree; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReductionAttrs, reduction_dim, reduction_degree); CHECK_VALID_OP_ATTR(ReductionAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/repartition.h b/lib/op-attrs/include/op-attrs/ops/repartition.h index a795017bf4..8abdc6eb1c 100644 --- a/lib/op-attrs/include/op-attrs/ops/repartition.h +++ b/lib/op-attrs/include/op-attrs/ops/repartition.h @@ -11,7 +11,6 @@ namespace FlexFlow { struct RepartitionAttrs { ff_dim_t repartition_dim; req repartition_degree; - bool is_valid(ParallelTensorShape const &) 
const; }; FF_VISITABLE_STRUCT(RepartitionAttrs, repartition_dim, repartition_degree); CHECK_VALID_OP_ATTR(RepartitionAttrs); diff --git a/lib/op-attrs/src/reduction.cc b/lib/op-attrs/src/reduction.cc index 9196000a05..6336e15253 100644 --- a/lib/op-attrs/src/reduction.cc +++ b/lib/op-attrs/src/reduction.cc @@ -10,13 +10,6 @@ namespace FlexFlow { /* return output; */ /* } */ -bool ReductionAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - return true; -} - ParallelTensorShape get_output_shape(ReductionAttrs const &attrs, ParallelTensorShape const &input_shape) { ParallelTensorShape output(input_shape.dims, input_shape.data_type); diff --git a/lib/op-attrs/src/repartition.cc b/lib/op-attrs/src/repartition.cc index b5a0280d85..292d90d2e2 100644 --- a/lib/op-attrs/src/repartition.cc +++ b/lib/op-attrs/src/repartition.cc @@ -1,27 +1,17 @@ #include "op-attrs/ops/repartition.h" #include "op-attrs/parallel_dim.h" +#include "utils/exception.h" namespace FlexFlow { -/* bool RepartitionAttrs::is_valid(ParallelTensorShape const &input_shape) const - * { */ -/* ParallelDim dim = input_shape.at(this->repartition_legion_dim); */ -/* return (dim.size % this->repartition_degree * dim.degree == 0); */ -/* } */ - -bool RepartitionAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - ParallelDim dim = input.at(this->repartition_dim); - return (dim.size % this->repartition_degree * dim.degree == 0); -} - -// this may be wrong partition by n multiplies degree by n and keeps shape the -// same +// this may be wrong partition by n multiplies degree by n and keeps shape the same ParallelTensorShape get_output_shape(RepartitionAttrs const &attrs, - ParallelTensorShape const &input_shape) { - ParallelTensorShape output(input_shape.dims, input_shape.data_type); + ParallelTensorShape const &input) { + ParallelDim dim = input.at(attrs.repartition_dim); + if(dim.size % 
attrs.repartition_degree * dim.degree != 0) { + throw mk_runtime_error("RepartitionAttrs: input.at(attrs.repartition_dim) % attrs.repartition_degree * dim.degree != 0"); + } + ParallelTensorShape output(input.dims, input.data_type); output.at(attrs.repartition_dim).degree *= attrs.repartition_degree; return output; } From 9c93f0751219a74fb43282eb78442e66fbfc171a Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 15:24:13 +0000 Subject: [PATCH 47/69] remove aggregate --- .../src/cuda/aggregate_spec_kernels.cu | 176 +++++++----------- lib/op-attrs/include/op-attrs/ops/reshape.h | 1 - 2 files changed, 72 insertions(+), 105 deletions(-) diff --git a/lib/kernels/src/cuda/aggregate_spec_kernels.cu b/lib/kernels/src/cuda/aggregate_spec_kernels.cu index d46dc64567..ed4f656131 100644 --- a/lib/kernels/src/cuda/aggregate_spec_kernels.cu +++ b/lib/kernels/src/cuda/aggregate_spec_kernels.cu @@ -12,12 +12,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "kernels/aggregate_spec_kernels.h" #include "kernels/cuda_helper.h" - namespace FlexFlow { - AggregateSpecPerDeviceState::AggregateSpecPerDeviceState(FFHandler handler, int n) : PerDeviceOpState(handler) { @@ -26,10 +23,80 @@ AggregateSpecPerDeviceState::AggregateSpecPerDeviceState(FFHandler handler, AggregateSpecPerDeviceState::~AggregateSpecPerDeviceState(void) { checkCUDA(cudaFree(&dev_region_ptrs)); } - namespace Kernels { namespace AggregateSpec { +void forward_kernel(cudaStream_t stream, + AggregateSpecPerDeviceState const *m, + float **exp_preds, + int const *acc_gate_assign_ptr, + float *acc_output_ptr, + int n, + int const k, + int rows, + int const batch_size, + int out_dim) { + + checkCUDA(cublasSetStream(m->handle.blas, stream)); + checkCUDNN(cudnnSetStream(m->handle.dnn, stream)); + + // call forward kernel + cudaMemcpy(m->dev_region_ptrs, + exp_preds, + n * sizeof(float *), + cudaMemcpyHostToDevice); + + aggspec_forward_kernel<<>>(m->dev_region_ptrs, + acc_gate_assign_ptr, + acc_output_ptr, + n, + k, + rows, + batch_size, + out_dim); +} +void backward_kernel(cudaStream_t stream, + AggregateSpecPerDeviceState const *m, + float **exp_grads, + int const *acc_gate_assign_ptr, + int const *acc_true_gate_assign_ptr, + float const *acc_gate_pred_ptr, + float *acc_full_gate_grad_ptr, + float const *acc_output_grad_ptr, + int n, + int const k, + int rows, + float lambda_bal, + int const batch_size, + int out_dim) { + checkCUDA(cublasSetStream(m->handle.blas, stream)); + checkCUDNN(cudnnSetStream(m->handle.dnn, stream)); + // call backward kernel + cudaMemcpy(m->dev_region_ptrs, + exp_grads, + n * sizeof(float *), + cudaMemcpyHostToDevice); + aggspec_backward_kernel<<>>(m->dev_region_ptrs, + acc_gate_assign_ptr, + acc_true_gate_assign_ptr, + acc_gate_pred_ptr, + acc_full_gate_grad_ptr, + acc_output_grad_ptr, + n, + k, + rows, + lambda_bal, + batch_size, + out_dim); +} __global__ void aggspec_forward_kernel(float **exp_preds, int const *exp_assign, 
@@ -41,7 +108,6 @@ __global__ void int out_dim) { __shared__ float *chosen_exp_preds[AGGREGATE_SPEC_MAX_K * AGGREGATE_SPEC_MAX_BATCH_SIZE]; - // Get pred pointers, single thread per block if (threadIdx.x == 0) { int expert_idx[AGGREGATE_SPEC_MAX_N] = {0}; @@ -60,9 +126,7 @@ __global__ void } } } - __syncthreads(); - // compute output CUDA_KERNEL_LOOP(i, k * batch_size * out_dim) { if (chosen_exp_preds[i / out_dim] != 0) { @@ -72,7 +136,6 @@ __global__ void } } } - __device__ void aggspec_backward_kernel_gate(float const *output_grad, float *full_gate_grads, int const *expert_assign, @@ -84,16 +147,12 @@ __device__ void aggspec_backward_kernel_gate(float const *output_grad, int k, int n, int out_dim) { - __shared__ float gate_grad_sum[AGGREGATE_SPEC_MAX_BATCH_SIZE]; - // init gate_grad_sum to 0 CUDA_KERNEL_LOOP(i, batch_size) { gate_grad_sum[i] = 0.0f; } - __syncthreads(); - // get sum of expert errors /* NOTE: Errors just squared L2 norm of gradients. * batch_size because the expert gradients are /= batch_size and then it would be /= batch_size^2 here @@ -108,7 +167,6 @@ __device__ void aggspec_backward_kernel_gate(float const *output_grad, atomicAdd(gate_grad_sum + i / (k * out_dim), res); } } - // Compute gate gradients: // Assigned expert i, sample j: pred(i,j) - err_(i,j)/sum_l err(l,j) __syncthreads(); @@ -118,15 +176,12 @@ __device__ void aggspec_backward_kernel_gate(float const *output_grad, full_gate_grads[i / k * n + expert_assign[i]] -= (1.0f - gate_pred[i]); } } - // balance term __syncthreads(); CUDA_KERNEL_LOOP(i, n * batch_size) { full_gate_grads[i] += lambda_bal * expert_bal[i % n]; } - __syncthreads(); - // make 0 mean CUDA_KERNEL_LOOP(i, n * batch_size) { int start = (i / n) * n; @@ -136,7 +191,6 @@ __device__ void aggspec_backward_kernel_gate(float const *output_grad, } } } - __device__ void aggspec_backward_kernel_exp(float const *output_grad, float const *gate_preds, float **exp_grads, @@ -151,7 +205,6 @@ __device__ void 
aggspec_backward_kernel_exp(float const *output_grad, } } } - __global__ void aggspec_backward_kernel(float **exp_grads, int const *exp_assign, @@ -169,7 +222,6 @@ __global__ void *chosen_exp_grads[AGGREGATE_SPEC_MAX_K * AGGREGATE_SPEC_MAX_BATCH_SIZE]; __shared__ int expert_bal[AGGREGATE_SPEC_MAX_N]; __shared__ bool cache_corr[AGGREGATE_SPEC_MAX_BATCH_SIZE]; - // Get pred pointers, single thread per block if (threadIdx.x == 0) { // init arrays @@ -179,7 +231,6 @@ __global__ void for (int i = 0; i < batch_size; i++) { cache_corr[i] = true; } - // Get pointer to chosen expert grads and expert counts for (int i = 0; i < batch_size; i++) { for (int j = 0; j < k; j++) { @@ -199,14 +250,11 @@ __global__ void } } } - __syncthreads(); - // NOTE: These 2 functions could execute independently in parallel // get expert gradients aggspec_backward_kernel_exp( output_grads, gating_net_preds, chosen_exp_grads, batch_size, k, out_dim); - // get gating net gradients aggspec_backward_kernel_gate(output_grads, full_gating_grads, @@ -219,84 +267,4 @@ __global__ void k, n, out_dim); -} - -void forward_kernel(cudaStream_t stream, - AggregateSpecPerDeviceState const *m, - float **exp_preds, - int const *acc_gate_assign_ptr, - float *acc_output_ptr, - int n, - int const k, - int rows, - int const batch_size, - int out_dim) { - - checkCUDA(cublasSetStream(m->handle.blas, stream)); - checkCUDNN(cudnnSetStream(m->handle.dnn, stream)); - - // call forward kernel - cudaMemcpy(m->dev_region_ptrs, - exp_preds, - n * sizeof(float *), - cudaMemcpyHostToDevice); - - aggspec_forward_kernel<<>>(m->dev_region_ptrs, - acc_gate_assign_ptr, - acc_output_ptr, - n, - k, - rows, - batch_size, - out_dim); -} - -void backward_kernel(cudaStream_t stream, - AggregateSpecPerDeviceState const *m, - float **exp_grads, - int const *acc_gate_assign_ptr, - int const *acc_true_gate_assign_ptr, - float const *acc_gate_pred_ptr, - float *acc_full_gate_grad_ptr, - float const *acc_output_grad_ptr, - int n, - int const k, 
- int rows, - float lambda_bal, - int const batch_size, - int out_dim) { - - checkCUDA(cublasSetStream(m->handle.blas, stream)); - checkCUDNN(cudnnSetStream(m->handle.dnn, stream)); - - // call backward kernel - cudaMemcpy(m->dev_region_ptrs, - exp_grads, - n * sizeof(float *), - cudaMemcpyHostToDevice); - - aggspec_backward_kernel<<>>(m->dev_region_ptrs, - acc_gate_assign_ptr, - acc_true_gate_assign_ptr, - acc_gate_pred_ptr, - acc_full_gate_grad_ptr, - acc_output_grad_ptr, - n, - k, - rows, - lambda_bal, - batch_size, - out_dim); -} - -} // namespace AggregateSpec -} // namespace Kernels -} // namespace FlexFlow +} \ No newline at end of file diff --git a/lib/op-attrs/include/op-attrs/ops/reshape.h b/lib/op-attrs/include/op-attrs/ops/reshape.h index 7fbe573c93..78b9806fe7 100644 --- a/lib/op-attrs/include/op-attrs/ops/reshape.h +++ b/lib/op-attrs/include/op-attrs/ops/reshape.h @@ -10,7 +10,6 @@ namespace FlexFlow { struct ReshapeAttrs { TensorShape shape; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReshapeAttrs, shape); CHECK_VALID_OP_ATTR(ReshapeAttrs); From 7377bee136b081af9640d115454be08ff5d85599 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 15:24:46 +0000 Subject: [PATCH 48/69] remove aggregate --- .../src/cuda/aggregate_spec_kernels.cu | 270 ------------------ 1 file changed, 270 deletions(-) delete mode 100644 lib/kernels/src/cuda/aggregate_spec_kernels.cu diff --git a/lib/kernels/src/cuda/aggregate_spec_kernels.cu b/lib/kernels/src/cuda/aggregate_spec_kernels.cu deleted file mode 100644 index ed4f656131..0000000000 --- a/lib/kernels/src/cuda/aggregate_spec_kernels.cu +++ /dev/null @@ -1,270 +0,0 @@ -/* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "kernels/aggregate_spec_kernels.h" -#include "kernels/cuda_helper.h" -namespace FlexFlow { -AggregateSpecPerDeviceState::AggregateSpecPerDeviceState(FFHandler handler, - int n) - : PerDeviceOpState(handler) { - checkCUDA(cudaMalloc(&dev_region_ptrs, n * sizeof(float *))); -} -AggregateSpecPerDeviceState::~AggregateSpecPerDeviceState(void) { - checkCUDA(cudaFree(&dev_region_ptrs)); -} -namespace Kernels { -namespace AggregateSpec { - -void forward_kernel(cudaStream_t stream, - AggregateSpecPerDeviceState const *m, - float **exp_preds, - int const *acc_gate_assign_ptr, - float *acc_output_ptr, - int n, - int const k, - int rows, - int const batch_size, - int out_dim) { - - checkCUDA(cublasSetStream(m->handle.blas, stream)); - checkCUDNN(cudnnSetStream(m->handle.dnn, stream)); - - // call forward kernel - cudaMemcpy(m->dev_region_ptrs, - exp_preds, - n * sizeof(float *), - cudaMemcpyHostToDevice); - - aggspec_forward_kernel<<>>(m->dev_region_ptrs, - acc_gate_assign_ptr, - acc_output_ptr, - n, - k, - rows, - batch_size, - out_dim); -} -void backward_kernel(cudaStream_t stream, - AggregateSpecPerDeviceState const *m, - float **exp_grads, - int const *acc_gate_assign_ptr, - int const *acc_true_gate_assign_ptr, - float const *acc_gate_pred_ptr, - float *acc_full_gate_grad_ptr, - float const *acc_output_grad_ptr, - int n, - int const k, - int rows, - float lambda_bal, - int const batch_size, - int out_dim) { - checkCUDA(cublasSetStream(m->handle.blas, stream)); - checkCUDNN(cudnnSetStream(m->handle.dnn, stream)); - // call backward kernel - 
cudaMemcpy(m->dev_region_ptrs, - exp_grads, - n * sizeof(float *), - cudaMemcpyHostToDevice); - aggspec_backward_kernel<<>>(m->dev_region_ptrs, - acc_gate_assign_ptr, - acc_true_gate_assign_ptr, - acc_gate_pred_ptr, - acc_full_gate_grad_ptr, - acc_output_grad_ptr, - n, - k, - rows, - lambda_bal, - batch_size, - out_dim); -} -__global__ void - aggspec_forward_kernel(float **exp_preds, - int const *exp_assign, - float *output, - int n, // num experts - int const k, // num chosen experts - int exp_samples, // max samples per expert - int const batch_size, - int out_dim) { - __shared__ float - *chosen_exp_preds[AGGREGATE_SPEC_MAX_K * AGGREGATE_SPEC_MAX_BATCH_SIZE]; - // Get pred pointers, single thread per block - if (threadIdx.x == 0) { - int expert_idx[AGGREGATE_SPEC_MAX_N] = {0}; - for (int i = 0; i < batch_size; i++) { - for (int j = 0; j < k; j++) { - // Get pointer to chosen expert predictions - int expert = exp_assign[i * k + j]; - if (expert_idx[expert] >= exp_samples) { - // dropped sample - chosen_exp_preds[i * k + j] = 0; - continue; - } - chosen_exp_preds[i * k + j] = - exp_preds[expert] + expert_idx[expert] * out_dim; - expert_idx[expert]++; - } - } - } - __syncthreads(); - // compute output - CUDA_KERNEL_LOOP(i, k * batch_size * out_dim) { - if (chosen_exp_preds[i / out_dim] != 0) { - output[i] = chosen_exp_preds[i / out_dim][i % out_dim]; - } else { - output[i] = 0.0f; - } - } -} -__device__ void aggspec_backward_kernel_gate(float const *output_grad, - float *full_gate_grads, - int const *expert_assign, - bool const *cache_corr, - float const *gate_pred, - int *expert_bal, - float lambda_bal, - int batch_size, - int k, - int n, - int out_dim) { - __shared__ float gate_grad_sum[AGGREGATE_SPEC_MAX_BATCH_SIZE]; - // init gate_grad_sum to 0 - CUDA_KERNEL_LOOP(i, batch_size) { - gate_grad_sum[i] = 0.0f; - } - __syncthreads(); - // get sum of expert errors - /* NOTE: Errors just squared L2 norm of gradients. 
* batch_size because the - expert gradients are /= batch_size and then it would be /= batch_size^2 here -*/ - CUDA_KERNEL_LOOP(i, batch_size * k * out_dim) { - if (cache_corr[i / (k * out_dim)]) { - float res = output_grad[i] * output_grad[i] * batch_size; - float *gate_grad_idx = - full_gate_grads + (i / (out_dim * k)) * n + - expert_assign[(i / (out_dim * k)) * k + (i / out_dim) % k]; - atomicAdd(gate_grad_idx, res); - atomicAdd(gate_grad_sum + i / (k * out_dim), res); - } - } - // Compute gate gradients: - // Assigned expert i, sample j: pred(i,j) - err_(i,j)/sum_l err(l,j) - __syncthreads(); - CUDA_KERNEL_LOOP(i, k * batch_size) { - if (cache_corr[i / k]) { - full_gate_grads[i / k * n + expert_assign[i]] /= gate_grad_sum[i / k]; - full_gate_grads[i / k * n + expert_assign[i]] -= (1.0f - gate_pred[i]); - } - } - // balance term - __syncthreads(); - CUDA_KERNEL_LOOP(i, n * batch_size) { - full_gate_grads[i] += lambda_bal * expert_bal[i % n]; - } - __syncthreads(); - // make 0 mean - CUDA_KERNEL_LOOP(i, n * batch_size) { - int start = (i / n) * n; - float sub = -full_gate_grads[i] / n; - for (int j = 0; j < n; j++) { - atomicAdd(full_gate_grads + start + j, sub); - } - } -} -__device__ void aggspec_backward_kernel_exp(float const *output_grad, - float const *gate_preds, - float **exp_grads, - int batch_size, - int k, - int out_dim) { - // compute expert gradients - CUDA_KERNEL_LOOP(i, k * out_dim * batch_size) { - if (exp_grads[i / out_dim] != 0) { - exp_grads[i / out_dim][i % out_dim] += - gate_preds[i / out_dim] * output_grad[i]; - } - } -} -__global__ void - aggspec_backward_kernel(float **exp_grads, - int const *exp_assign, - int const *true_exp_assign, - float const *gating_net_preds, - float *full_gating_grads, - float const *output_grads, - int n, // num experts - int k, // num chosen experts - int exp_samples, // max samples per expert - float lambda_bal, - int batch_size, - int out_dim) { - __shared__ float - *chosen_exp_grads[AGGREGATE_SPEC_MAX_K * 
AGGREGATE_SPEC_MAX_BATCH_SIZE]; - __shared__ int expert_bal[AGGREGATE_SPEC_MAX_N]; - __shared__ bool cache_corr[AGGREGATE_SPEC_MAX_BATCH_SIZE]; - // Get pred pointers, single thread per block - if (threadIdx.x == 0) { - // init arrays - for (int i = 0; i < n; i++) { - expert_bal[i] = 0; - } - for (int i = 0; i < batch_size; i++) { - cache_corr[i] = true; - } - // Get pointer to chosen expert grads and expert counts - for (int i = 0; i < batch_size; i++) { - for (int j = 0; j < k; j++) { - int expert = true_exp_assign[k * i + j]; - if (expert != exp_assign[k * i + j]) { - cache_corr[i] = false; - } - if (expert_bal[expert] >= exp_samples) { - // dropped sample - chosen_exp_grads[i * k + j] = 0; - expert_bal[expert]++; - continue; - } - chosen_exp_grads[i * k + j] = - exp_grads[expert] + expert_bal[expert] * out_dim; - expert_bal[expert]++; - } - } - } - __syncthreads(); - // NOTE: These 2 functions could execute independently in parallel - // get expert gradients - aggspec_backward_kernel_exp( - output_grads, gating_net_preds, chosen_exp_grads, batch_size, k, out_dim); - // get gating net gradients - aggspec_backward_kernel_gate(output_grads, - full_gating_grads, - exp_assign, - cache_corr, - gating_net_preds, - expert_bal, - (lambda_bal * n) / batch_size, - batch_size, - k, - n, - out_dim); -} \ No newline at end of file From a5f1a0e3629e3c432de44d6bfaf468d517a79642 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 15:50:24 +0000 Subject: [PATCH 49/69] add reshape --- lib/op-attrs/src/reshape.cc | 51 ++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/lib/op-attrs/src/reshape.cc b/lib/op-attrs/src/reshape.cc index e100efeadb..c715c8dada 100644 --- a/lib/op-attrs/src/reshape.cc +++ b/lib/op-attrs/src/reshape.cc @@ -1,34 +1,45 @@ #include "op-attrs/ops/reshape.h" #include "op-attrs/ff_dim.h" +#include "utils/exception.h" namespace FlexFlow { -// pytorch: the input: [2,3,4], shape maybe [-1,6], should we 
add this? and the -// output is [4, 6] -bool ReshapeAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - std::size_t input_volume = 1; - for (int i = 0; i < input.num_dims(); i++) { - input_volume *= input.at(ff_dim_t(i)).size; - } - std::size_t attrs_volume = 1; - for (int i = 0; i < this->shape.dims.num_dims(); i++) { - attrs_volume *= this->shape.at(ff_dim_t(i)); - } - return (input_volume == attrs_volume); -} - +//https://pytorch.org/docs/stable/generated/torch.reshape.html // pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? and the // output is [4, 6] currently we doesn't consider the case of -1,we can support // this later the input:[2,3,4], attrs.shape:[4,6], the output is [4, 6] ParallelTensorShape get_output_shape(ReshapeAttrs const &attrs, ParallelTensorShape const &input) { + std::size_t input_volume = input.dims.get_volume(); + std::size_t attrs_volume = 1; + for (int i = 0; i < attrs.shape.dims.num_dims(); i++) { + attrs_volume *= attrs.shape.at(ff_dim_t(i)); + } + if(input_volume != attrs_volume) { + throw mk_runtime_error("ReshapeAttrs: input_volume != attrs_volume"); + } - assert(attrs.is_valid(input) && "input is not valid"); - ParallelTensorDims dims{attrs.shape.dims}; - ParallelTensorShape output{dims, input.data_type}; + ParallelTensorShape output = input; + output.data_type = input.data_type; + if(attrs.shape.dims.num_dims() == 1) { + //infer the shape + if(attrs.shape.at(ff_dim_t(0)) == -1) { + + output.at(ff_dim_t(0)).size = input_volume ; + output.at(ff_dim_t(0)).degree = 1; + output.at(ff_dim_t(0)).is_replica_dim = false; + } else { + output.at(ff_dim_t(0)).size = attrs.shape.at(ff_dim_t(0)); + output.at(ff_dim_t(1)).size = input_volume / attrs.shape.at(ff_dim_t(0)); + for(int i = 0; i < 2; i++) { + output.at(ff_dim_t(i)).degree = 1; + output.at(ff_dim_t(i)).is_replica_dim = false; + } + } + } else { + ParallelTensorDims dims{attrs.shape.dims}; + output = {dims, 
input.data_type}; + } return output; } From 812708f50d68d31e12961a6a0009722cab7a2275 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 17 Oct 2023 15:58:24 +0000 Subject: [PATCH 50/69] conv2d done --- lib/op-attrs/src/batch_matmul.cc | 15 ++++++++++----- lib/op-attrs/src/batch_norm.cc | 1 + lib/op-attrs/src/concat.cc | 3 +++ lib/op-attrs/src/conv_2d.cc | 13 ++++++++++--- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index c4ff074302..f1c0e12968 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -37,22 +37,27 @@ ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, if (lhs.at(ff_dim_t(1)).degree == 1 && lhs.at(ff_dim_t(2)).degree == 1) { // case 0: degree is 1, [b, n, m], rhs: [b, m, p] -> [b, n, p] - output_shape.at(ff_dim_t(0)).is_replica_dim = false; + for(int i =1; i < lhs.num_dims(); i++) { + output_shape.at(ff_dim_t(i)).degree = 1; + output_shape.at(ff_dim_t(i)).is_replica_dim = false; + } } else if (lhs.at(ff_dim_t(1)).degree == 1 && lhs.at(ff_dim_t(2)).degree > 1) { // case 1: [b, n, m/x], [b, m/x, p] => [b, n, y] - output_shape.at(ff_dim_t(0)).is_replica_dim = true; + output_shape.at(ff_dim_t(1)).is_replica_dim = true; output_shape.at(ff_dim_t(1)).degree = lhs.at(ff_dim_t(1)).degree; } else if (lhs.at(ff_dim_t(1)).degree > 1 && lhs.at(ff_dim_t(2)).degree == 1) { // case 2: [b, n/x, m] [b m p/x] => [b n/x p/x] - output_shape.at(ff_dim_t(0)).is_replica_dim = true; - output_shape.at(ff_dim_t(1)).degree = rhs.at(ff_dim_t(1)).degree; + output_shape.at(ff_dim_t(1)).is_replica_dim = true; + output_shape.at(ff_dim_t(2)).is_replica_dim = true; + output_shape.at(ff_dim_t(1)).degree = lhs.at(ff_dim_t(1)).degree; output_shape.at(ff_dim_t(2)).degree = rhs.at(ff_dim_t(2)).degree; } else if (lhs.at(ff_dim_t(1)).degree > 1 && lhs.at(ff_dim_t(2)).degree > 1) { // case 3: [b n/x m/y] [b m/y p/x]=> [b n/x p/x] - 
output_shape.at(ff_dim_t(0)).is_replica_dim = true; + output_shape.at(ff_dim_t(1)).is_replica_dim = true; + output_shape.at(ff_dim_t(2)).is_replica_dim = true; output_shape.at(ff_dim_t(1)).degree = lhs.at(ff_dim_t(1)).degree; output_shape.at(ff_dim_t(2)).degree = rhs.at(ff_dim_t(2)).degree; } else { diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index be11ac0e13..777cca1df6 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -12,6 +12,7 @@ ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, "BatchNormAttrs::get_output_shape: input is invalid"); } ParallelTensorShape output_shape = input; + //the degree of the output is the same as the input return output_shape; } diff --git a/lib/op-attrs/src/concat.cc b/lib/op-attrs/src/concat.cc index 2f65ac5623..5c9dc3e370 100644 --- a/lib/op-attrs/src/concat.cc +++ b/lib/op-attrs/src/concat.cc @@ -16,6 +16,9 @@ ParallelTensorShape for (auto &i : inputs) { output.at(attrs.axis).size += i.at(attrs.axis).size; } + output.at(attrs.axis).degree = inputs[0].at(attrs.axis).degree; + output.at(attrs.axis).is_replica_dim = inputs[0].at(attrs.axis).degree >= 1; + return output; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index 08e8315952..d201c64e3d 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -1,5 +1,6 @@ #include "op-attrs/ops/conv_2d.h" #include "op-attrs/ff_dim.h" +#include "op-attrs/parallel_tensor_shape.h" #include "parallel_dim_mapping_record.h" #include "parallel_dim_mapping_record_solver.h" #include "utils/exception.h" @@ -111,6 +112,7 @@ ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) / attrs.stride_w + 1; + if (input.at(ff_dim_t(2)).size == 1 && input.at(ff_dim_t(3)).size == 1) { // case 1 input degree is 1, like 1GPU output.at(ff_dim_t(0)).is_replica_dim = false; @@ -119,16 +121,21 @@ 
ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, // case 2: [b, input_channel, input_h/x, input_w], [output_channel, // input_channel, kernel_h, kernel_w] => [b, output_channel, output_h/x, // output_w] - output.at(ff_dim_t(0)).is_replica_dim = true; + output.at(ff_dim_t(2)).is_replica_dim = true; output.at(ff_dim_t(2)).degree = input.at(ff_dim_t(2)).degree; - output.at(ff_dim_t(3)).degree = input.at(ff_dim_t(3)).degree; } else if (input.at(ff_dim_t(2)).size == 1 && input.at(ff_dim_t(3)).size > 1) { // case 3: [b, input_channel, input_h, input_w / x] [output_channel, // input_channel, kernel_h, kernel_w / x] => [b, output_channel, output_h, // output_w / x] - output.at(ff_dim_t(0)).is_replica_dim = true; + output.at(ff_dim_t(3)).is_replica_dim = true; output.at(ff_dim_t(3)).degree = input.at(ff_dim_t(3)).degree; + } else if(input.at(ff_dim_t(2)).size >1 && + input.at(ff_dim_t(3)).size > 1) { + for(int i =2; i < input.num_dims();i++) { + output.at(ff_dim_t(i)).is_replica_dim = true; + output.at(ff_dim_t(i)).degree = input.at(ff_dim_t(i)).degree; + } } else { throw mk_runtime_error("Conv2DAttrs::get_output_shape: not supported in " "Conv2DAttrs get_output_shape"); From 2bdaf0991e09196861068ebe9669727d1278769b Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 18 Oct 2023 15:23:24 +0000 Subject: [PATCH 51/69] add more shape --- lib/op-attrs/include/op-attrs/ops/reverse.h | 1 - lib/op-attrs/include/op-attrs/ops/softmax.h | 1 - lib/op-attrs/include/op-attrs/ops/split.h | 1 - lib/op-attrs/include/op-attrs/ops/topk.h | 2 - lib/op-attrs/src/batch_matmul.cc | 2 +- lib/op-attrs/src/batch_norm.cc | 2 +- lib/op-attrs/src/conv_2d.cc | 13 +++--- lib/op-attrs/src/embedding.cc | 2 +- lib/op-attrs/src/flat.cc | 2 + lib/op-attrs/src/linear.cc | 1 - lib/op-attrs/src/pool_2d.cc | 12 ++++-- lib/op-attrs/src/repartition.cc | 8 ++-- lib/op-attrs/src/reshape.cc | 44 +++++++++++---------- lib/op-attrs/src/reverse.cc | 15 +++---- lib/op-attrs/src/softmax.cc | 15 ++----- 
lib/op-attrs/src/split.cc | 29 ++++++-------- lib/op-attrs/src/topk.cc | 21 +++++----- lib/op-attrs/src/transpose.cc | 37 ++++++++--------- 18 files changed, 95 insertions(+), 113 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/reverse.h b/lib/op-attrs/include/op-attrs/ops/reverse.h index 0c8657c6ec..ce1295f437 100644 --- a/lib/op-attrs/include/op-attrs/ops/reverse.h +++ b/lib/op-attrs/include/op-attrs/ops/reverse.h @@ -10,7 +10,6 @@ namespace FlexFlow { struct ReverseAttrs { ff_dim_t axis; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReverseAttrs, axis); CHECK_VALID_OP_ATTR(ReverseAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/softmax.h b/lib/op-attrs/include/op-attrs/ops/softmax.h index 8e7a00e661..8f31bccdef 100644 --- a/lib/op-attrs/include/op-attrs/ops/softmax.h +++ b/lib/op-attrs/include/op-attrs/ops/softmax.h @@ -10,7 +10,6 @@ namespace FlexFlow { struct SoftmaxAttrs { ff_dim_t dim; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(SoftmaxAttrs, dim); CHECK_VALID_OP_ATTR(SoftmaxAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/split.h b/lib/op-attrs/include/op-attrs/ops/split.h index e2abeb2581..14f9395a26 100644 --- a/lib/op-attrs/include/op-attrs/ops/split.h +++ b/lib/op-attrs/include/op-attrs/ops/split.h @@ -10,7 +10,6 @@ namespace FlexFlow { struct SplitAttrs { req> splits; ff_dim_t axis; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(SplitAttrs, splits, axis); CHECK_VALID_OP_ATTR(SplitAttrs); diff --git a/lib/op-attrs/include/op-attrs/ops/topk.h b/lib/op-attrs/include/op-attrs/ops/topk.h index 914ac1afc2..3a3b49ab3b 100644 --- a/lib/op-attrs/include/op-attrs/ops/topk.h +++ b/lib/op-attrs/include/op-attrs/ops/topk.h @@ -7,13 +7,11 @@ namespace FlexFlow { -// I think we should add axis // pytorch code: torch.topk(input_tensor, k, largest=True, sorted=True, dim=dim) struct TopKAttrs { req k; req sorted; req axis; - bool is_valid(ParallelTensorShape 
const &) const; }; FF_VISITABLE_STRUCT(TopKAttrs, k, sorted, axis); CHECK_VALID_OP_ATTR(TopKAttrs); diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index f1c0e12968..deabdcea5b 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -37,7 +37,7 @@ ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, if (lhs.at(ff_dim_t(1)).degree == 1 && lhs.at(ff_dim_t(2)).degree == 1) { // case 0: degree is 1, [b, n, m], rhs: [b, m, p] -> [b, n, p] - for(int i =1; i < lhs.num_dims(); i++) { + for (int i = 1; i < lhs.num_dims(); i++) { output_shape.at(ff_dim_t(i)).degree = 1; output_shape.at(ff_dim_t(i)).is_replica_dim = false; } diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index 777cca1df6..2f050814c9 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -12,7 +12,7 @@ ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, "BatchNormAttrs::get_output_shape: input is invalid"); } ParallelTensorShape output_shape = input; - //the degree of the output is the same as the input + // the degree of the output is the same as the input return output_shape; } diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index d201c64e3d..15566f9005 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -112,7 +112,7 @@ ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) / attrs.stride_w + 1; - + if (input.at(ff_dim_t(2)).size == 1 && input.at(ff_dim_t(3)).size == 1) { // case 1 input degree is 1, like 1GPU output.at(ff_dim_t(0)).is_replica_dim = false; @@ -130,12 +130,11 @@ ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, // output_w / x] output.at(ff_dim_t(3)).is_replica_dim = true; output.at(ff_dim_t(3)).degree = input.at(ff_dim_t(3)).degree; - } else if(input.at(ff_dim_t(2)).size >1 && - input.at(ff_dim_t(3)).size 
> 1) { - for(int i =2; i < input.num_dims();i++) { - output.at(ff_dim_t(i)).is_replica_dim = true; - output.at(ff_dim_t(i)).degree = input.at(ff_dim_t(i)).degree; - } + } else if (input.at(ff_dim_t(2)).size > 1 && input.at(ff_dim_t(3)).size > 1) { + for (int i = 2; i < input.num_dims(); i++) { + output.at(ff_dim_t(i)).is_replica_dim = true; + output.at(ff_dim_t(i)).degree = input.at(ff_dim_t(i)).degree; + } } else { throw mk_runtime_error("Conv2DAttrs::get_output_shape: not supported in " "Conv2DAttrs get_output_shape"); diff --git a/lib/op-attrs/src/embedding.cc b/lib/op-attrs/src/embedding.cc index fa8f313457..5e86335f14 100644 --- a/lib/op-attrs/src/embedding.cc +++ b/lib/op-attrs/src/embedding.cc @@ -13,6 +13,6 @@ ParallelTensorShape get_output_shape(EmbeddingAttrs const &atts, output.at(ff_dim_t(2)).size = atts.out_channels; // output degree is same as input degree return output; -} // namespace FlexFlow +} } // namespace FlexFlow diff --git a/lib/op-attrs/src/flat.cc b/lib/op-attrs/src/flat.cc index ae351328b7..51c29ec5b7 100644 --- a/lib/op-attrs/src/flat.cc +++ b/lib/op-attrs/src/flat.cc @@ -27,6 +27,8 @@ ParallelTensorShape get_output_shape(FlatAttrs const &attrs, input.at(ff_dim_t(Input::WIDTH)).size; output_shape.at(ff_dim_t(Output::CHANNEL)).degree = input.at(ff_dim_t(Input::CHANNEL)).degree; + output_shape.at(ff_dim_t(Output::CHANNEL)).is_replica_dim = + (input.at(ff_dim_t(Input::CHANNEL)).degree > 1); return output_shape; } diff --git a/lib/op-attrs/src/linear.cc b/lib/op-attrs/src/linear.cc index 3bb8e0f3ae..ec0f5dd235 100644 --- a/lib/op-attrs/src/linear.cc +++ b/lib/op-attrs/src/linear.cc @@ -23,7 +23,6 @@ ParallelTensorShape get_output_shape(LinearAttrs const &atts, // linear shoud consider the degree // case 1: input:[N, K], weight:[K, M], degree is 1 if (input.at(ff_dim_t(0)).degree == 1 && input.at(ff_dim_t(1)).degree == 1) { - out_shape.at(ff_dim_t(0)).degree = 1; for (int i = 0; i < input.num_dims(); i++) { 
out_shape.at(ff_dim_t(i)).is_replica_dim = false; out_shape.at(ff_dim_t(i)).degree = 1; diff --git a/lib/op-attrs/src/pool_2d.cc b/lib/op-attrs/src/pool_2d.cc index 65754de6e3..b3859c6e06 100644 --- a/lib/op-attrs/src/pool_2d.cc +++ b/lib/op-attrs/src/pool_2d.cc @@ -81,7 +81,8 @@ ParallelTensorShape get_output_shape(Pool2DAttrs const &attrs, } // case 1: input:[N, C, H, W], output:[N, C, 1, 1], degree is 1 for avgpool2d - // input: [N, C, H, W], output: [N, C, output_height, output_width], degree is 1 for maxpool2d + // input: [N, C, H, W], output: [N, C, output_height, output_width], degree is + // 1 for maxpool2d if (input.at(ff_dim_t(2)).degree == 1 && input.at(ff_dim_t(3)).degree == 1) { for (int i = 2; i < input.num_dims(); i++) { output_shape.at(ff_dim_t(i)).is_replica_dim = false; @@ -90,7 +91,8 @@ ParallelTensorShape get_output_shape(Pool2DAttrs const &attrs, } else if (input.at(ff_dim_t(2)).degree > 1 && input.at(ff_dim_t(3)).degree == 1) { // case 2: input [N, C, H/X, W] output [N, C, 1, 1], degree is X - // input [N, C, H/X, W] output [N, C, output_height/x, output_width], degree is X + // input [N, C, H/X, W] output [N, C, output_height/x, output_width], degree + // is X output_shape.at(ff_dim_t(2)).degree = input.at(ff_dim_t(2)).degree; output_shape.at(ff_dim_t(2)).is_replica_dim = true; output_shape.at(ff_dim_t(3)).degree = 1; @@ -98,7 +100,8 @@ ParallelTensorShape get_output_shape(Pool2DAttrs const &attrs, } else if (input.at(ff_dim_t(2)).degree == 1 && input.at(ff_dim_t(3)).degree > 1) { // case 3: input [N, C, H, W/X] output [N, C, 1, 1], degree is X - // input [N, C, H, W/X] output [N, C, output_height, output_width/x], degree is X + // input [N, C, H, W/X] output [N, C, output_height, output_width/x], degree + // is X output_shape.at(ff_dim_t(2)).degree = 1; output_shape.at(ff_dim_t(2)).is_replica_dim = false; output_shape.at(ff_dim_t(3)).degree = input.at(ff_dim_t(3)).degree; @@ -106,7 +109,8 @@ ParallelTensorShape get_output_shape(Pool2DAttrs 
const &attrs, } else if (input.at(ff_dim_t(2)).degree > 1 && input.at(ff_dim_t(3)).degree > 1) { // case 4: input [N, C, H/X, W/Y] output [N, C, 1, 1], degree is X and Y for - // avgpool2d input [N, C, H/X, W/Y] output [N, C, output_height/x, output_width/y], degree is X and Y for maxpool2d + // avgpool2d input [N, C, H/X, W/Y] output [N, C, output_height/x, + // output_width/y], degree is X and Y for maxpool2d for (int i = 2; i < input.num_dims(); i++) { output_shape.at(ff_dim_t(i)).is_replica_dim = true; output_shape.at(ff_dim_t(i)).degree = input.at(ff_dim_t(i)).degree; diff --git a/lib/op-attrs/src/repartition.cc b/lib/op-attrs/src/repartition.cc index 292d90d2e2..b95a400ad5 100644 --- a/lib/op-attrs/src/repartition.cc +++ b/lib/op-attrs/src/repartition.cc @@ -4,12 +4,14 @@ namespace FlexFlow { -// this may be wrong partition by n multiplies degree by n and keeps shape the same +// this may be wrong partition by n multiplies degree by n and keeps shape the +// same ParallelTensorShape get_output_shape(RepartitionAttrs const &attrs, ParallelTensorShape const &input) { ParallelDim dim = input.at(attrs.repartition_dim); - if(dim.size % attrs.repartition_degree * dim.degree != 0) { - throw mk_runtime_error("RepartitionAttrs: input.at(attrs.repartition_dim) % attrs.repartition_degree * dim.degree != 0"); + if (dim.size % attrs.repartition_degree * dim.degree != 0) { + throw mk_runtime_error("RepartitionAttrs: input.at(attrs.repartition_dim) " + "% attrs.repartition_degree * dim.degree != 0"); } ParallelTensorShape output(input.dims, input.data_type); output.at(attrs.repartition_dim).degree *= attrs.repartition_degree; diff --git a/lib/op-attrs/src/reshape.cc b/lib/op-attrs/src/reshape.cc index c715c8dada..b7e887002a 100644 --- a/lib/op-attrs/src/reshape.cc +++ b/lib/op-attrs/src/reshape.cc @@ -4,10 +4,11 @@ namespace FlexFlow { -//https://pytorch.org/docs/stable/generated/torch.reshape.html -// pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? 
and the -// output is [4, 6] currently we doesn't consider the case of -1,we can support -// this later the input:[2,3,4], attrs.shape:[4,6], the output is [4, 6] +// https://pytorch.org/docs/stable/generated/torch.reshape.html +// pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? and +// the output is [4, 6] currently we doesn't consider the case of -1,we can +// support this later the input:[2,3,4], attrs.shape:[4,6], the output is [4, +// 6] ParallelTensorShape get_output_shape(ReshapeAttrs const &attrs, ParallelTensorShape const &input) { std::size_t input_volume = input.dims.get_volume(); @@ -15,30 +16,31 @@ ParallelTensorShape get_output_shape(ReshapeAttrs const &attrs, for (int i = 0; i < attrs.shape.dims.num_dims(); i++) { attrs_volume *= attrs.shape.at(ff_dim_t(i)); } - if(input_volume != attrs_volume) { + if (input_volume != attrs_volume) { throw mk_runtime_error("ReshapeAttrs: input_volume != attrs_volume"); } ParallelTensorShape output = input; output.data_type = input.data_type; - if(attrs.shape.dims.num_dims() == 1) { - //infer the shape - if(attrs.shape.at(ff_dim_t(0)) == -1) { - - output.at(ff_dim_t(0)).size = input_volume ; - output.at(ff_dim_t(0)).degree = 1; - output.at(ff_dim_t(0)).is_replica_dim = false; - } else { - output.at(ff_dim_t(0)).size = attrs.shape.at(ff_dim_t(0)); - output.at(ff_dim_t(1)).size = input_volume / attrs.shape.at(ff_dim_t(0)); - for(int i = 0; i < 2; i++) { - output.at(ff_dim_t(i)).degree = 1; - output.at(ff_dim_t(i)).is_replica_dim = false; - } + if (attrs.shape.dims.num_dims() == 1) { + // infer the shape + if (attrs.shape.at(ff_dim_t(0)) == -1) { + + output.at(ff_dim_t(0)).size = input_volume; + output.at(ff_dim_t(0)).degree = 1; + output.at(ff_dim_t(0)).is_replica_dim = false; + } else { + output.at(ff_dim_t(0)).size = attrs.shape.at(ff_dim_t(0)); + output.at(ff_dim_t(1)).size = input_volume / attrs.shape.at(ff_dim_t(0)); + for (int i = 0; i < 2; i++) { + output.at(ff_dim_t(i)).degree = 1; + 
output.at(ff_dim_t(i)).is_replica_dim = false; } + } } else { - ParallelTensorDims dims{attrs.shape.dims}; - output = {dims, input.data_type}; + ParallelTensorDims dims{attrs.shape.dims}; + output = {dims, input.data_type}; + // Note: I think reshape doesn't need to consider the degree } return output; } diff --git a/lib/op-attrs/src/reverse.cc b/lib/op-attrs/src/reverse.cc index a09d43ae61..644e733a0a 100644 --- a/lib/op-attrs/src/reverse.cc +++ b/lib/op-attrs/src/reverse.cc @@ -1,21 +1,16 @@ #include "op-attrs/ops/reverse.h" #include "op-attrs/ff_dim.h" +#include "utils/exception.h" namespace FlexFlow { -bool ReverseAttrs::is_valid(ParallelTensorShape const &input) const { - if (input.is_valid() == false) { - return false; - } - if (this->axis < 0 || this->axis >= input.num_dims()) { - return false; - } - return true; -} - ParallelTensorShape get_output_shape(ReverseAttrs const &attrs, ParallelTensorShape const &input) { + if (attrs.axis < 0 || attrs.axis >= input.num_dims()) { + throw mk_runtime_error("ReverseAttrs: axis is invalid"); + } ParallelTensorShape output = input; + // output degree is same as input degree, because it's just reverse operation return output; } diff --git a/lib/op-attrs/src/softmax.cc b/lib/op-attrs/src/softmax.cc index 91d6555681..1d9cd5fcc0 100644 --- a/lib/op-attrs/src/softmax.cc +++ b/lib/op-attrs/src/softmax.cc @@ -1,20 +1,13 @@ #include "op-attrs/ops/softmax.h" +#include "utils/exception.h" namespace FlexFlow { -bool SoftmaxAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - if (input.num_dims() < 2) { - return false; - } - return true; -} - ParallelTensorShape get_output_shape(SoftmaxAttrs const &attrs, ParallelTensorShape const &input) { - assert(attrs.is_valid(input)); + if (input.num_dims() < 2) { + throw mk_runtime_error("SoftmaxAttrs: input.num_dims() < 2"); + } ParallelTensorShape output = input; return output; } diff --git a/lib/op-attrs/src/split.cc 
b/lib/op-attrs/src/split.cc index 1c14f1c370..5c6f3d6924 100644 --- a/lib/op-attrs/src/split.cc +++ b/lib/op-attrs/src/split.cc @@ -1,33 +1,28 @@ #include "op-attrs/ops/split.h" #include "op-attrs/ff_dim.h" +#include "utils/containers.h" +#include "utils/exception.h" namespace FlexFlow { -bool SplitAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - std::size_t dims_sum = 0; - - for (std::size_t i = 0; i < this->splits.size(); ++i) { - dims_sum += splits[i]; - } - - if (dims_sum != input.at(ff_dim_t(axis)).size) { - return false; - } - return true; -} - std::vector get_output_shapes(SplitAttrs const &attrs, ParallelTensorShape const &input) { - assert(attrs.is_valid(input)); + std::size_t dims_sum = sum(attrs.splits); + if (dims_sum != input.at(ff_dim_t(attrs.axis)).size) { + throw mk_runtime_error( + "SplitAttrs: dims_sum != input.at(ff_dim_t(attrs.axis)).size"); + } + std::vector outputs; for (std::size_t i = 0; i < attrs.splits.size(); ++i) { outputs.emplace_back(input); outputs.back().at(ff_dim_t(attrs.axis)).size = attrs.splits[i]; + outputs.back().at(ff_dim_t(attrs.axis)).degree = + input.at(ff_dim_t(attrs.axis)).degree; + outputs.back().at(ff_dim_t(attrs.axis)).is_replica_dim = + input.at(ff_dim_t(attrs.axis)).degree > 1; } return outputs; } diff --git a/lib/op-attrs/src/topk.cc b/lib/op-attrs/src/topk.cc index 06c43b3eba..73bf59b048 100644 --- a/lib/op-attrs/src/topk.cc +++ b/lib/op-attrs/src/topk.cc @@ -1,23 +1,22 @@ #include "op-attrs/ops/topk.h" +#include "utils/exception.h" namespace FlexFlow { -bool TopKAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } +ParallelTensorShape get_output_shape(TopKAttrs const &attrs, + ParallelTensorShape const &input) { - if (k > input.at(ff_dim_t(axis)).size) { - return false; + if (attrs.k > input.at(ff_dim_t(attrs.axis)).size) { + throw mk_runtime_error( + "TopKAttrs: k > input.at(ff_dim_t(attrs.axis)).size"); 
} - return true; -} -ParallelTensorShape get_output_shape(TopKAttrs const &attrs, - ParallelTensorShape const &input) { - assert(attrs.is_valid(input)); ParallelTensorShape output = input; output.at(ff_dim_t(attrs.axis)).size = attrs.k; + output.at(ff_dim_t(attrs.axis)).degree = + input.at(ff_dim_t(attrs.axis)).degree; + output.at(ff_dim_t(attrs.axis)).is_replica_dim = + input.at(ff_dim_t(attrs.axis)).degree > 1; return output; } diff --git a/lib/op-attrs/src/transpose.cc b/lib/op-attrs/src/transpose.cc index 97140c6b49..88772b72e0 100644 --- a/lib/op-attrs/src/transpose.cc +++ b/lib/op-attrs/src/transpose.cc @@ -1,38 +1,35 @@ #include "op-attrs/ops/transpose.h" #include "op-attrs/ff_dim.h" -#include "utils/exception.decl.h" +#include "utils/exception.h" namespace FlexFlow { -bool TransposeAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - // in pytorch, we use choose two dim for transpose, so I think the size of - // perm should be 2 - if (perm.size() != 2) { - return false; +// assume we have [x, y, z, l], perms is [0,2] we return [z, y, x, l] +ParallelTensorShape get_output_shape(TransposeAttrs const &attrs, + ParallelTensorShape const &input) { + if (attrs.perm.size() != 2) { + throw mk_runtime_error("TransposeAttrs: perm.size() != 2"); } - auto dim0 = perm[0]; - auto dim1 = perm[1]; + auto dim0 = attrs.perm[0]; + auto dim1 = attrs.perm[1]; if (dim0 < 0 || dim1 < 0 || dim0 >= input.num_dims() || dim1 >= input.num_dims()) { - return false; + throw mk_runtime_error("TransposeAttrs: dim0 < 0 || dim1 < 0 || dim0 >= " + "input.num_dims() || dim1 >= input.num_dims()"); } - return true; -} - -// assume we have [x, y, z, l], perms is [0,2] we return [z, y, x, l] -ParallelTensorShape get_output_shape(TransposeAttrs const &attrs, - ParallelTensorShape const &input) { ParallelTensorShape output = input; - auto dim0 = attrs.perm[0]; - auto dim1 = attrs.perm[1]; int temp = input.at(ff_dim_t(dim0)).size; + int degree = 
input.at(ff_dim_t(dim0)).degree; output.at(ff_dim_t(dim0)).size = input.at(ff_dim_t(dim1)).size; output.at(ff_dim_t(dim1)).size = temp; + output.at(ff_dim_t(dim0)).degree = input.at(ff_dim_t(dim1)).degree; + output.at(ff_dim_t(dim1)).degree = degree; + output.at(ff_dim_t(dim0)).is_replica_dim = + output.at(ff_dim_t(dim0)).degree > 1; + output.at(ff_dim_t(dim1)).is_replica_dim = + output.at(ff_dim_t(dim1)).degree > 1; return output; } From 50ec41f0b204dd5fc4b2afc91c6ec5bce8edb88f Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 18 Oct 2023 15:46:12 +0000 Subject: [PATCH 52/69] try to implement the attention --- lib/op-attrs/src/attention.cc | 78 ++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 24 deletions(-) diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index 2d189d7472..bee1606d59 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -1,8 +1,6 @@ #include "op-attrs/ops/attention.h" -#include "kernels/legion_dim.h" #include "op-attrs/parallel_tensor_shape.h" -#include "utils/exception.decl.h" -#include "utils/exceptions.h" +#include "utils/exception.h" namespace FlexFlow { @@ -88,37 +86,69 @@ TensorShape // according to the pytorch // https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html, -// query: [target_size_seq_len, batch_size, embed_dim], we consider the batch -// size key: (seq_len, batch_size, embed_dim) value: (seq_len, batch_size, -// embed_dim) +// we consider the batch size +// query: [seq_len, batch_size, embed_dim], +// key: (seq_len, batch_size, embed_dim) +// value: (seq_len, batch_size,embed_dim) // multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) -// output: (target_size_seq_len, batch_size, embed_dim) +// output: (seq_len, batch_size, embed_dim) ParallelTensorShape get_output_shape( MultiHeadAttentionAttrs const &attrs, - MultiHeadAttentionInputs const &inputs) { - ParallelTensorShape output_shape = inputs.query; - NOT_IMPLEMENTED(); 
-} - -bool is_valid(MultiHeadAttentionAttrs const &attrs, - MultiHeadAttentionInputs const &input) { - bool valid = true; + MultiHeadAttentionInputs const &input) { if (input.query.num_dims() != 3 || input.key.num_dims() != 3 || input.value.num_dims() != 3) { - return false; + throw mk_runtime_error("MultiHeadAttentionAttrs: num_dims != 3"); + } + + if (input.query.at(ff_dim_t(0)).size != input.key.at(ff_dim_t(0)).size || + input.query.at(ff_dim_t(0)).size != input.value.at(ff_dim_t(0)).size || + input.key.at(ff_dim_t(0)).size != input.value.at(ff_dim_t(0)).size) { + throw mk_runtime_error("MultiHeadAttentionAttrs: seq_len not match"); + } + + if (input.query.at(ff_dim_t(1)).size != input.key.at(ff_dim_t(1)).size || + input.query.at(ff_dim_t(1)).size != input.value.at(ff_dim_t(1)).size || + input.key.at(ff_dim_t(1)).size != input.value.at(ff_dim_t(1)).size) { + throw mk_runtime_error("MultiHeadAttentionAttrs: batch_size not match"); } - // ff_dim_t = num_dims - legion_dim_t - 1 - if (input.query.at(legion_dim_t(0)).size != attrs.embed_dim) { - return false; + + if (input.query.at(ff_dim_t(2)).size != input.key.at(ff_dim_t(2)).size || + input.query.at(ff_dim_t(2)).size != input.value.at(ff_dim_t(2)).size || + input.key.at(ff_dim_t(2)).size != input.value.at(ff_dim_t(2)).size) { + throw mk_runtime_error("MultiHeadAttentionAttrs: embed_dim not match"); + } + + if (input.query.at(ff_dim_t(2)).size != attrs.embed_dim || + input.key.at(ff_dim_t(2)).size != attrs.embed_dim || + input.value.at(ff_dim_t(2)).size != attrs.embed_dim) { + throw mk_runtime_error( + "MultiHeadAttentionAttrs: input's embed_dim not match to attrs"); } - if (input.key.at(legion_dim_t(0)).size != attrs.embed_dim) { - return false; + + if (attrs.embed_dim != (attrs.num_heads * attrs.kdim)) { + throw mk_runtime_error( + "MultiHeadAttentionAttrs: embed_dim not match to num_heads * kdim"); } - if (input.value.at(legion_dim_t(0)).size != attrs.embed_dim) { - return false; + + // TODO: how to deal with 
the degree + // q = wq*x , k = wk*x, v = wv*x (seq_len, batch_size, embed_dim) + // k->(seq_len, num_head, batch_size, kdim) + // v->(seq_len, num_head, batch_size, vdim) + // q->(seq_len, num_head, batch_size, kdim) + // attn = q @k (seq_len, num_head, batch_size, batch_size) + // attn = attn @v (seq_len, num_head, batch_size, vdim) + // attn = attn.transpose(1,2) (seq_len, batch_size, num_head, vdim) + // attn = attn.reshape(seq_len, batch_size, num_head*vdim) + + // Note: we support tensor parallelism for seq_len/batch_size/embed_dim + ParallelTensorShape output = input.query; + for (int i = 0; i < output.num_dims(); i++) { + output.at(ff_dim_t(i)).degree = input.query.at(ff_dim_t(i)).degree; + output.at(ff_dim_t(i)).is_replica_dim = + input.query.at(ff_dim_t(i)).degree > 1; } - return true; + return output; } } // namespace FlexFlow From 2e2533adcf282b62daba19515299c7356048ef9b Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 18 Oct 2023 15:57:37 +0000 Subject: [PATCH 53/69] leave reduce --- lib/op-attrs/src/reduce.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/op-attrs/src/reduce.cc b/lib/op-attrs/src/reduce.cc index 3deb33e680..727e41e0ec 100644 --- a/lib/op-attrs/src/reduce.cc +++ b/lib/op-attrs/src/reduce.cc @@ -3,12 +3,11 @@ namespace FlexFlow { -bool ReduceAttrs::is_valid(ParallelTensorShape const &input) const { - NOT_IMPLEMENTED()} - ParallelTensorShape get_output_shape(ReduceAttrs const &attrs, ParallelTensorShape const &input) { NOT_IMPLEMENTED() + // reduce is sum/max/min/mean + // NOTE: how to implement this } } // namespace FlexFlow From aff7b006300b658db3b90b4cd9010baf55992a0e Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 18 Oct 2023 21:09:38 +0000 Subject: [PATCH 54/69] fix some error --- lib/op-attrs/include/op-attrs/ops/attention.h | 3 -- .../include/op-attrs/ops/element_binary.h | 1 - lib/op-attrs/include/op-attrs/ops/replicate.h | 1 - lib/op-attrs/src/attention.cc | 29 ------------------- 
lib/op-attrs/src/batch_matmul.cc | 1 - lib/op-attrs/src/batch_norm.cc | 7 ++--- lib/op-attrs/src/cast.cc | 8 ++--- lib/op-attrs/src/combine.cc | 14 +++++---- lib/op-attrs/src/concat.cc | 1 - lib/op-attrs/src/dropout.cc | 6 ++-- lib/op-attrs/src/element_binary.cc | 7 +++-- lib/op-attrs/src/element_unary.cc | 6 ++-- lib/op-attrs/src/embedding.cc | 1 - lib/op-attrs/src/groupby.cc | 7 ++--- lib/op-attrs/src/layer_norm.cc | 6 ++-- lib/op-attrs/src/reduce.cc | 2 +- lib/op-attrs/src/replicate.cc | 18 ++++-------- lib/op-attrs/src/reverse.cc | 8 ++--- lib/op-attrs/src/softmax.cc | 10 +++---- 19 files changed, 47 insertions(+), 89 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/attention.h b/lib/op-attrs/include/op-attrs/ops/attention.h index 7d332ddc0e..a80c579689 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention.h +++ b/lib/op-attrs/include/op-attrs/ops/attention.h @@ -44,9 +44,6 @@ struct MultiHeadAttentionInputs TensorType value; }; -bool is_valid(MultiHeadAttentionAttrs const &, - MultiHeadAttentionInputs const &input); - int get_qProjSize(MultiHeadAttentionAttrs const &); int get_vProjSize(MultiHeadAttentionAttrs const &); int get_kProjSize(MultiHeadAttentionAttrs const &); diff --git a/lib/op-attrs/include/op-attrs/ops/element_binary.h b/lib/op-attrs/include/op-attrs/ops/element_binary.h index 9a2e4dc22a..377a03970a 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_binary.h +++ b/lib/op-attrs/include/op-attrs/ops/element_binary.h @@ -4,7 +4,6 @@ #include "core.h" #include "op-attrs/op.h" #include "op-attrs/parallel_tensor_shape.h" -#include "op-attrs/©" #include "utils/visitable.h" namespace FlexFlow { diff --git a/lib/op-attrs/include/op-attrs/ops/replicate.h b/lib/op-attrs/include/op-attrs/ops/replicate.h index c2a9b6abf0..2bbcad9d95 100644 --- a/lib/op-attrs/include/op-attrs/ops/replicate.h +++ b/lib/op-attrs/include/op-attrs/ops/replicate.h @@ -11,7 +11,6 @@ namespace FlexFlow { struct ReplicateAttrs { ff_dim_t replicate_dim; req 
replicate_degree; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(ReplicateAttrs, replicate_dim, replicate_degree); CHECK_VALID_OP_ATTR(ReplicateAttrs); diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index bee1606d59..61a5c79b0a 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -54,35 +54,6 @@ TensorShape return {dims, DataType::FLOAT}; } -// these two functions are not defined in the attention.h -// ParallelTensorShape get_output_shape(MultiHeadAttentionAttrs const &attrs, -// ParallelTensorShape const &query_shape, -// ParallelTensorShape const &key_shape, -// ParallelTensorShape const &value_shape) -// { -// /* ParallelDim replica_dim = -// query_shape.at(ff_dim_t(query_shape.num_dims() - -// * 2)); */ -// /* replica_dim.size = replica_dim.degree; */ - -// /* ParallelDim */ - -// ParallelTensorShape output_shape = query_shape; -// output_shape.at(ff_dim_t(output_shape.num_dims() - 1)).size = -// attrs.embed_dim; return output_shape; -// } - -// TensorShape get_output_shape(MultiHeadAttentionAttrs const &attrs, -// TensorShape const &query_shape, -// TensorShape const &key_shape, -// TensorShape const &value_shape) { -// ParallelTensorShape parallel_shape = -// get_output_shape(attrs, -// static_cast(query_shape), -// static_cast(key_shape), -// static_cast(value_shape)); -// return get_tensor_shape_unsafe(parallel_shape); -// } // according to the pytorch // https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html, diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index deabdcea5b..e170e29053 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -4,7 +4,6 @@ #include "utils/exception.h" namespace FlexFlow { - // how to get the batch size? 
and lhs: [b, n, m], rhs: [b, m, p] // output: [b, n, p] //n == s1, m == s2 //[b, n/2, m], [b, m, p/2] -> [b, n/2, p/2] diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index 2f050814c9..2557c9def5 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -2,18 +2,17 @@ #include "utils/exception.h" namespace FlexFlow { - // input: [b, c, h, w] // output: [b, c, h, w] ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, - ParallelTensorShape const &input) { + ParallelTensorShape const &input_shape) { if (!input.is_valid() || input.num_dims() != 4) { throw mk_runtime_error( "BatchNormAttrs::get_output_shape: input is invalid"); } - ParallelTensorShape output_shape = input; + // the degree of the output is the same as the input - return output_shape; + return input_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/cast.cc b/lib/op-attrs/src/cast.cc index 7c679439ad..868756abcd 100644 --- a/lib/op-attrs/src/cast.cc +++ b/lib/op-attrs/src/cast.cc @@ -4,13 +4,13 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(CastAttrs const &attrs, - ParallelTensorShape const &input) { + ParallelTensorShape const &input_shape) { if (!input.is_valid()) { throw mk_runtime_error("CastAttrs::get_output_shape: input is invalid"); } - ParallelTensorShape output = input; - output.data_type = attrs.dtype; - return output; + ParallelTensorShape output_shape = input_shape; + output_shape.data_type = attrs.dtype; + return output_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/combine.cc b/lib/op-attrs/src/combine.cc index 48fc6c8720..d851c186c8 100644 --- a/lib/op-attrs/src/combine.cc +++ b/lib/op-attrs/src/combine.cc @@ -2,12 +2,14 @@ #include "utils/hash-utils.h" namespace FlexFlow { - -ParallelTensorShape get_output_shape(CombineAttrs const &attrs, - ParallelTensorShape const &input) { - ParallelTensorShape output = input; - output.at(attrs.combine_dim).degree /= attrs.combine_degree; - 
return output; +ParallelTensorShape + get_output_shape_shape(CombineAttrs const &attrs, + ParallelTensorShape const &input_shape) { + ParallelTensorShape output_shape = input_shape; + output_shape.at(attrs.combine_dim).degree /= attrs.combine_degree; + output_shape.at(attrs.combine_dim).is_replica_dim = + output_shape.at(attrs.combine_dim).degree > 1; + return output_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/concat.cc b/lib/op-attrs/src/concat.cc index 5c9dc3e370..47807b9c1a 100644 --- a/lib/op-attrs/src/concat.cc +++ b/lib/op-attrs/src/concat.cc @@ -2,7 +2,6 @@ #include "utils/exception.h" namespace FlexFlow { - ParallelTensorShape get_output_shape(ConcatAttrs const &attrs, std::vector const &inputs) { diff --git a/lib/op-attrs/src/dropout.cc b/lib/op-attrs/src/dropout.cc index ab763b8a7f..5d5f78ef25 100644 --- a/lib/op-attrs/src/dropout.cc +++ b/lib/op-attrs/src/dropout.cc @@ -4,9 +4,9 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(DropoutAttrs const &attrs, - ParallelTensorShape const &input) { - ParallelTensorShape output = input; - return output; + ParallelTensorShape const &input_shape) { + + return input_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/element_binary.cc b/lib/op-attrs/src/element_binary.cc index c61be195c0..77ddc016b8 100644 --- a/lib/op-attrs/src/element_binary.cc +++ b/lib/op-attrs/src/element_binary.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/element_binary.h" +#include "utils/exception.h" namespace FlexFlow { @@ -18,8 +19,10 @@ ParallelTensorShape get_output_shape(ElementBinaryAttrs const &atts, } else if (rhs.at(ff_dim_t(i)).size == 1) { output.at(ff_dim_t(i)) = lhs.at(ff_dim_t(i)); } else { - assert(false && "Operands could not be broadcast together"); - exit(0); + throw mk_runtime_error( + "Operands of shapes {} and {} could not be broadcast together", + lhs, + rhs); } } diff --git a/lib/op-attrs/src/element_unary.cc b/lib/op-attrs/src/element_unary.cc index b9028ac3b8..109653c5c5 100644 
--- a/lib/op-attrs/src/element_unary.cc +++ b/lib/op-attrs/src/element_unary.cc @@ -3,9 +3,9 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(ElementUnaryAttrs const &atts, - ParallelTensorShape const &input) { - ParallelTensorShape output = input; - return output; + ParallelTensorShape const &input_shape) { + + return input_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/embedding.cc b/lib/op-attrs/src/embedding.cc index 5e86335f14..3aba747036 100644 --- a/lib/op-attrs/src/embedding.cc +++ b/lib/op-attrs/src/embedding.cc @@ -9,7 +9,6 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(EmbeddingAttrs const &atts, ParallelTensorShape const &input) { ParallelTensorShape output = input; - output.at(ff_dim_t(1)).size = input.at(ff_dim_t(1)).size; output.at(ff_dim_t(2)).size = atts.out_channels; // output degree is same as input degree return output; diff --git a/lib/op-attrs/src/groupby.cc b/lib/op-attrs/src/groupby.cc index 09babdb20d..6c1b201993 100644 --- a/lib/op-attrs/src/groupby.cc +++ b/lib/op-attrs/src/groupby.cc @@ -19,16 +19,15 @@ in enumerate(grouped_data): print(f"Group {i}: {group}") */ ParallelTensorShape get_output_shape(Group_byAttrs const &attrs, - ParallelTensorShape const &input, + ParallelTensorShape const &input_shape, ParallelTensorShape const &index) { - if (input.num_dims() != index.num_dims()) { + if (input_shape.num_dims() != index.num_dims()) { throw mk_runtime_error( "Group_by: input and index must have the same number of dimensions"); } - ParallelTensorShape output = input; // degree of output is same as input's - return output; + return input_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/layer_norm.cc b/lib/op-attrs/src/layer_norm.cc index 58160b528f..737e527647 100644 --- a/lib/op-attrs/src/layer_norm.cc +++ b/lib/op-attrs/src/layer_norm.cc @@ -5,13 +5,13 @@ namespace FlexFlow { // todo: maybe we need to set the degree of parallel_dim ParallelTensorShape get_output_shape(LayerNormAttrs 
const &attrs, - ParallelTensorShape const &input) { + ParallelTensorShape const &input_shape) { if (input.num_dims() < 2) { throw mk_runtime_error("LayerNorm: input must have at least 2 dimensions"); } - ParallelTensorShape output = input; + // output degree is same as input degree - return output; + return input_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/reduce.cc b/lib/op-attrs/src/reduce.cc index 727e41e0ec..a6c78db75b 100644 --- a/lib/op-attrs/src/reduce.cc +++ b/lib/op-attrs/src/reduce.cc @@ -6,7 +6,7 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(ReduceAttrs const &attrs, ParallelTensorShape const &input) { NOT_IMPLEMENTED() - // reduce is sum/max/min/mean + // reduce is sum/max/min/mean, I think we just return 1D tensor // NOTE: how to implement this } diff --git a/lib/op-attrs/src/replicate.cc b/lib/op-attrs/src/replicate.cc index 2086ab41bd..d059bd387b 100644 --- a/lib/op-attrs/src/replicate.cc +++ b/lib/op-attrs/src/replicate.cc @@ -1,20 +1,9 @@ #include "op-attrs/ops/replicate.h" #include "op-attrs/parallel_dim.h" -#include "utils/exception.decl.h" +#include "utils/exception.h" namespace FlexFlow { -bool ReplicateAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - if (this->replicate_dim >= input.num_dims() || this->replicate_degree <= 0) { - return false; - } - - return true; -} - // replicate by n multiplies degree by n and shape by n // seems it is like pytorch's repeat // original_tensor = torch.tensor([1, 2, 3]) torch.Size([3]) @@ -25,7 +14,10 @@ bool ReplicateAttrs::is_valid(ParallelTensorShape const &input) const { ParallelTensorShape get_output_shape(ReplicateAttrs const &attrs, ParallelTensorShape const &input) { - assert(attrs.is_valid(input)); + if (attrs.replicate_dim >= input.num_dims() || attrs.replicate_degree <= 0) { + throw mk_runtime_error("ReplicateAttrs::get_output_shape: axis is out of " + "range or input is invalid"); + } ParallelTensorShape 
output = input; output.at(attrs.replicate_dim).size *= attrs.replicate_degree; return output; diff --git a/lib/op-attrs/src/reverse.cc b/lib/op-attrs/src/reverse.cc index 644e733a0a..f418495acf 100644 --- a/lib/op-attrs/src/reverse.cc +++ b/lib/op-attrs/src/reverse.cc @@ -5,13 +5,13 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(ReverseAttrs const &attrs, - ParallelTensorShape const &input) { - if (attrs.axis < 0 || attrs.axis >= input.num_dims()) { + ParallelTensorShape const &input_shape) { + if (attrs.axis < 0 || attrs.axis >= input_shape.num_dims()) { throw mk_runtime_error("ReverseAttrs: axis is invalid"); } - ParallelTensorShape output = input; + // output degree is same as input degree, because it's just reverse operation - return output; + return input_shape; } }; // namespace FlexFlow diff --git a/lib/op-attrs/src/softmax.cc b/lib/op-attrs/src/softmax.cc index 1d9cd5fcc0..d9ab0c9a84 100644 --- a/lib/op-attrs/src/softmax.cc +++ b/lib/op-attrs/src/softmax.cc @@ -4,12 +4,12 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(SoftmaxAttrs const &attrs, - ParallelTensorShape const &input) { - if (input.num_dims() < 2) { - throw mk_runtime_error("SoftmaxAttrs: input.num_dims() < 2"); + ParallelTensorShape const &input_shape) { + if (input_shape.num_dims() < 2) { + throw mk_runtime_error("SoftmaxAttrs: input_shape.num_dims() < 2"); } - ParallelTensorShape output = input; - return output; + + return input_shape; } } // namespace FlexFlow From e59975eb2ad1bd253d9817c8100c126d531a6cb4 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Wed, 18 Oct 2023 21:17:23 +0000 Subject: [PATCH 55/69] remove the empty blank --- lib/op-attrs/src/batch_norm.cc | 8 ++++---- lib/op-attrs/src/broadcast.cc | 5 ++++- lib/op-attrs/src/cast.cc | 5 +++-- lib/op-attrs/src/dropout.cc | 1 - lib/op-attrs/src/element_unary.cc | 1 - lib/op-attrs/src/groupby.cc | 1 - lib/op-attrs/src/layer_norm.cc | 1 - lib/op-attrs/src/pool_2d.cc | 2 -- lib/op-attrs/src/repartition.cc | 2 +- 
lib/op-attrs/src/replicate.cc | 2 ++ lib/op-attrs/src/reverse.cc | 1 - lib/op-attrs/src/softmax.cc | 1 - 12 files changed, 14 insertions(+), 16 deletions(-) diff --git a/lib/op-attrs/src/batch_norm.cc b/lib/op-attrs/src/batch_norm.cc index 2557c9def5..5e22c8147d 100644 --- a/lib/op-attrs/src/batch_norm.cc +++ b/lib/op-attrs/src/batch_norm.cc @@ -2,16 +2,16 @@ #include "utils/exception.h" namespace FlexFlow { -// input: [b, c, h, w] +// input_shape: [b, c, h, w] // output: [b, c, h, w] ParallelTensorShape get_output_shape(BatchNormAttrs const &attrs, ParallelTensorShape const &input_shape) { - if (!input.is_valid() || input.num_dims() != 4) { + if (!input_shape.is_valid() || input_shape.num_dims() != 4) { throw mk_runtime_error( - "BatchNormAttrs::get_output_shape: input is invalid"); + "BatchNormAttrs::get_output_shape: input_shape is invalid"); } - // the degree of the output is the same as the input + // the degree of the output is the same as the input_shape return input_shape; } diff --git a/lib/op-attrs/src/broadcast.cc b/lib/op-attrs/src/broadcast.cc index c69f480b84..f0de4cc807 100644 --- a/lib/op-attrs/src/broadcast.cc +++ b/lib/op-attrs/src/broadcast.cc @@ -1,3 +1,6 @@ #include "op-attrs/ops/broadcast.h" -namespace FlexFlow {} // namespace FlexFlow +namespace FlexFlow { + +// what's the definition of broadcast for get_output_shape +} // namespace FlexFlow diff --git a/lib/op-attrs/src/cast.cc b/lib/op-attrs/src/cast.cc index 868756abcd..60f899fed2 100644 --- a/lib/op-attrs/src/cast.cc +++ b/lib/op-attrs/src/cast.cc @@ -5,8 +5,9 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(CastAttrs const &attrs, ParallelTensorShape const &input_shape) { - if (!input.is_valid()) { - throw mk_runtime_error("CastAttrs::get_output_shape: input is invalid"); + if (!input_shape.is_valid()) { + throw mk_runtime_error( + "CastAttrs::get_output_shape: input_shape is invalid"); } ParallelTensorShape output_shape = input_shape; output_shape.data_type = attrs.dtype; 
diff --git a/lib/op-attrs/src/dropout.cc b/lib/op-attrs/src/dropout.cc index 5d5f78ef25..7bdae67af9 100644 --- a/lib/op-attrs/src/dropout.cc +++ b/lib/op-attrs/src/dropout.cc @@ -5,7 +5,6 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(DropoutAttrs const &attrs, ParallelTensorShape const &input_shape) { - return input_shape; } diff --git a/lib/op-attrs/src/element_unary.cc b/lib/op-attrs/src/element_unary.cc index 109653c5c5..08622e6f63 100644 --- a/lib/op-attrs/src/element_unary.cc +++ b/lib/op-attrs/src/element_unary.cc @@ -4,7 +4,6 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(ElementUnaryAttrs const &atts, ParallelTensorShape const &input_shape) { - return input_shape; } diff --git a/lib/op-attrs/src/groupby.cc b/lib/op-attrs/src/groupby.cc index 6c1b201993..64d7797771 100644 --- a/lib/op-attrs/src/groupby.cc +++ b/lib/op-attrs/src/groupby.cc @@ -25,7 +25,6 @@ ParallelTensorShape get_output_shape(Group_byAttrs const &attrs, throw mk_runtime_error( "Group_by: input and index must have the same number of dimensions"); } - // degree of output is same as input's return input_shape; } diff --git a/lib/op-attrs/src/layer_norm.cc b/lib/op-attrs/src/layer_norm.cc index 737e527647..d98562219f 100644 --- a/lib/op-attrs/src/layer_norm.cc +++ b/lib/op-attrs/src/layer_norm.cc @@ -9,7 +9,6 @@ ParallelTensorShape get_output_shape(LayerNormAttrs const &attrs, if (input.num_dims() < 2) { throw mk_runtime_error("LayerNorm: input must have at least 2 dimensions"); } - // output degree is same as input degree return input_shape; } diff --git a/lib/op-attrs/src/pool_2d.cc b/lib/op-attrs/src/pool_2d.cc index b3859c6e06..ec6253a0b1 100644 --- a/lib/op-attrs/src/pool_2d.cc +++ b/lib/op-attrs/src/pool_2d.cc @@ -122,8 +122,6 @@ ParallelTensorShape get_output_shape(Pool2DAttrs const &attrs, return output_shape; } -} - /* ParallelTensorShape Pool2DAttrs::calculate_output_shape(ParallelTensorShape * const &input) const { */ /* return 
solve_mappings(input).output_shapes.at(0); */ diff --git a/lib/op-attrs/src/repartition.cc b/lib/op-attrs/src/repartition.cc index b95a400ad5..d7807fcc10 100644 --- a/lib/op-attrs/src/repartition.cc +++ b/lib/op-attrs/src/repartition.cc @@ -11,7 +11,7 @@ ParallelTensorShape get_output_shape(RepartitionAttrs const &attrs, ParallelDim dim = input.at(attrs.repartition_dim); if (dim.size % attrs.repartition_degree * dim.degree != 0) { throw mk_runtime_error("RepartitionAttrs: input.at(attrs.repartition_dim) " - "% attrs.repartition_degree * dim.degree != 0"); + "attrs.repartition_degree * dim.degree != 0"); } ParallelTensorShape output(input.dims, input.data_type); output.at(attrs.repartition_dim).degree *= attrs.repartition_degree; diff --git a/lib/op-attrs/src/replicate.cc b/lib/op-attrs/src/replicate.cc index d059bd387b..7a1b511a5e 100644 --- a/lib/op-attrs/src/replicate.cc +++ b/lib/op-attrs/src/replicate.cc @@ -20,6 +20,8 @@ ParallelTensorShape get_output_shape(ReplicateAttrs const &attrs, } ParallelTensorShape output = input; output.at(attrs.replicate_dim).size *= attrs.replicate_degree; + output.at(attrs.replicate_dim).is_replica_dim = + (input.at(attrs.replicate_dim).degree > 1); return output; } diff --git a/lib/op-attrs/src/reverse.cc b/lib/op-attrs/src/reverse.cc index f418495acf..663150861f 100644 --- a/lib/op-attrs/src/reverse.cc +++ b/lib/op-attrs/src/reverse.cc @@ -9,7 +9,6 @@ ParallelTensorShape get_output_shape(ReverseAttrs const &attrs, if (attrs.axis < 0 || attrs.axis >= input_shape.num_dims()) { throw mk_runtime_error("ReverseAttrs: axis is invalid"); } - // output degree is same as input degree, because it's just reverse operation return input_shape; } diff --git a/lib/op-attrs/src/softmax.cc b/lib/op-attrs/src/softmax.cc index d9ab0c9a84..eff13aab59 100644 --- a/lib/op-attrs/src/softmax.cc +++ b/lib/op-attrs/src/softmax.cc @@ -8,7 +8,6 @@ ParallelTensorShape get_output_shape(SoftmaxAttrs const &attrs, if (input_shape.num_dims() < 2) { throw 
mk_runtime_error("SoftmaxAttrs: input_shape.num_dims() < 2"); } - return input_shape; } From 7da6b505ebd88e06c1a9568dac9463f638794ea6 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 19 Oct 2023 19:25:57 +0000 Subject: [PATCH 56/69] some update --- lib/op-attrs/src/attention.cc | 6 +++--- lib/op-attrs/src/batch_matmul.cc | 2 +- lib/op-attrs/src/reduce.cc | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index 61a5c79b0a..f89124a94f 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -58,9 +58,9 @@ TensorShape // according to the pytorch // https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html, // we consider the batch size -// query: [seq_len, batch_size, embed_dim], -// key: (seq_len, batch_size, embed_dim) -// value: (seq_len, batch_size,embed_dim) +// query: [replicate_num, seq_len, batch_size, embed_dim],4D, +// key: (replicate_num, seq_len, batch_size, embed_dim) +// value: (replicate_num ,seq_len, batch_size,embed_dim) // multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) // output: (seq_len, batch_size, embed_dim) diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index e170e29053..17406268cd 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -4,7 +4,7 @@ #include "utils/exception.h" namespace FlexFlow { -// how to get the batch size? and lhs: [b, n, m], rhs: [b, m, p] +// how to get the batch size? 
and lhs: [replicate_num, b, n, m], rhs: [b, m, p] // output: [b, n, p] //n == s1, m == s2 //[b, n/2, m], [b, m, p/2] -> [b, n/2, p/2] //[b, n, m/2], [b, m/2, p] -> [b, n, p/2] diff --git a/lib/op-attrs/src/reduce.cc b/lib/op-attrs/src/reduce.cc index a6c78db75b..930ec42d76 100644 --- a/lib/op-attrs/src/reduce.cc +++ b/lib/op-attrs/src/reduce.cc @@ -3,10 +3,11 @@ namespace FlexFlow { +// ParallelTensorShape get_output_shape(ReduceAttrs const &attrs, ParallelTensorShape const &input) { NOT_IMPLEMENTED() - // reduce is sum/max/min/mean, I think we just return 1D tensor + // reduce is sum/max/min/mean, I think we just return 1D tensor [1, 2, 4] => [7, ] // NOTE: how to implement this } From 40ffcd9b8b2b25a53d84b2b1d22b6999768a28ed Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Fri, 27 Oct 2023 13:44:54 +0000 Subject: [PATCH 57/69] add new batch matmul --- .../include/op-attrs/parallel_tensor_shape.h | 2 + lib/op-attrs/src/batch_matmul.cc | 102 +++++++++--------- lib/op-attrs/src/parallel_tensor_shape.cc | 10 ++ 3 files changed, 61 insertions(+), 53 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h index ca980966e8..f30786f55d 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h @@ -27,6 +27,8 @@ struct ParallelTensorShape : public use_visitable_cmp { int num_dims() const; + int get_volume() const; + ParallelDim const &at(ff_dim_t const &) const; ParallelDim &at(ff_dim_t const &); ParallelDim const &operator[](ff_dim_t const &) const; diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index 17406268cd..38f37bead2 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -1,69 +1,65 @@ #include "op-attrs/ops/batch_matmul.h" #include "op-attrs/ff_dim.h" #include "op-attrs/parallel_tensor_shape.h" +#include "utils/exception.decl.h" #include 
"utils/exception.h" namespace FlexFlow { -// how to get the batch size? and lhs: [replicate_num, b, n, m], rhs: [b, m, p] -// output: [b, n, p] //n == s1, m == s2 -//[b, n/2, m], [b, m, p/2] -> [b, n/2, p/2] -//[b, n, m/2], [b, m/2, p] -> [b, n, p/2] + +//lhs: [, ,, ] +//rhs:[, ,, ] +//in the original tensor, we assume the dl1/dr1 is 1 +//output:[, , , ] +//how to decide the r3, d01, do3, do4 +//Note: Lsize = r1 * dl3 * dl4, Rsize = r2 * dr3 * dr4 , Rsize = Lsize +//do3 = dl3, do4 = dr4 +//so, r3 = Lsize / do3 / do4 +//r3 / do1 = r1 / dl1 ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) { - ParallelTensorShape output_shape = lhs; +if(lhs.num_dims() != 4 || rhs.num_dims() != 4) { + throw mk_runtime_error("rhs or lhs dimension is not 4"); +} - // check if the input is valid - if (!lhs.is_valid() || !rhs.is_valid()) { - throw mk_runtime_error( - "BatchMatmulAttrs::get_output_shape: input is invalid") - } +int rl = lhs.at(ff_dim_t(0)).size ;// replicate_num of lhs +int dl1 = lhs.at(ff_dim_t(0)).degree;//degree of 0 dimension +int dl3 = lhs.at(ff_dim_t(3)).degree;//degree of third dimension +int dr4 = rhs.at(ff_dim_t(4)).degree;//degree of fouth dimenstion - if (lhs.at(ff_dim_t(0)).size != rhs.at(ff_dim_t(0)).size) { - throw mk_runtime_error( - "BatchMatmulAttrs::get_output_shape: batch size is not equal"); - } - if (lhs.at(ff_dim_t(2)).size != rhs.at(ff_dim_t(1)).size || - lhs.at(ff_dim_t(1)).size != attrs.a_seq_length_dim || - rhs.at(ff_dim_t(2)).size != attrs.b_seq_length_dim) { +int lsize = lhs.get_volume(); +int rsize = rhs.get_volume(); +if(lsize != rsize) { + throw mk_runtime_error("BatchMatmulAttrs::get_output_shape, the volume of lhs and rhs are not matched "); +} + +if (lhs.at(ff_dim_t(1)).size != rhs.at(ff_dim_t(1)).size) { throw mk_runtime_error( - "BatchMatmulAttrs::get_output_shape: third demension of lhs and second " - "dementions of rhs are not match"); - } - 
output_shape.at(ff_dim_t(0)).size = lhs.at(ff_dim_t(0)).size; // batch size - output_shape.at(ff_dim_t(1)).size = lhs.at(ff_dim_t(1)).size; - output_shape.at(ff_dim_t(2)).size = rhs.at(ff_dim_t(2)).size; + "BatchMatmulAttrs::get_output_shape, batch size is not equal"); +} + +if(lhs.at(ff_dim_t(3)).size != rhs.at(ff_dim_t(3)).size ) { + throw mk_runtime_error( + "BatchMatmulAttrs::get_output_shape: forth demension of lhs and third dementions of rhs are not match"); +} + +//4D tensor +ParallelTensorShape output_shape = lhs; + +output_shape.at(ff_dim_t(0)).size = lsize / (dl3 * dr4); +output_shape.at(ff_dim_t(0)).degree = output_shape.at(ff_dim_t(0)).size / (rl / dl1);//this may have some problem +output_shape.at(ff_dim_t(0)).is_replica_dim = true; + +output_shape.at(ff_dim_t(3)).size = lhs.at(ff_dim_t(3)).size; +output_shape.at(ff_dim_t(3)).degree = dl3; +output_shape.at(ff_dim_t(3)).is_replica_dim = false; + +output_shape.at(ff_dim_t(4)).size = rhs.at(ff_dim_t(4)).size(); +output_shape.at(ff_dim_t(4)).degree = dr4; +output_shape.at(ff_dim_t(4)).is_replica_dim = false; + +return output_shape; - if (lhs.at(ff_dim_t(1)).degree == 1 && lhs.at(ff_dim_t(2)).degree == 1) { - // case 0: degree is 1, [b, n, m], rhs: [b, m, p] -> [b, n, p] - for (int i = 1; i < lhs.num_dims(); i++) { - output_shape.at(ff_dim_t(i)).degree = 1; - output_shape.at(ff_dim_t(i)).is_replica_dim = false; - } - } else if (lhs.at(ff_dim_t(1)).degree == 1 && - lhs.at(ff_dim_t(2)).degree > - 1) { // case 1: [b, n, m/x], [b, m/x, p] => [b, n, y] - output_shape.at(ff_dim_t(1)).is_replica_dim = true; - output_shape.at(ff_dim_t(1)).degree = lhs.at(ff_dim_t(1)).degree; - } else if (lhs.at(ff_dim_t(1)).degree > 1 && - lhs.at(ff_dim_t(2)).degree == - 1) { // case 2: [b, n/x, m] [b m p/x] => [b n/x p/x] - output_shape.at(ff_dim_t(1)).is_replica_dim = true; - output_shape.at(ff_dim_t(2)).is_replica_dim = true; - output_shape.at(ff_dim_t(1)).degree = lhs.at(ff_dim_t(1)).degree; - 
output_shape.at(ff_dim_t(2)).degree = rhs.at(ff_dim_t(2)).degree; - } else if (lhs.at(ff_dim_t(1)).degree > 1 && - lhs.at(ff_dim_t(2)).degree > - 1) { // case 3: [b n/x m/y] [b m/y p/x]=> [b n/x p/x] - output_shape.at(ff_dim_t(1)).is_replica_dim = true; - output_shape.at(ff_dim_t(2)).is_replica_dim = true; - output_shape.at(ff_dim_t(1)).degree = lhs.at(ff_dim_t(1)).degree; - output_shape.at(ff_dim_t(2)).degree = rhs.at(ff_dim_t(2)).degree; - } else { - throw mk_runtime_error("BatchMatmulAttrs::get_output_shape: not supported " - "in BatchMatmulAttrs get_output_shape"); - } - return output_shape; } /* bool BatchMatmulAttrs::is_valid( */ diff --git a/lib/op-attrs/src/parallel_tensor_shape.cc b/lib/op-attrs/src/parallel_tensor_shape.cc index 9a36e7d11b..8f9514e58a 100644 --- a/lib/op-attrs/src/parallel_tensor_shape.cc +++ b/lib/op-attrs/src/parallel_tensor_shape.cc @@ -1,4 +1,5 @@ #include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/ff_dim.h" #include "utils/containers.h" #include "utils/hash-utils.h" @@ -13,6 +14,15 @@ static std::vector lift_dims(TensorDims const &dims) { return lifted_dims; } +int ParallelTensorShape::get_volume() const { + int volume = this->at(ff_dim_t(0)).size; + for(int i = 1; i < num_dims(); i++) { + volume *= this->at(ff_dim_t(0)).degree; + } + + return volume; +} + ParallelTensorDims::ParallelTensorDims(TensorDims const &dims) : data(lift_dims(dims)) {} From f68c83b60a43688787e507c5b5eb28782dddf3f1 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Fri, 27 Oct 2023 14:31:24 +0000 Subject: [PATCH 58/69] add concat --- .../include/op-attrs/parallel_tensor_shape.h | 4 +- lib/op-attrs/src/attention.cc | 2 +- lib/op-attrs/src/batch_matmul.cc | 83 ++++++++++--------- lib/op-attrs/src/combine.cc | 5 +- lib/op-attrs/src/concat.cc | 18 +++- lib/op-attrs/src/parallel_tensor_shape.cc | 4 +- lib/op-attrs/src/reduce.cc | 4 +- 7 files changed, 69 insertions(+), 51 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h 
b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h index f30786f55d..e7df3b72df 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h @@ -27,8 +27,8 @@ struct ParallelTensorShape : public use_visitable_cmp { int num_dims() const; - int get_volume() const; - + int get_volume() const; + ParallelDim const &at(ff_dim_t const &) const; ParallelDim &at(ff_dim_t const &); ParallelDim const &operator[](ff_dim_t const &) const; diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index f89124a94f..cc6d9c3c48 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -58,7 +58,7 @@ TensorShape // according to the pytorch // https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html, // we consider the batch size -// query: [replicate_num, seq_len, batch_size, embed_dim],4D, +// query: [replicate_num, seq_len, batch_size, embed_dim],4D, // key: (replicate_num, seq_len, batch_size, embed_dim) // value: (replicate_num ,seq_len, batch_size,embed_dim) // multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) diff --git a/lib/op-attrs/src/batch_matmul.cc b/lib/op-attrs/src/batch_matmul.cc index 38f37bead2..62c0f525e8 100644 --- a/lib/op-attrs/src/batch_matmul.cc +++ b/lib/op-attrs/src/batch_matmul.cc @@ -6,60 +6,63 @@ namespace FlexFlow { -//lhs: [, ,, ] -//rhs:[, ,, ] -//in the original tensor, we assume the dl1/dr1 is 1 -//output:[, , , ] -//how to decide the r3, d01, do3, do4 -//Note: Lsize = r1 * dl3 * dl4, Rsize = r2 * dr3 * dr4 , Rsize = Lsize -//do3 = dl3, do4 = dr4 -//so, r3 = Lsize / do3 / do4 -//r3 / do1 = r1 / dl1 +// lhs: [, ,, ] +// rhs:[, ,, ] +// in the original tensor, we assume the dl1/dr1 is 1 +// output:[, , , ] +// how to decide the r3, d01, do3, do4 +// Note: Lsize = r1 * dl3 * dl4, Rsize = r2 * dr3 * dr4 , Rsize = Lsize +// do3 = dl3, do4 = dr4 +// so, r3 = Lsize / do3 / do4 +// r3 / do1 = r1 / dl1 
ParallelTensorShape get_output_shape(BatchMatmulAttrs const &attrs, ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) { -if(lhs.num_dims() != 4 || rhs.num_dims() != 4) { - throw mk_runtime_error("rhs or lhs dimension is not 4"); -} + if (lhs.num_dims() != 4 || rhs.num_dims() != 4) { + throw mk_runtime_error("rhs or lhs dimension is not 4"); + } -int rl = lhs.at(ff_dim_t(0)).size ;// replicate_num of lhs -int dl1 = lhs.at(ff_dim_t(0)).degree;//degree of 0 dimension -int dl3 = lhs.at(ff_dim_t(3)).degree;//degree of third dimension -int dr4 = rhs.at(ff_dim_t(4)).degree;//degree of fouth dimenstion + int rl = lhs.at(ff_dim_t(0)).size; // replicate_num of lhs + int dl1 = lhs.at(ff_dim_t(0)).degree; // degree of 0 dimension + int dl3 = lhs.at(ff_dim_t(3)).degree; // degree of third dimension + int dr4 = rhs.at(ff_dim_t(4)).degree; // degree of fouth dimenstion -int lsize = lhs.get_volume(); -int rsize = rhs.get_volume(); -if(lsize != rsize) { - throw mk_runtime_error("BatchMatmulAttrs::get_output_shape, the volume of lhs and rhs are not matched "); -} + int lsize = lhs.get_volume(); + int rsize = rhs.get_volume(); + if (lsize != rsize) { + throw mk_runtime_error("BatchMatmulAttrs::get_output_shape, the volume of " + "lhs and rhs are not matched "); + } -if (lhs.at(ff_dim_t(1)).size != rhs.at(ff_dim_t(1)).size) { + if (lhs.at(ff_dim_t(1)).size != rhs.at(ff_dim_t(1)).size) { throw mk_runtime_error( "BatchMatmulAttrs::get_output_shape, batch size is not equal"); -} + } -if(lhs.at(ff_dim_t(3)).size != rhs.at(ff_dim_t(3)).size ) { - throw mk_runtime_error( - "BatchMatmulAttrs::get_output_shape: forth demension of lhs and third dementions of rhs are not match"); -} - -//4D tensor -ParallelTensorShape output_shape = lhs; + if (lhs.at(ff_dim_t(3)).size != rhs.at(ff_dim_t(3)).size) { + throw mk_runtime_error( + "BatchMatmulAttrs::get_output_shape: forth demension of lhs and third " + "dementions of rhs are not match"); + } -output_shape.at(ff_dim_t(0)).size = lsize 
/ (dl3 * dr4); -output_shape.at(ff_dim_t(0)).degree = output_shape.at(ff_dim_t(0)).size / (rl / dl1);//this may have some problem -output_shape.at(ff_dim_t(0)).is_replica_dim = true; + // 4D tensor + ParallelTensorShape output_shape = lhs; -output_shape.at(ff_dim_t(3)).size = lhs.at(ff_dim_t(3)).size; -output_shape.at(ff_dim_t(3)).degree = dl3; -output_shape.at(ff_dim_t(3)).is_replica_dim = false; + output_shape.at(ff_dim_t(0)).size = lsize / (dl3 * dr4); + output_shape.at(ff_dim_t(0)).degree = + output_shape.at(ff_dim_t(0)).size / + (rl / dl1); // this may have some problem + output_shape.at(ff_dim_t(0)).is_replica_dim = true; -output_shape.at(ff_dim_t(4)).size = rhs.at(ff_dim_t(4)).size(); -output_shape.at(ff_dim_t(4)).degree = dr4; -output_shape.at(ff_dim_t(4)).is_replica_dim = false; + output_shape.at(ff_dim_t(3)).size = lhs.at(ff_dim_t(3)).size; + output_shape.at(ff_dim_t(3)).degree = dl3; + output_shape.at(ff_dim_t(3)).is_replica_dim = false; -return output_shape; + output_shape.at(ff_dim_t(4)).size = rhs.at(ff_dim_t(4)).size(); + output_shape.at(ff_dim_t(4)).degree = dr4; + output_shape.at(ff_dim_t(4)).is_replica_dim = false; + return output_shape; } /* bool BatchMatmulAttrs::is_valid( */ diff --git a/lib/op-attrs/src/combine.cc b/lib/op-attrs/src/combine.cc index d851c186c8..ee77fd08b6 100644 --- a/lib/op-attrs/src/combine.cc +++ b/lib/op-attrs/src/combine.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/combine.h" +#include "utils/exception.decl.h" #include "utils/hash-utils.h" namespace FlexFlow { @@ -6,9 +7,11 @@ ParallelTensorShape get_output_shape_shape(CombineAttrs const &attrs, ParallelTensorShape const &input_shape) { ParallelTensorShape output_shape = input_shape; + /* output_shape.at(attrs.combine_dim).degree /= attrs.combine_degree; output_shape.at(attrs.combine_dim).is_replica_dim = - output_shape.at(attrs.combine_dim).degree > 1; + output_shape.at(attrs.combine_dim).degree > 1;*/ + NOT_IMPLEMENTED(); return output_shape; } diff --git 
a/lib/op-attrs/src/concat.cc b/lib/op-attrs/src/concat.cc index 47807b9c1a..5efe8855d8 100644 --- a/lib/op-attrs/src/concat.cc +++ b/lib/op-attrs/src/concat.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/concat.h" +#include "utils/exception.decl.h" #include "utils/exception.h" namespace FlexFlow { @@ -12,11 +13,22 @@ ParallelTensorShape "range or input is invalid"); } } + + int dims = inputs[0].num_dims(); + for (int i = 1; i < inputs.size(); i++) { + if (inputs[i].num_dims() != dims) { + throw mk_runtime_error(" the input dims not matched at i:", i); + } + } + for (auto &i : inputs) { - output.at(attrs.axis).size += i.at(attrs.axis).size; + output.at(ff_dim_t(attrs.axis)).size += i.at(ff_dim_t(attrs.axis)).size; + } + output.at(ff_dim_t(0)).is_replica_dim = true; + // note: how to decide the degee? + for (int i = 1; i < output.num_dims(); i++) { + output.at(ff_dim_t(i)).is_replica_dim = false; } - output.at(attrs.axis).degree = inputs[0].at(attrs.axis).degree; - output.at(attrs.axis).is_replica_dim = inputs[0].at(attrs.axis).degree >= 1; return output; } diff --git a/lib/op-attrs/src/parallel_tensor_shape.cc b/lib/op-attrs/src/parallel_tensor_shape.cc index 8f9514e58a..5848991c13 100644 --- a/lib/op-attrs/src/parallel_tensor_shape.cc +++ b/lib/op-attrs/src/parallel_tensor_shape.cc @@ -15,8 +15,8 @@ static std::vector lift_dims(TensorDims const &dims) { } int ParallelTensorShape::get_volume() const { - int volume = this->at(ff_dim_t(0)).size; - for(int i = 1; i < num_dims(); i++) { + int volume = this->at(ff_dim_t(0)).size; + for (int i = 1; i < num_dims(); i++) { volume *= this->at(ff_dim_t(0)).degree; } diff --git a/lib/op-attrs/src/reduce.cc b/lib/op-attrs/src/reduce.cc index 930ec42d76..79fa6d7598 100644 --- a/lib/op-attrs/src/reduce.cc +++ b/lib/op-attrs/src/reduce.cc @@ -7,8 +7,8 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(ReduceAttrs const &attrs, ParallelTensorShape const &input) { NOT_IMPLEMENTED() - // reduce is sum/max/min/mean, I think we 
just return 1D tensor [1, 2, 4] => [7, ] - // NOTE: how to implement this + // reduce is sum/max/min/mean, I think we just return 1D tensor [1, 2, 4] => + // [7, ] NOTE: how to implement this } } // namespace FlexFlow From 5ef2497921b817213f42d514afb99d5151645898 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Fri, 27 Oct 2023 14:44:03 +0000 Subject: [PATCH 59/69] add conv_2d --- lib/op-attrs/src/conv_2d.cc | 64 +++++++++++++------------------------ 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index 15566f9005..5f86ffa750 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -84,61 +84,41 @@ std::vector return mappings; } -// according to pytorch, the input shape: [b, input_channel, input_h, input_w] -// kernel shape: [output_channel, input_channel, kernel_h, kernel_w] -// we may have stide_h and padding_h -// output shape: [b, output_channel, output_h, output_w] -// output_h = (input_h + 2 * padding_h - kernel_h) / stride_h + 1 -// output_w = (input_w + 2 * padding_w - kernel_w) / stride_w + 1 +// input: (, , , < input_h, di4, f>, +// ) kernel(Conv2DAttrs): out_channels, kernel_h, kernel_w, +// stride_h, stride_w, padding_h, padding_w, output shape:(, , , , ) +// output_h = (input_h + 2 * padding_h - kernel_h) / stride_h + 1 +// output_w = (input_w + 2 * padding_w - kernel_w) / stride_w + 1 +// assert: for the kernel, dk1 == dk2=dk4=dk4=dk5=1 +// question:how to decide the ro/do3/do4/do5? 
+// I think: do3= di3, di4= do4, di5 = do5, do1=di1, ro=ri ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, ParallelTensorShape const &input) { - ParallelTensorShape output = input; - if (input.num_dims() != 4) { + if (input.num_dims() != 5) { throw mk_runtime_error("Conv2DAttrs::get_output_shape: input is invalid"); } - - if (attrs.kernel_h > input.at(ff_dim_t(2)).size || - attrs.kernel_w > input.at(ff_dim_t(3)).size) { + if (attrs.kernel_h > input.at(ff_dim_t(3)).size || + attrs.kernel_w > input.at(ff_dim_t(4)).size) { throw mk_runtime_error( "Conv2DAttrs::get_output_shape: kernel size is larger than input size"); } - output.at(ff_dim_t(1)).size = attrs.out_channels; - output.at(ff_dim_t(2)).size = - (input.at(ff_dim_t(2)).size + 2 * attrs.padding_h - attrs.kernel_h) / + ParallelTensorShape output = input; + output.at(ff_dim_t(0)).is_replica_dim = true; + output.at(ff_dim_t(2)).size = attrs.out_channels; + output.at(ff_dim_t(3)).size = + (input.at(ff_dim_t(3)).size + 2 * attrs.padding_h - attrs.kernel_h) / attrs.stride_h + 1; - output.at(ff_dim_t(3)).size = - (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) / + output.at(ff_dim_t(4)).size = + (input.at(ff_dim_t(4)).size + 2 * attrs.padding_w - attrs.kernel_w) / attrs.stride_w + 1; - - if (input.at(ff_dim_t(2)).size == 1 && input.at(ff_dim_t(3)).size == 1) { - // case 1 input degree is 1, like 1GPU - output.at(ff_dim_t(0)).is_replica_dim = false; - } else if (input.at(ff_dim_t(2)).size > 1 && - input.at(ff_dim_t(3)).size == 1) { - // case 2: [b, input_channel, input_h/x, input_w], [output_channel, - // input_channel, kernel_h, kernel_w] => [b, output_channel, output_h/x, - // output_w] - output.at(ff_dim_t(2)).is_replica_dim = true; - output.at(ff_dim_t(2)).degree = input.at(ff_dim_t(2)).degree; - } else if (input.at(ff_dim_t(2)).size == 1 && - input.at(ff_dim_t(3)).size > 1) { - // case 3: [b, input_channel, input_h, input_w / x] [output_channel, - // input_channel, kernel_h, 
kernel_w / x] => [b, output_channel, output_h, - // output_w / x] - output.at(ff_dim_t(3)).is_replica_dim = true; - output.at(ff_dim_t(3)).degree = input.at(ff_dim_t(3)).degree; - } else if (input.at(ff_dim_t(2)).size > 1 && input.at(ff_dim_t(3)).size > 1) { - for (int i = 2; i < input.num_dims(); i++) { - output.at(ff_dim_t(i)).is_replica_dim = true; - output.at(ff_dim_t(i)).degree = input.at(ff_dim_t(i)).degree; - } - } else { - throw mk_runtime_error("Conv2DAttrs::get_output_shape: not supported in " - "Conv2DAttrs get_output_shape"); + for (int i = 1; i < output.num_dims(); i++) { + output.at(ff_dim_t(i)).is_replica_dim = false; } + return output; } From 7e23eb3302ab744f041540bc0d3462e345585cde Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Fri, 27 Oct 2023 14:46:15 +0000 Subject: [PATCH 60/69] add element binary --- lib/op-attrs/src/element_binary.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/op-attrs/src/element_binary.cc b/lib/op-attrs/src/element_binary.cc index 77ddc016b8..57236bb04f 100644 --- a/lib/op-attrs/src/element_binary.cc +++ b/lib/op-attrs/src/element_binary.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/element_binary.h" +#include "op-attrs/ff_dim.h" #include "utils/exception.h" namespace FlexFlow { @@ -7,7 +8,9 @@ ParallelTensorShape get_output_shape(ElementBinaryAttrs const &atts, ParallelTensorShape const &lhs, ParallelTensorShape const &rhs) { ParallelTensorShape output = lhs.num_dims() >= rhs.num_dims() ? 
lhs : rhs; - for (int i = 0; i < output.num_dims(); i++) { + // how to decide its degree and size for replicate_num + output.at(ff_dim_t(0)).is_replica_dim = false; + for (int i = 1; i < output.num_dims(); i++) { if (i >= lhs.num_dims()) { output.at(ff_dim_t(i)) = rhs.at(ff_dim_t(i)); } else if (i >= rhs.num_dims()) { From 9bb4de69bfcbb860575162863b3b9f752fc4e9a5 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 28 Oct 2023 01:02:30 +0000 Subject: [PATCH 61/69] add embedding --- lib/op-attrs/src/embedding.cc | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/lib/op-attrs/src/embedding.cc b/lib/op-attrs/src/embedding.cc index 3aba747036..8407eeebfd 100644 --- a/lib/op-attrs/src/embedding.cc +++ b/lib/op-attrs/src/embedding.cc @@ -1,16 +1,38 @@ #include "op-attrs/ops/embedding.h" +#include "op-attrs/ff_dim.h" +#include "op-attrs/parallel_dim.h" +#include "op-attrs/parallel_tensor_dims.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/tensor_shape.h" +#include "utils/exception.h" namespace FlexFlow { // pytorch nn.Embedding // Embedding OP: (num_embeddings, embedding_dim) (num_entries, out_channels) -// Input: (batch_size, seq_len) -// Output: (batch_size, seq_len, embedding_dim) -ParallelTensorShape get_output_shape(EmbeddingAttrs const &atts, +// input:(, < b, di2, f>, < seq_len, di3, f>) +// EmbeddingAttrs:req num_entries, out_channels; +// output:(, , , ) +ParallelTensorShape get_output_shape(EmbeddingAttrs const &attrs, ParallelTensorShape const &input) { - ParallelTensorShape output = input; - output.at(ff_dim_t(2)).size = atts.out_channels; - // output degree is same as input degree + if (input.num_dims() != 3) { + throw mk_runtime_error("for embedding, input shape must be 3D"); + } + + std::vector data; + data.resize(4); + data[0] = input.at(ff_dim_t(0)); + data[0].is_replica_dim = true; + data[1] = input.at(ff_dim_t(1)); + data[2] = input.at(ff_dim_t(2)); + data[3].size = 
attrs.out_channels; // TODO:what's the embedding_dim? + data[3].is_replica_dim = false; + + ParallelTensorShape output = ParallelTensorShape( + ParallelTensorDims(TensorDims(data.begin(), data.end())), + attrs.data_type); + return output; } From 51a9cb748884b1a5c1945eb25c065b12eff82e46 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 28 Oct 2023 01:11:35 +0000 Subject: [PATCH 62/69] add flat --- lib/op-attrs/src/flat.cc | 45 +++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/lib/op-attrs/src/flat.cc b/lib/op-attrs/src/flat.cc index 51c29ec5b7..0cfe71b398 100644 --- a/lib/op-attrs/src/flat.cc +++ b/lib/op-attrs/src/flat.cc @@ -1,6 +1,8 @@ #include "op-attrs/ops/flat.h" +#include "op-attrs/ff_dim.h" #include "parallel_dim_mapping_record.h" #include "parallel_dim_mapping_record_solver.h" +#include "utils/exception.h" #include namespace FlexFlow { @@ -17,20 +19,39 @@ constexpr int NUMDIM = 3, CHANNEL = 0, SAMPLE = 1, REPLICA = 2; // flat is like the pytorch view // tensor = torch.randn(2, 3, 4) ,flattened_tensor = tensor.view(-1) #shape: // (24) +// input: (, , , ......) 
+// assume d1=d2=d3 +// output: 2d dimention (, ) ParallelTensorShape get_output_shape(FlatAttrs const &attrs, ParallelTensorShape const &input) { - ParallelTensorShape output_shape(input.dims, input.data_type); - - output_shape.at(ff_dim_t(Output::CHANNEL)).size = - input.at(ff_dim_t(Input::CHANNEL)).size * - input.at(ff_dim_t(Input::HEIGHT)).size * - input.at(ff_dim_t(Input::WIDTH)).size; - output_shape.at(ff_dim_t(Output::CHANNEL)).degree = - input.at(ff_dim_t(Input::CHANNEL)).degree; - output_shape.at(ff_dim_t(Output::CHANNEL)).is_replica_dim = - (input.at(ff_dim_t(Input::CHANNEL)).degree > 1); - - return output_shape; + if (input.num_dims() < 2) { + throw mk_runtime_error("for flat,its dims must greater than 2"); + } + + int degree = input.at(ff_dim_t(1)).degree; + for (int i = 1; i < input.num_dims(); i++) { + if (degree != input.at(ff_dim_t(i)).degree) { + throw mk_runtime_error( + "for flat, all degree should be equal, but elemement ", i, " not"); + } + } + std::vector data; + data.resize(2); + data[0] = input.at(ff_dim_t(0)); + data[0].is_replica_dim = true; + data[1].degree = input.at(ff_dim_t(1)).degree; + data[1].size = input.at(ff_dim_t(1)).size; + data[1].is_replica_dim = false; + + for (int i = 2; i < input.num_dims(); i++) { + data[1].size *= input.at(ff_dim_t(i)).size; + } + + ParallelTensorShape output = ParallelTensorShape( + ParallelTensorDims(TensorDims(data.begin(), data.end())), + input.data_type); + + return output; } /* bool FlatAttrs::is_valid(ParallelTensorShape const &input) const { */ From cdb38d0ed5ec09025151177e502493a297193747 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 28 Oct 2023 01:45:24 +0000 Subject: [PATCH 63/69] add pool2d --- lib/op-attrs/include/op-attrs/ops/pool_2d.h | 1 - lib/op-attrs/src/conv_2d.cc | 11 ++- lib/op-attrs/src/gather.cc | 36 +++----- lib/op-attrs/src/groupby.cc | 42 +++++---- lib/op-attrs/src/layer_norm.cc | 2 +- lib/op-attrs/src/linear.cc | 47 +++------- lib/op-attrs/src/pool_2d.cc | 98 
+++++++-------------- lib/op-attrs/src/topk.cc | 3 +- 8 files changed, 91 insertions(+), 149 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/pool_2d.h b/lib/op-attrs/include/op-attrs/ops/pool_2d.h index b688be85f5..3bc862c481 100644 --- a/lib/op-attrs/include/op-attrs/ops/pool_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/pool_2d.h @@ -17,7 +17,6 @@ struct Pool2DAttrs { req kernel_h, kernel_w, stride_h, stride_w, padding_h, padding_w; req pool_type; req activation; - bool is_valid(ParallelTensorShape const &) const; }; FF_VISITABLE_STRUCT(Pool2DAttrs, kernel_h, diff --git a/lib/op-attrs/src/conv_2d.cc b/lib/op-attrs/src/conv_2d.cc index 5f86ffa750..75c61a82af 100644 --- a/lib/op-attrs/src/conv_2d.cc +++ b/lib/op-attrs/src/conv_2d.cc @@ -85,9 +85,14 @@ std::vector } // input: (, , , < input_h, di4, f>, -// ) kernel(Conv2DAttrs): out_channels, kernel_h, kernel_w, -// stride_h, stride_w, padding_h, padding_w, output shape:(, , , , ) +// ) + +// kernel(Conv2DAttrs): out_channels, kernel_h, kernel_w, stride_h, stride_w, +// padding_h, padding_w, + +// output shape:(, , , , ) + // output_h = (input_h + 2 * padding_h - kernel_h) / stride_h + 1 // output_w = (input_w + 2 * padding_w - kernel_w) / stride_w + 1 // assert: for the kernel, dk1 == dk2=dk4=dk4=dk5=1 diff --git a/lib/op-attrs/src/gather.cc b/lib/op-attrs/src/gather.cc index ed7e5abd7b..7402bdc67c 100644 --- a/lib/op-attrs/src/gather.cc +++ b/lib/op-attrs/src/gather.cc @@ -1,23 +1,8 @@ #include "op-attrs/ops/gather.h" -#include "utils/exception.decl.h" -#include "utils/exceptions.h" +#include "utils/exception.h" namespace FlexFlow { -bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, - ParallelTensorShape const &rhs) const { - if (lhs.dims.num_dims() != rhs.dims.num_dims()) { - return false; - } - for (auto i : lhs.dims) { - if (ff_dim_t(i.size) != this->dim && - lhs.at(ff_dim_t(i.size)).size < rhs.at(ff_dim_t(i.size)).size) { - return false; - } - } - return true; -} - // 
https://pytorch.org/docs/stable/generated/torch.gather.html // todo: why return a vector? std::vector @@ -26,25 +11,24 @@ std::vector ParallelTensorShape const &index) { if (input.num_dims() != index.num_dims()) { throw mk_runtime_error( - "Gather: input and index must have the same number of dimensions"); + "for gather, the dimensions of input and index are not match"); } - for (int i = 0; i < input.num_dims(); i++) { + for (int i = 1; i < input.num_dims(); i++) { if (i != attrs.dim && input.at(ff_dim_t(i)).size <= index.at(ff_dim_t(i)).size) { throw mk_runtime_error( "Gather: index.size(d) <= input.size(d) for all dimensions d != dim"); } - } - - ParallelTensorShape output = input; - std::vector results; - // NOTE(lambda):why return a vector? - results.push_back(output); - return results; + ParallelTensorShape output = index; + output.at(ff_dim_t(0)) = input.at(ff_dim_t(0)); + std::vector results; + // NOTE(lambda):why return a vector? + results.push_back(output); + return results; + } } - /* bool GatherAttrs::is_valid(ParallelTensorShape const &lhs, * ParallelTensorShape const &rhs) const { */ /* if (lhs.num_dims() != rhs.num_dims()) { */ diff --git a/lib/op-attrs/src/groupby.cc b/lib/op-attrs/src/groupby.cc index 64d7797771..17c091e02e 100644 --- a/lib/op-attrs/src/groupby.cc +++ b/lib/op-attrs/src/groupby.cc @@ -1,22 +1,32 @@ #include "op-attrs/ops/groupby.h" -#include "utils/exceptions.h" +#include "utils/exception.h" namespace FlexFlow { -/* -import torch -data = torch.tensor([10, 20, 30, 40, 50, 60, 70, 80]) -# group index tensor group_indices -group_indices = torch.tensor([0, 1, 0, 2, 1, 2, 0, 1]) - -# groupby operator -unique_indices, unique_inverse_indices = torch.unique(group_indices, -return_inverse=True) print(f"unique_indices: {unique_indices} and -unique_inverse_indices: {unique_inverse_indices}") grouped_data = [] for i in -unique_indices: # use unique_inverse_indices group_data = -data[unique_inverse_indices == i] grouped_data.append(group_data) 
for i, group -in enumerate(grouped_data): print(f"Group {i}: {group}") -*/ +// import torch +// data = torch.tensor([10, 20, 30, 40, 50, 60, 70, 80]) +// # group index tensor group_indices +// group_indices = torch.tensor([0, 1, 0, 2, 1, 2, 0, 1]) + +// # groupby operator +// unique_indices, unique_inverse_indices = +// torch.unique(group_indices,return_inverse=True) + +// print(f"unique_indices: {unique_indices} and unique_inverse_indices: +// {unique_inverse_indices}") + +// grouped_data = [] + +// for i in unique_indices: # use unique_inverse_indices +// group_data = data[unique_inverse_indices == i] +// grouped_data.append(group_data) + +// for i, group in enumerate(grouped_data): +// print(f"Group {i}: {group}") + +// Group 0: tensor([10, 30, 70]) +// Group 1: tensor([20, 50, 80]) +// Group 2: tensor([40, 60]) ParallelTensorShape get_output_shape(Group_byAttrs const &attrs, ParallelTensorShape const &input_shape, @@ -25,7 +35,7 @@ ParallelTensorShape get_output_shape(Group_byAttrs const &attrs, throw mk_runtime_error( "Group_by: input and index must have the same number of dimensions"); } - // degree of output is same as input's + // Note: how to decide the groupby output shape? 
return input_shape; } diff --git a/lib/op-attrs/src/layer_norm.cc b/lib/op-attrs/src/layer_norm.cc index d98562219f..43211fbf24 100644 --- a/lib/op-attrs/src/layer_norm.cc +++ b/lib/op-attrs/src/layer_norm.cc @@ -9,7 +9,7 @@ ParallelTensorShape get_output_shape(LayerNormAttrs const &attrs, if (input.num_dims() < 2) { throw mk_runtime_error("LayerNorm: input must have at least 2 dimensions"); } - // output degree is same as input degree + // output shape is smae as input return input_shape; } diff --git a/lib/op-attrs/src/linear.cc b/lib/op-attrs/src/linear.cc index ec0f5dd235..6c1748f517 100644 --- a/lib/op-attrs/src/linear.cc +++ b/lib/op-attrs/src/linear.cc @@ -11,44 +11,21 @@ namespace FlexFlow { // pytorch linearattrs: should be {input_channels, output_channels} // pytorch: output shape:{batch_size, output_channels} // question: the Linearattrs doesn't have input_channels -ParallelTensorShape get_output_shape(LinearAttrs const &atts, - ParallelTensorShape const &input) { +// input: (, , ) +// linearattrs: should be {input_channels, output_channels} +// the Linearattrs doesn't have input_channels, just have output_channels +// output:(, , > +// I think do1 = di1, do = ri, do2= di2, do3 = di3 - ParallelTensorShape out_shape = input; - if (input.num_dims() != 2) { - throw mk_runtime_error("LinearAttrs: input shape should be 2D"); +ParallelTensorShape get_output_shape(LinearAttrs const &attrs, + ParallelTensorShape const &input) { + ParallelTensorShape output_shape = input; + if (input.num_dims() != 3) { + throw mk_runtime_error("LinearAttrs: input shape should be 3D"); } - out_shape.at(ff_dim_t(1)).size = atts.out_channels; - // linear shoud consider the degree - // case 1: input:[N, K], weight:[K, M], degree is 1 - if (input.at(ff_dim_t(0)).degree == 1 && input.at(ff_dim_t(1)).degree == 1) { - for (int i = 0; i < input.num_dims(); i++) { - out_shape.at(ff_dim_t(i)).is_replica_dim = false; - out_shape.at(ff_dim_t(i)).degree = 1; - } - } else if 
(input.at(ff_dim_t(0)).degree == 1 && - input.at(ff_dim_t(1)).degree > 1) { - // case 2: input [N, k/x], weight [k/x, M], output [N, M], degree is x - out_shape.at(ff_dim_t(1)).degree = input.at(ff_dim_t(1)).degree; - out_shape.at(ff_dim_t(1)).is_replica_dim = true; - } else if (input.at(ff_dim_t(0)).degree > 1 && - input.at(ff_dim_t(1)).degree == 1) { - // case 3: input [N/X, K], weight [K, M/X], output [N/X, M], degree is X - out_shape.at(ff_dim_t(0)).degree = input.at(ff_dim_t(0)).degree; - out_shape.at(ff_dim_t(0)).is_replica_dim = true; - } else if (input.at(ff_dim_t(0)).degree > 1 && - input.at(ff_dim_t(1)).degree > 1) { - // case 4: input [N/X, K/Y], weight [K/Y, M/X], output [N/X, M/X], degree is - // X - for (int i = 0; i < input.num_dims(); i++) { - out_shape.at(ff_dim_t(i)).is_replica_dim = true; - out_shape.at(ff_dim_t(i)).degree = input.at(ff_dim_t(i)).degree; - } - } else { - throw mk_runtime_error("LinearAttrs: degree is not supported"); - } - return out_shape; + output_shape.at(ff_dim_t(2)).size = attrs.out_channels; + return output_shape; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/pool_2d.cc b/lib/op-attrs/src/pool_2d.cc index ec6253a0b1..23e1c6dd3d 100644 --- a/lib/op-attrs/src/pool_2d.cc +++ b/lib/op-attrs/src/pool_2d.cc @@ -41,85 +41,53 @@ static ParallelDimMappingSolution return solve_parallel_dim_mappings(construct_mappings(input), {input}, 0, 1); } -bool Pool2DAttrs::is_valid(ParallelTensorShape const &input) const { - if (!input.is_valid()) { - return false; - } - return true; -} - // https://pytorch.org/docs/stable/generated/torch.nn.AvgPool2d.html // https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html -// pytorch: we have two type of pool2d, maxpool2d and avgpool2d -// input shape: (batch_size, channels, input_height, input_width) -// for avgpool2d, output shape: (batch_size, channels, 1, 1) -// for maxpool2d, output shape: (batch_size, channels, output_height, -// output_width) output_height = (input_height + 2 
* padding_h - kernel_h) / -// stride_h + 1 output_width = (input_width + 2 * padding_w - kernel_w) / -// stride_w + 1 +// input:(< ri, di1, t>, , , , ) + +// Pool2DAttrs: req kernel_h, kernel_w, stride_h, stride_w, padding_h, +// padding_w; + +// for avgpool2d: output shape:(< ri, di1, t>, , , +// <1,1,f>, <1,1,f> ) + +// for maxpool2d, output shape:(< ri, di1, t>, , , +// , ) + +// output_height = (input_height + 2 * padding_h - kernel_h) / stride_h + 1 +// output_width = (input_width + 2 * padding_w - kernel_w) / stride_w + 1 ParallelTensorShape get_output_shape(Pool2DAttrs const &attrs, ParallelTensorShape const &input) { - - if (input.num_dims() != 4) { - throw mk_runtime_error("Pool2DAttrs: input shape should be 4D"); + if (input.num_dims() != 5) { + throw mk_runtime_error("Pool2DAttrs, input shape should be 5D"); } - ParallelTensorShape output_shape = input; + if (attrs.pool_type == PoolOp::AVG) { - output_shape.at(ff_dim_t(2)).size = 1; - output_shape.at(ff_dim_t(3)).size = 1; + std::vector data; + data.resize(4); + data[0] = input.at(ff_dim_t(0)); + data[1] = input.at(ff_dim_t(1)); + data[2] = {1, 1, false}; + data[3] = {1, 1, false}; + ParallelTensorShape output = ParallelTensorShape( + ParallelTensorDims(TensorDims(data.begin(), data.end())), + input.data_type); + return output; } else if (attrs.pool_type == PoolOp::MAX) { - output_shape.at(ff_dim_t(2)).size = - (input.at(ff_dim_t(2)).size + 2 * attrs.padding_h - attrs.kernel_h) / + ParallelTensorShape output_shape = input; + output_shape.at(ff_dim_t(3)).size = + (input.at(ff_dim_t(3)).size + 2 * attrs.padding_h - attrs.kernel_h) / attrs.stride_h + 1; - output_shape.at(ff_dim_t(3)).size = - (input.at(ff_dim_t(3)).size + 2 * attrs.padding_w - attrs.kernel_w) / + output_shape.at(ff_dim_t(4)).size = + (input.at(ff_dim_t(4)).size + 2 * attrs.padding_w - attrs.kernel_w) / attrs.stride_w + 1; + return output_shape; } else { throw mk_runtime_error("Pool2DAttrs: pool type is not supported"); } - - // case 1: 
input:[N, C, H, W], output:[N, C, 1, 1], degree is 1 for avgpool2d - // input: [N, C, H, W], output: [N, C, output_height, output_width], degree is - // 1 for maxpool2d - if (input.at(ff_dim_t(2)).degree == 1 && input.at(ff_dim_t(3)).degree == 1) { - for (int i = 2; i < input.num_dims(); i++) { - output_shape.at(ff_dim_t(i)).is_replica_dim = false; - output_shape.at(ff_dim_t(i)).degree = 1; - } - } else if (input.at(ff_dim_t(2)).degree > 1 && - input.at(ff_dim_t(3)).degree == 1) { - // case 2: input [N, C, H/X, W] output [N, C, 1, 1], degree is X - // input [N, C, H/X, W] output [N, C, output_height/x, output_width], degree - // is X - output_shape.at(ff_dim_t(2)).degree = input.at(ff_dim_t(2)).degree; - output_shape.at(ff_dim_t(2)).is_replica_dim = true; - output_shape.at(ff_dim_t(3)).degree = 1; - output_shape.at(ff_dim_t(3)).is_replica_dim = false; - } else if (input.at(ff_dim_t(2)).degree == 1 && - input.at(ff_dim_t(3)).degree > 1) { - // case 3: input [N, C, H, W/X] output [N, C, 1, 1], degree is X - // input [N, C, H, W/X] output [N, C, output_height, output_width/x], degree - // is X - output_shape.at(ff_dim_t(2)).degree = 1; - output_shape.at(ff_dim_t(2)).is_replica_dim = false; - output_shape.at(ff_dim_t(3)).degree = input.at(ff_dim_t(3)).degree; - output_shape.at(ff_dim_t(3)).is_replica_dim = true; - } else if (input.at(ff_dim_t(2)).degree > 1 && - input.at(ff_dim_t(3)).degree > 1) { - // case 4: input [N, C, H/X, W/Y] output [N, C, 1, 1], degree is X and Y for - // avgpool2d input [N, C, H/X, W/Y] output [N, C, output_height/x, - // output_width/y], degree is X and Y for maxpool2d - for (int i = 2; i < input.num_dims(); i++) { - output_shape.at(ff_dim_t(i)).is_replica_dim = true; - output_shape.at(ff_dim_t(i)).degree = input.at(ff_dim_t(i)).degree; - } - } else { - throw mk_runtime_error("Pool2DAttrs: degree is not supported"); - } - - return output_shape; } /* ParallelTensorShape Pool2DAttrs::calculate_output_shape(ParallelTensorShape diff --git 
a/lib/op-attrs/src/topk.cc b/lib/op-attrs/src/topk.cc index 73bf59b048..8c1d043a57 100644 --- a/lib/op-attrs/src/topk.cc +++ b/lib/op-attrs/src/topk.cc @@ -15,8 +15,7 @@ ParallelTensorShape get_output_shape(TopKAttrs const &attrs, output.at(ff_dim_t(attrs.axis)).size = attrs.k; output.at(ff_dim_t(attrs.axis)).degree = input.at(ff_dim_t(attrs.axis)).degree; - output.at(ff_dim_t(attrs.axis)).is_replica_dim = - input.at(ff_dim_t(attrs.axis)).degree > 1; + output.at(ff_dim_t(attrs.axis)).is_replica_dim = false; return output; } From 9f49b959002f392168a048d07b3155797335963b Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 28 Oct 2023 01:49:44 +0000 Subject: [PATCH 64/69] add reduce --- lib/op-attrs/src/reduce.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/op-attrs/src/reduce.cc b/lib/op-attrs/src/reduce.cc index 79fa6d7598..b28c722268 100644 --- a/lib/op-attrs/src/reduce.cc +++ b/lib/op-attrs/src/reduce.cc @@ -1,14 +1,22 @@ #include "op-attrs/ops/reduce.h" -#include "utils/exceptions.h" +#include "utils/exception.decl.h" +#include "utils/exception.h" namespace FlexFlow { // ParallelTensorShape get_output_shape(ReduceAttrs const &attrs, ParallelTensorShape const &input) { - NOT_IMPLEMENTED() - // reduce is sum/max/min/mean, I think we just return 1D tensor [1, 2, 4] => - // [7, ] NOTE: how to implement this + if (input.num_dims() - attrs.axes.size() == 1) { + throw mk_runtime_error(" for reduce, the input and attrs.axes must match"); + } + ParallelTensorShape output = input; + for (int i = 0; i < attrs.axes.size(); i++) { + output.at(attrs.axes.at(i)).size = 1; + output.at(attrs.axes.at(i)).is_replica_dim = false; + } + + return output; } } // namespace FlexFlow From a715cdf9eae26ef492f9d89cc90c92ac8de2632a Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 28 Oct 2023 02:07:51 +0000 Subject: [PATCH 65/69] implement the reshape --- lib/op-attrs/src/reduction.cc | 3 +- lib/op-attrs/src/repartition.cc | 3 +- 
lib/op-attrs/src/replicate.cc | 6 ++-- lib/op-attrs/src/reshape.cc | 52 ++++++++++++++++++++------------- 4 files changed, 38 insertions(+), 26 deletions(-) diff --git a/lib/op-attrs/src/reduction.cc b/lib/op-attrs/src/reduction.cc index 6336e15253..48455bd706 100644 --- a/lib/op-attrs/src/reduction.cc +++ b/lib/op-attrs/src/reduction.cc @@ -13,8 +13,7 @@ namespace FlexFlow { ParallelTensorShape get_output_shape(ReductionAttrs const &attrs, ParallelTensorShape const &input_shape) { ParallelTensorShape output(input_shape.dims, input_shape.data_type); - output.at(attrs.reduction_dim).degree /= attrs.reduction_degree; - output.at(attrs.reduction_dim).size /= attrs.reduction_degree; + output.at(attrs.reduction_dim).size = 1; return output; } diff --git a/lib/op-attrs/src/repartition.cc b/lib/op-attrs/src/repartition.cc index d7807fcc10..3046cf1ca7 100644 --- a/lib/op-attrs/src/repartition.cc +++ b/lib/op-attrs/src/repartition.cc @@ -14,7 +14,8 @@ ParallelTensorShape get_output_shape(RepartitionAttrs const &attrs, "attrs.repartition_degree * dim.degree != 0"); } ParallelTensorShape output(input.dims, input.data_type); - output.at(attrs.repartition_dim).degree *= attrs.repartition_degree; + output.at(attrs.repartition_dim).degree *= + attrs.repartition_degree; // NOTE: this may have some problem return output; } diff --git a/lib/op-attrs/src/replicate.cc b/lib/op-attrs/src/replicate.cc index 7a1b511a5e..b3c4e8e970 100644 --- a/lib/op-attrs/src/replicate.cc +++ b/lib/op-attrs/src/replicate.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/replicate.h" +#include "op-attrs/ff_dim.h" #include "op-attrs/parallel_dim.h" #include "utils/exception.h" @@ -19,9 +20,8 @@ ParallelTensorShape get_output_shape(ReplicateAttrs const &attrs, "range or input is invalid"); } ParallelTensorShape output = input; - output.at(attrs.replicate_dim).size *= attrs.replicate_degree; - output.at(attrs.replicate_dim).is_replica_dim = - (input.at(attrs.replicate_dim).degree > 1); + 
output.at(ff_dim_t(0)).is_replica_dim = true; + output.at(ff_dim_t(0)).size *= attrs.replicate_degree; return output; } diff --git a/lib/op-attrs/src/reshape.cc b/lib/op-attrs/src/reshape.cc index b7e887002a..1cbbd36863 100644 --- a/lib/op-attrs/src/reshape.cc +++ b/lib/op-attrs/src/reshape.cc @@ -1,17 +1,15 @@ #include "op-attrs/ops/reshape.h" #include "op-attrs/ff_dim.h" +#include "op-attrs/parallel_tensor_shape.h" #include "utils/exception.h" namespace FlexFlow { // https://pytorch.org/docs/stable/generated/torch.reshape.html -// pytorch: the input: [2,3,4], shape maybe [-1,6], should we add this? and -// the output is [4, 6] currently we doesn't consider the case of -1,we can -// support this later the input:[2,3,4], attrs.shape:[4,6], the output is [4, -// 6] ParallelTensorShape get_output_shape(ReshapeAttrs const &attrs, ParallelTensorShape const &input) { - std::size_t input_volume = input.dims.get_volume(); + std::size_t input_volume = + input.dims.get_volume() / input.at(ff_dim_t(0)).size; std::size_t attrs_volume = 1; for (int i = 0; i < attrs.shape.dims.num_dims(); i++) { attrs_volume *= attrs.shape.at(ff_dim_t(i)); @@ -20,28 +18,42 @@ ParallelTensorShape get_output_shape(ReshapeAttrs const &attrs, throw mk_runtime_error("ReshapeAttrs: input_volume != attrs_volume"); } - ParallelTensorShape output = input; - output.data_type = input.data_type; + std::vector data; + if (attrs.shape.dims.num_dims() == 1) { // infer the shape if (attrs.shape.at(ff_dim_t(0)) == -1) { - - output.at(ff_dim_t(0)).size = input_volume; - output.at(ff_dim_t(0)).degree = 1; - output.at(ff_dim_t(0)).is_replica_dim = false; + // the output shape will be (, ) + data.resize(2); + data[0] = input.at(ff_dim_t(0)); + data[1].size = input_volume; + // how to decide the degree? 
+ ParallelTensorShape output = ParallelTensorShape( + ParallelTensorDims(TensorDims(data.begin(), data.end())), + input.data_type); + return output; } else { - output.at(ff_dim_t(0)).size = attrs.shape.at(ff_dim_t(0)); - output.at(ff_dim_t(1)).size = input_volume / attrs.shape.at(ff_dim_t(0)); - for (int i = 0; i < 2; i++) { - output.at(ff_dim_t(i)).degree = 1; - output.at(ff_dim_t(i)).is_replica_dim = false; + // i = attrs.shape.at(ff_dim_t(0) + // the output shape will be (, , ) + data.resize(3); + data[0] = input.at(ff_dim_t(0)); + data[1].size = attrs.shape.at(ff_dim_t(0)); + data[2].size = input_volume / attrs.shape.at(ff_dim_t(0)); + for (int i = 1; i < 3; i++) { + // how to decide the degree? + data[i].is_replica_dim = false; } + ParallelTensorShape output = ParallelTensorShape( + ParallelTensorDims(TensorDims(data.begin(), data.end())), + input.data_type); + return output; } - } else { - ParallelTensorDims dims{attrs.shape.dims}; - output = {dims, input.data_type}; - // Note: I think reshape doesn't need to consider the degree } + + ParallelTensorDims dims{attrs.shape.dims}; + ParallelTensorShape output = {dims, input.data_type}; + return output; } From 326f7f3d5971a2e3ee477e5641d5d657f8acd46e Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 28 Oct 2023 02:24:11 +0000 Subject: [PATCH 66/69] implement the split --- lib/op-attrs/include/op-attrs/ops/flat.h | 3 +++ lib/op-attrs/include/op-attrs/ops/gather.h | 6 +++--- lib/op-attrs/include/op-attrs/ops/split.h | 4 ++-- lib/op-attrs/src/reverse.cc | 1 - lib/op-attrs/src/split.cc | 18 ++++++++++++++++-- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/op-attrs/include/op-attrs/ops/flat.h b/lib/op-attrs/include/op-attrs/ops/flat.h index 706689199d..88b0a6cb54 100644 --- a/lib/op-attrs/include/op-attrs/ops/flat.h +++ b/lib/op-attrs/include/op-attrs/ops/flat.h @@ -11,6 +11,9 @@ struct FlatAttrs {}; FF_VISITABLE_STRUCT(FlatAttrs); CHECK_VALID_OP_ATTR(FlatAttrs); +ParallelTensorShape 
get_output_shape(FlatAttrs const &attrs, + ParallelTensorShape const &input); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/gather.h b/lib/op-attrs/include/op-attrs/ops/gather.h index 852dc9cd5e..ad97e52556 100644 --- a/lib/op-attrs/include/op-attrs/ops/gather.h +++ b/lib/op-attrs/include/op-attrs/ops/gather.h @@ -14,9 +14,9 @@ struct GatherAttrs { FF_VISITABLE_STRUCT(GatherAttrs, dim); CHECK_VALID_OP_ATTR(GatherAttrs); -std::vector get_output_shapes(GatherAttrs const &, - ParallelTensorShape const &, - ParallelTensorShape const &); +std::vector get_output_shape(GatherAttrs const &, + ParallelTensorShape const &, + ParallelTensorShape const &); } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/split.h b/lib/op-attrs/include/op-attrs/ops/split.h index 14f9395a26..f2f904a9f7 100644 --- a/lib/op-attrs/include/op-attrs/ops/split.h +++ b/lib/op-attrs/include/op-attrs/ops/split.h @@ -13,8 +13,8 @@ struct SplitAttrs { }; FF_VISITABLE_STRUCT(SplitAttrs, splits, axis); CHECK_VALID_OP_ATTR(SplitAttrs); -std::vector get_output_shapes(SplitAttrs const &, - ParallelTensorShape const &); +std::vector get_output_shape(SplitAttrs const &, + ParallelTensorShape const &); } // namespace FlexFlow diff --git a/lib/op-attrs/src/reverse.cc b/lib/op-attrs/src/reverse.cc index 663150861f..f79fcfd0ed 100644 --- a/lib/op-attrs/src/reverse.cc +++ b/lib/op-attrs/src/reverse.cc @@ -9,7 +9,6 @@ ParallelTensorShape get_output_shape(ReverseAttrs const &attrs, if (attrs.axis < 0 || attrs.axis >= input_shape.num_dims()) { throw mk_runtime_error("ReverseAttrs: axis is invalid"); } - // output degree is same as input degree, because it's just reverse operation return input_shape; } diff --git a/lib/op-attrs/src/split.cc b/lib/op-attrs/src/split.cc index 5c6f3d6924..8ab083c46b 100644 --- a/lib/op-attrs/src/split.cc +++ b/lib/op-attrs/src/split.cc @@ -21,8 +21,22 @@ std::vector outputs.back().at(ff_dim_t(attrs.axis)).size = attrs.splits[i]; 
outputs.back().at(ff_dim_t(attrs.axis)).degree = input.at(ff_dim_t(attrs.axis)).degree; - outputs.back().at(ff_dim_t(attrs.axis)).is_replica_dim = - input.at(ff_dim_t(attrs.axis)).degree > 1; + outputs.back().at(ff_dim_t(attrs.axis)).is_replica_dim = attrs.axis == 0; + } + return outputs; +} + +std::vector + get_output_shape(SplitAttrs const &attrs, + ParallelTensorShape const &input) { + std::size_t dims_sum = sum(attrs.splits); + if (dims_sum != input.at(ff_dim_t(attrs.axis)).size) { + throw mk_runtime_error( + "SplitAttrs: dims_sum != input.at(ff_dim_t(attrs.axis)).size"); + } + + std::vector outputs; + for (std::size_t i = 0; i < attrs.splits.size(); ++i) { } return outputs; } From 6c824660f0c6f5d687f28e272e3098853923d7dc Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 28 Oct 2023 02:28:21 +0000 Subject: [PATCH 67/69] add transpose --- lib/op-attrs/src/transpose.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/op-attrs/src/transpose.cc b/lib/op-attrs/src/transpose.cc index 88772b72e0..ac853bd0ed 100644 --- a/lib/op-attrs/src/transpose.cc +++ b/lib/op-attrs/src/transpose.cc @@ -4,18 +4,20 @@ namespace FlexFlow { -// assume we have [x, y, z, l], perms is [0,2] we return [z, y, x, l] +// assume input:[, , , ] +// perm is [1,2] +// output:[, , , ] ParallelTensorShape get_output_shape(TransposeAttrs const &attrs, ParallelTensorShape const &input) { if (attrs.perm.size() != 2) { throw mk_runtime_error("TransposeAttrs: perm.size() != 2"); } - auto dim0 = attrs.perm[0]; + auto dim0 = attrs.perm[0]; // dim0 and dim1 should not be 0 auto dim1 = attrs.perm[1]; - if (dim0 < 0 || dim1 < 0 || dim0 >= input.num_dims() || + if (dim0 <= 0 || dim1 <= 0 || dim0 >= input.num_dims() || dim1 >= input.num_dims()) { - throw mk_runtime_error("TransposeAttrs: dim0 < 0 || dim1 < 0 || dim0 >= " + throw mk_runtime_error("TransposeAttrs: dim0 <= 0 || dim1 <= 0 || dim0 >= " "input.num_dims() || dim1 >= input.num_dims()"); } @@ -26,10 +28,8 @@
ParallelTensorShape get_output_shape(TransposeAttrs const &attrs, output.at(ff_dim_t(dim1)).size = temp; output.at(ff_dim_t(dim0)).degree = input.at(ff_dim_t(dim1)).degree; output.at(ff_dim_t(dim1)).degree = degree; - output.at(ff_dim_t(dim0)).is_replica_dim = - output.at(ff_dim_t(dim0)).degree > 1; - output.at(ff_dim_t(dim1)).is_replica_dim = - output.at(ff_dim_t(dim1)).degree > 1; + output.at(ff_dim_t(dim0)).is_replica_dim = dim0 == 0; + output.at(ff_dim_t(dim1)).is_replica_dim = dim1 == 0; return output; } From ce7ba69830840403fa1eaaf2cd443525e76c1ba2 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 28 Oct 2023 02:55:08 +0000 Subject: [PATCH 68/69] leave the attention --- lib/op-attrs/src/attention.cc | 88 +++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index cc6d9c3c48..717487d1db 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -1,5 +1,6 @@ #include "op-attrs/ops/attention.h" #include "op-attrs/parallel_tensor_shape.h" +#include "utils/exception.decl.h" #include "utils/exception.h" namespace FlexFlow { @@ -122,6 +123,93 @@ ParallelTensorShape get_output_shape( return output; } +// https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html, +// we consider the batch size +// query/key/value: 4D dimensions +//query:[, , , ] + +// key:[, , , ] + +// value:[, , , ] +// multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) + + +// output: (seq_len, batch_size, embed_dim) + +//output: [, , , ] + + +//how to decide the ro/do0? ro = rq * dq1 * dq12 * dq3 / do1/do2/do3 + +//how to decide the do1 or do2 or do3? +//ro / do0 = dq / dq0 + + // k->(<, ,, ,, ) //num_head * kdim = embed_dim + +// v->(<, ,, ,, ) //num_head * vdim = embed_dim + +// q->(<, , . ,, ) //num_head * kdim = embed_dim + +// // attn = q @k (, , , ) + +//how to decide the ra11/da11/da12/da13/da14? 
+ +//rk * dk2 * dk3 = rv * dv2 * dv3 , dk3 = dv3, + +//so da13 = dk2, da14 = dv2, ra11 = rk * dk2 * dk3 / (da13 * da14) = rk * dk3 / dv2 = rv + +//da11 = rk / dk0 / ra11 + +//attn: (, , , ) + + + +ParallelTensorShape get_output_shape( + MultiHeadAttentionAttrs const &attrs, + MultiHeadAttentionInputs const &input) { + + if (input.query.num_dims() != 4 || input.key.num_dims() != 4 || + input.value.num_dims() != 4) { + throw mk_runtime_error("MultiHeadAttentionAttrs: num_dims != 4"); + } + + + if (input.query.at(ff_dim_t(1)).size != input.key.at(ff_dim_t(1)).size || + input.query.at(ff_dim_t(1)).size != input.value.at(ff_dim_t(1)).size || + input.key.at(ff_dim_t(1)).size != input.value.at(ff_dim_t(1)).size) { + throw mk_runtime_error("MultiHeadAttentionAttrs: seq_len not match"); + } + + if (input.query.at(ff_dim_t(2)).size != input.key.at(ff_dim_t(2)).size || + input.query.at(ff_dim_t(2)).size != input.value.at(ff_dim_t(2)).size || + input.key.at(ff_dim_t(2)).size != input.value.at(ff_dim_t(2)).size) { + throw mk_runtime_error("MultiHeadAttentionAttrs: batch_size not match"); + } + + if (input.query.at(ff_dim_t(3)).size != input.key.at(ff_dim_t(3)).size || + input.query.at(ff_dim_t(3)).size != input.value.at(ff_dim_t(3)).size || + input.key.at(ff_dim_t(3)).size != input.value.at(ff_dim_t(3)).size) { + throw mk_runtime_error("MultiHeadAttentionAttrs: embed_dim not match"); + } + + if (input.query.at(ff_dim_t(3)).size != attrs.embed_dim || + input.key.at(ff_dim_t(3)).size != attrs.embed_dim || + input.value.at(ff_dim_t(3)).size != attrs.embed_dim) { + throw mk_runtime_error( + "MultiHeadAttentionAttrs: input's embed_dim not match to attrs"); + } + + if (attrs.embed_dim != (attrs.num_heads * attrs.kdim)) { + throw mk_runtime_error( + "MultiHeadAttentionAttrs: embed_dim not match to num_heads * kdim"); + } + + + NOT_IMPLEMENTED(); + +} + + } // namespace FlexFlow // Tensor FFModel::multihead_attention(const Tensor query, From fec4928ea094b327724b0ac7b2ace4f9450cd704 Mon 
Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 28 Oct 2023 23:54:21 +0000 Subject: [PATCH 69/69] add attention --- lib/op-attrs/src/attention.cc | 82 +++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/lib/op-attrs/src/attention.cc b/lib/op-attrs/src/attention.cc index 717487d1db..fb9ab0cd29 100644 --- a/lib/op-attrs/src/attention.cc +++ b/lib/op-attrs/src/attention.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/attention.h" +#include "op-attrs/ff_dim.h" #include "op-attrs/parallel_tensor_shape.h" #include "utils/exception.decl.h" #include "utils/exception.h" @@ -111,7 +112,7 @@ ParallelTensorShape get_output_shape( // attn = q @k (seq_len, num_head, batch_size, batch_size) // attn = attn @v (seq_len, num_head, batch_size, vdim) // attn = attn.transpose(1,2) (seq_len, batch_size, num_head, vdim) - // attn = attn.reshape(seq_len, batch_size, num_head*vdim) + // // Note: we support tensor parallelism for seq_len/batch_size/embed_dim ParallelTensorShape output = input.query; @@ -126,43 +127,78 @@ ParallelTensorShape get_output_shape( // https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html, // we consider the batch size // query/key/value: 4D dimensions -//query:[, , , ] +// query:[, , , ] // key:[, , , ] // value:[, , , ] // multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) +// ### k:(<, ,, ,, ) -// output: (seq_len, batch_size, embed_dim) +// k->(<, ,, ,, ) //num_head * kdim = embed_dim , dk2 = dk4 + +// v->(<, ,, ,, ) //num_head * vdim = embed_dim , dv2 = dv4 + +// q->(<, , . ,, ) //num_head * kdim = embed_dim , dq2 = dq4 + +// we have dk1 = dv1 = dq1 dk2 = dk4=dv2=dv4=dq2=dq4 + +// 1)/ attn = q @k (, , , +// , ) + +// how to decide the ra11//da10/da11/da12/da13/da14? ⇒ I think da11 =dk1, da12 = +// dk2, da13. 
= dk3, da14 = dq3 -//output: [, , , ] +// rk * dk3 * dk4=rq * dq3 * dq4 = ra11 * da13 * da14 = ra11 * dk3 * dq3 +// => ra11 = (rk * dk4) / dq3 = (rq * dq4) / dk3 , ra11/da10 = rq / dq0, -//how to decide the ro/do0? ro = rq * dq1 * dq12 * dq3 / do1/do2/do3 +// =>da10 = ra11 * dq0 / rq = dq0 * dq4 / dk3 -//how to decide the do1 or do2 or do3? -//ro / do0 = dq / dq0 +// output attn: (< (rq * dq4) / dk3, dq0 * dq4 / dk3, t>, , +// , , ) - // k->(<, ,, ,, ) //num_head * kdim = embed_dim +// 2)attn = attn @v (seq_len, num_head, batch_size, vdim) -// v->(<, ,, ,, ) //num_head * vdim = embed_dim +// input attn:(< (rq * dq4) / dk3, dq0 * dq4 / dk3, t>, , +// , , ) -// q->(<, , . ,, ) //num_head * kdim = embed_dim +// input v: ((<, , , ,, ) //num_head * vdim = embed_dim -// // attn = q @k (, , , ) +// output attn:(, , , , -//how to decide the ra11/da11/da12/da13/da14? +// how to decide ra21//da20/da21/da22/da23/da24? ⇒ da21 = dk1, da22 = dk2, da23 +// = dk3, da24 = dv4 -//rk * dk2 * dk3 = rv * dv2 * dv3 , dk3 = dv3, +// ra21 * da23 * da24 = rv * dv3 * dv4 ⇒ ra21 = (rv * dv3) / dk3 -//so da13 = dk2, da14 = dv2, ra11 = rk * dk2 * dk3 / (da13 * da14) = rk * dk3 / dv2 = rv +// ra21 / da20 = (rq * dq4) / dk3 / (dq0 * dq4 / dk3) ⇒ da20 = (rv * dv3 * dq0) +// / (rq * dk3) -//da11 = rk / dk0 / ra11 +// output attn:(<(rv * dv3) / dk3 , (rv * dv3 * dq0) / (rq * dk3), t>, , , , ) -//attn: (, , , ) +// 3) attn = attn.transpose(1,2 ) (seq_len, batch_size, num_head, vdim) +// input attn:(<(rv * dv3) / dk3 , (rv * dv3 * dq0) / (rq * dk3), t>, , , , +// output attn:(<(rv * dv3) / dk3 , (rv * dv3 * dq0) / (rq * dk3), t>, , , , + +// 4)attn = attn.reshape(seq_len, batch_size, num_head*vdim) + +// input attn:(<(rv * dv3) / dk3 , (rv * dv3 * dq0) / (rq * dk3), t>, , , , + +// output attn:(<(rv * dv3) / dk3 , (rv * dv3 * dq0) / (rq * dk3), t>, , , , ParallelTensorShape get_output_shape( MultiHeadAttentionAttrs const &attrs, @@ -173,7 +209,6 @@ ParallelTensorShape get_output_shape( throw 
mk_runtime_error("MultiHeadAttentionAttrs: num_dims != 4"); } - if (input.query.at(ff_dim_t(1)).size != input.key.at(ff_dim_t(1)).size || input.query.at(ff_dim_t(1)).size != input.value.at(ff_dim_t(1)).size || input.key.at(ff_dim_t(1)).size != input.value.at(ff_dim_t(1)).size) { @@ -204,12 +239,21 @@ ParallelTensorShape get_output_shape( "MultiHeadAttentionAttrs: embed_dim not match to num_heads * kdim"); } + ParallelTensorShape output = input.key; - NOT_IMPLEMENTED(); + output.at(ff_dim_t(0)).size = + (input.value.at(ff_dim_t(0)).size * input.value.at(ff_dim_t(2)).degree) / + input.key.at(ff_dim_t(2)).degree; // rv3 * dv3 / dk3 + output.at(ff_dim_t(0)).degree = + (input.value.at(ff_dim_t(0)).size * input.value.at(ff_dim_t(2)).degree * + input.query.at(ff_dim_t(0)).degree) / + (input.query.at(ff_dim_t(0)).size * input.key.at(ff_dim_t(2)).degree); + // (rv * dv3 * dq0) / (rq * dk3) + output.at(ff_dim_t(0)).is_replica_dim = true; + return output; } - } // namespace FlexFlow // Tensor FFModel::multihead_attention(const Tensor query,