From 6ad490aac289d080ecbdad00c4038021937111e1 Mon Sep 17 00:00:00 2001 From: dijopaul Date: Wed, 11 Sep 2024 06:19:38 -0700 Subject: [PATCH 1/2] Adding sigmoid optimizations --- .../cadence/hifi/operators/CMakeLists.txt | 2 +- .../cadence/hifi/operators/op_sigmoid.cpp | 72 +++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 backends/cadence/hifi/operators/op_sigmoid.cpp diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt index d56d19fc37f..509b7d1447b 100644 --- a/backends/cadence/hifi/operators/CMakeLists.txt +++ b/backends/cadence/hifi/operators/CMakeLists.txt @@ -23,6 +23,7 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_add.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_div.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp" + "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sigmoid.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sub.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp" @@ -30,7 +31,6 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_permute_copy.cpp" - "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sigmoid.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_slice_copy.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_softmax.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_split_with_sizes_copy.cpp" diff --git a/backends/cadence/hifi/operators/op_sigmoid.cpp b/backends/cadence/hifi/operators/op_sigmoid.cpp new file mode 100644 index 00000000000..0b7a72bcc73 --- /dev/null +++ b/backends/cadence/hifi/operators/op_sigmoid.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include "kernels.h" + +namespace torch { +namespace executor { +namespace native { + +using Tensor = exec_aten::Tensor; + +Tensor& sigmoid_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { + (void)ctx; + + ET_KERNEL_CHECK( + ctx, in.scalar_type() != ScalarType::Bool, InvalidArgument, out); + ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out); + + // Resize for dynamic shape + ET_KERNEL_CHECK_MSG( + ctx, + resize_tensor(out, in.sizes()) == Error::Ok, + InvalidArgument, + out, + "Failed to resize output tensor."); + + ScalarType in_type = in.scalar_type(); + ScalarType out_type = out.scalar_type(); + + int fall_back = 0; + if((in_type != ScalarType::Float) || (out_type != ScalarType::Float)) + fall_back = 1; + + if(!fall_back) + { + float* data_in = in.mutable_data_ptr(); + float* data_out = out.mutable_data_ptr(); + xa_nn_vec_sigmoid_f32_f32(data_out, data_in, in.numel()); + } + else + { + ET_SWITCH_REALHB_TYPES(in_type, ctx, "sigmoid.out", CTYPE_IN, [&]() { + ET_SWITCH_FLOATH_TYPES(out_type, ctx, "sigmoid.out", CTYPE_OUT, [&]() { + apply_unary_map_fn( + [](const CTYPE_IN val_in) { + // perform math in double to preserve precision + double in_casted = static_cast(val_in); + double out_val = 1.0 / (1.0 + exp(-in_casted)); + return static_cast(out_val); + }, + in.const_data_ptr(), + out.mutable_data_ptr(), + in.numel()); + }); + }); + } + + return out; +} + +} // namespace native +} // namespace executor +} // namespace torch From c0b10052a4f47991f6e3340b851747eb4b58a443 Mon Sep 17 00:00:00 2001 From: dijopaul Date: Thu, 12 Sep 2024 04:48:42 -0700 Subject: [PATCH 2/2] Adding tanh optimizations --- backends/cadence/aot/functions_hifi.yaml | 5 +++ .../cadence/hifi/operators/CMakeLists.txt | 2 + backends/cadence/hifi/operators/op_tanh.cpp | 40 +++++++++++++++++++ 3 
files changed, 47 insertions(+) create mode 100644 backends/cadence/hifi/operators/op_tanh.cpp diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml index 729db66850a..bab47eccff4 100644 --- a/backends/cadence/aot/functions_hifi.yaml +++ b/backends/cadence/aot/functions_hifi.yaml @@ -92,6 +92,11 @@ - arg_meta: null kernel_name: torch::executor::sub_out +- op: tanh.out + kernels: + - arg_meta: null + kernel_name: torch::executor::tanh_out + - op: view_copy.out kernels: - arg_meta: null diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt index 509b7d1447b..15e6c280ef6 100644 --- a/backends/cadence/hifi/operators/CMakeLists.txt +++ b/backends/cadence/hifi/operators/CMakeLists.txt @@ -25,6 +25,7 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sigmoid.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sub.cpp" + "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_tanh.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp" @@ -37,6 +38,7 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_to_copy.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_where.cpp" + "${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_floath.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp" diff --git a/backends/cadence/hifi/operators/op_tanh.cpp b/backends/cadence/hifi/operators/op_tanh.cpp new file mode 100644 index 00000000000..47a057fe774 --- /dev/null +++ b/backends/cadence/hifi/operators/op_tanh.cpp @@ -0,0 +1,40 @@ +/* + * Copyright 
(c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// NOTE(review): the #include targets below were stripped in this copy of the
+// patch; reconstructed — verify against upstream.
+#include <executorch/kernels/portable/cpu/pattern/pattern.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+#include <cmath>
+#include "kernels.h"
+
+namespace torch {
+namespace executor {
+namespace native {
+
+// Hyperbolic tangent: out[i] = tanh(in[i]).
+// Fast path: fp32 in/out -> Cadence NNLib vectorized kernel.
+// Fallback: portable unary-ufunc pattern (handles all other supported
+// dtypes and resizes `out` internally).
+// Returns `out`, resized to `in`'s shape.
+Tensor& tanh_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
+  // The optimized NNLib kernel only handles fp32 -> fp32.
+  const bool use_nnlib =
+      (in.scalar_type() == ScalarType::Float) &&
+      (out.scalar_type() == ScalarType::Float);
+
+  if (use_nnlib) {
+    // Keep the fast path consistent with the portable fallback, which
+    // resizes `out` to `in`'s shape before computing; without this the
+    // kernel would write through a possibly stale-sized output.
+    ET_KERNEL_CHECK_MSG(
+        ctx,
+        resize_tensor(out, in.sizes()) == Error::Ok,
+        InvalidArgument,
+        out,
+        "Failed to resize output tensor.");
+    // `in` is const: request a const pointer, not a mutable one.
+    const float* data_in = in.const_data_ptr<float>();
+    float* data_out = out.mutable_data_ptr<float>();
+    xa_nn_vec_tanh_f32_f32(data_out, data_in, (int)in.numel());
+    return out;
+  }
+
+  return internal::unary_ufunc_realhb_to_floath(std::tanh, ctx, in, out);
+}
+
+} // namespace native
+} // namespace executor
+} // namespace torch