From 6ad490aac289d080ecbdad00c4038021937111e1 Mon Sep 17 00:00:00 2001 From: dijopaul Date: Wed, 11 Sep 2024 06:19:38 -0700 Subject: [PATCH 1/2] Adding sigmoid optimizations --- .../cadence/hifi/operators/CMakeLists.txt | 2 +- .../cadence/hifi/operators/op_sigmoid.cpp | 72 +++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 backends/cadence/hifi/operators/op_sigmoid.cpp diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt index d56d19fc37f..509b7d1447b 100644 --- a/backends/cadence/hifi/operators/CMakeLists.txt +++ b/backends/cadence/hifi/operators/CMakeLists.txt @@ -23,6 +23,7 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_add.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_div.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp" + "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sigmoid.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sub.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp" @@ -30,7 +31,6 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_permute_copy.cpp" - "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sigmoid.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_slice_copy.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_softmax.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_split_with_sizes_copy.cpp" diff --git a/backends/cadence/hifi/operators/op_sigmoid.cpp b/backends/cadence/hifi/operators/op_sigmoid.cpp new file mode 100644 index 00000000000..0b7a72bcc73 --- /dev/null +++ b/backends/cadence/hifi/operators/op_sigmoid.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include "kernels.h" + +namespace torch { +namespace executor { +namespace native { + +using Tensor = exec_aten::Tensor; + +Tensor& sigmoid_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { + (void)ctx; + + ET_KERNEL_CHECK( + ctx, in.scalar_type() != ScalarType::Bool, InvalidArgument, out); + ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out); + + // Resize for dynamic shape + ET_KERNEL_CHECK_MSG( + ctx, + resize_tensor(out, in.sizes()) == Error::Ok, + InvalidArgument, + out, + "Failed to resize output tensor."); + + ScalarType in_type = in.scalar_type(); + ScalarType out_type = out.scalar_type(); + + int fall_back = 0; + if((in_type != ScalarType::Float) || (out_type != ScalarType::Float)) + fall_back = 1; + + if(!fall_back) + { + float* data_in = in.mutable_data_ptr(); + float* data_out = out.mutable_data_ptr(); + xa_nn_vec_sigmoid_f32_f32(data_out, data_in, in.numel()); + } + else + { + ET_SWITCH_REALHB_TYPES(in_type, ctx, "sigmoid.out", CTYPE_IN, [&]() { + ET_SWITCH_FLOATH_TYPES(out_type, ctx, "sigmoid.out", CTYPE_OUT, [&]() { + apply_unary_map_fn( + [](const CTYPE_IN val_in) { + // perform math in double to preserve precision + double in_casted = static_cast(val_in); + double out_val = 1.0 / (1.0 + exp(-in_casted)); + return static_cast(out_val); + }, + in.const_data_ptr(), + out.mutable_data_ptr(), + in.numel()); + }); + }); + } + + return out; +} + +} // namespace native +} // namespace executor +} // namespace torch From c0b10052a4f47991f6e3340b851747eb4b58a443 Mon Sep 17 00:00:00 2001 From: dijopaul Date: Thu, 12 Sep 2024 04:48:42 -0700 Subject: [PATCH 2/2] Adding tanh optimizations --- backends/cadence/aot/functions_hifi.yaml | 5 +++ .../cadence/hifi/operators/CMakeLists.txt | 2 + backends/cadence/hifi/operators/op_tanh.cpp | 40 +++++++++++++++++++ 3 
files changed, 47 insertions(+) create mode 100644 backends/cadence/hifi/operators/op_tanh.cpp diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml index 729db66850a..bab47eccff4 100644 --- a/backends/cadence/aot/functions_hifi.yaml +++ b/backends/cadence/aot/functions_hifi.yaml @@ -92,6 +92,11 @@ - arg_meta: null kernel_name: torch::executor::sub_out +- op: tanh.out + kernels: + - arg_meta: null + kernel_name: torch::executor::tanh_out + - op: view_copy.out kernels: - arg_meta: null diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt index 509b7d1447b..15e6c280ef6 100644 --- a/backends/cadence/hifi/operators/CMakeLists.txt +++ b/backends/cadence/hifi/operators/CMakeLists.txt @@ -25,6 +25,7 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sigmoid.cpp" "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sub.cpp" + "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_tanh.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp" @@ -37,6 +38,7 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_to_copy.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_where.cpp" + "${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_floath.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp" diff --git a/backends/cadence/hifi/operators/op_tanh.cpp b/backends/cadence/hifi/operators/op_tanh.cpp new file mode 100644 index 00000000000..47a057fe774 --- /dev/null +++ b/backends/cadence/hifi/operators/op_tanh.cpp @@ -0,0 +1,40 @@ +/* + * Copyright 
(c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// NOTE(review): the #include targets below were stripped in this copy of the
+// patch; reconstructed — verify against upstream.
+#include <executorch/kernels/portable/cpu/pattern/pattern.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+#include <cmath>
+#include "kernels.h"
+
+namespace torch {
+namespace executor {
+namespace native {
+
+// Hyperbolic tangent: out[i] = tanh(in[i]).
+// Fast path: fp32 in/out -> Cadence NNLib vectorized kernel.
+// Fallback: portable unary-ufunc pattern (handles all other supported
+// dtypes and resizes `out` internally).
+// Returns `out`, resized to `in`'s shape.
+Tensor& tanh_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
+  // The optimized NNLib kernel only handles fp32 -> fp32.
+  const bool use_nnlib =
+      (in.scalar_type() == ScalarType::Float) &&
+      (out.scalar_type() == ScalarType::Float);
+
+  if (use_nnlib) {
+    // Keep the fast path consistent with the portable fallback, which
+    // resizes `out` to `in`'s shape before computing; without this the
+    // kernel would write through a possibly stale-sized output.
+    ET_KERNEL_CHECK_MSG(
+        ctx,
+        resize_tensor(out, in.sizes()) == Error::Ok,
+        InvalidArgument,
+        out,
+        "Failed to resize output tensor.");
+    // `in` is const: request a const pointer, not a mutable one.
+    const float* data_in = in.const_data_ptr<float>();
+    float* data_out = out.mutable_data_ptr<float>();
+    xa_nn_vec_tanh_f32_f32(data_out, data_in, (int)in.numel());
+    return out;
+  }
+
+  return internal::unary_ufunc_realhb_to_floath(std::tanh, ctx, in, out);
+}
+
+} // namespace native
+} // namespace executor
+} // namespace torch