From 25268af2d915732986fa825ee5c87c5546070a3b Mon Sep 17 00:00:00 2001
From: lucylq <lfq@meta.com>
Date: Wed, 14 May 2025 10:39:41 -0700
Subject: [PATCH 1/3] Move optimized target definitions to op_registration_util.bzl

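Move the optimized ATen op table out of kernels/optimized/cpu/targets.bzl
(where it was the private `_OPTIMIZED_ATEN_OPS`) into
kernels/optimized/op_registration_util.bzl as the public `OPTIMIZED_ATEN_OPS`.

This lets us pull these definitions into codegen.bzl (we can't pull in targets.bzl files).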

Differential Revision: [D74741846](https://our.internmc.facebook.com/intern/diff/D74741846/)

[ghstack-poisoned]
---
 kernels/optimized/cpu/targets.bzl          | 128 +--------------------
 kernels/optimized/op_registration_util.bzl | 121 +++++++++++++++++++
 2 files changed, 124 insertions(+), 125 deletions(-)

diff --git a/kernels/optimized/cpu/targets.bzl b/kernels/optimized/cpu/targets.bzl
index 4b9cdef5936..0d60d2c6bee 100644
--- a/kernels/optimized/cpu/targets.bzl
+++ b/kernels/optimized/cpu/targets.bzl
@@ -1,127 +1,5 @@
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
-load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "define_op_target", "op_target")
-
-_OPTIMIZED_ATEN_OPS = (
-    op_target(
-        name = "op_add",
-        deps = [
-            ":binary_ops",
-            ":add_sub_impl",
-            "//executorch/kernels/portable/cpu:scalar_utils",
-            "//executorch/kernels/portable/cpu/util:broadcast_util",
-        ],
-    ),
-    op_target(
-        name = "op_bmm",
-        deps = [
-            "//executorch/kernels/optimized:libblas",
-            "//executorch/kernels/portable/cpu/util:matmul_ops_util",
-        ],
-    ),
-    op_target(
-        name = "op_div",
-        deps = [
-            ":binary_ops",
-            "//executorch/kernels/portable/cpu:scalar_utils",
-            "//executorch/kernels/portable/cpu/util:broadcast_util",
-        ],
-    ),
-    op_target(
-        name = "op_elu",
-        deps = [
-            "//executorch/extension/threadpool:threadpool",
-            "//executorch/kernels/portable/cpu:scalar_utils",
-            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
-        ],
-    ),
-    op_target(name = "op_exp"),
-    op_target(
-        name = "op_fft_c2r",
-        compiler_flags = [] if runtime.is_oss else [
-            "-Wno-global-constructors",
-            "-Wno-shadow",
-        ],
-        deps = [":fft_utils"],
-    ),
-    op_target(
-        name = "op_fft_r2c",
-        compiler_flags = [] if runtime.is_oss else [
-            "-Wno-global-constructors",
-            "-Wno-shadow",
-        ],
-        deps = [":fft_utils"],
-    ),
-    op_target(name = "op_sigmoid"),
-    op_target(
-        name = "op_gelu",
-        deps = [
-            "//executorch/kernels/portable/cpu/util:activation_ops_util",
-            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
-        ],
-    ),
-    op_target(
-        name = "op_le",
-        deps = [
-            "//executorch/kernels/portable/cpu:scalar_utils",
-            "//executorch/kernels/portable/cpu/util:broadcast_util",
-        ],
-    ),
-    op_target(
-        name = "op_linear",
-        deps = [
-            "//executorch/kernels/optimized:libblas",
-            "//executorch/kernels/portable/cpu/util:matmul_ops_util",
-        ],
-    ),
-    op_target(
-        name = "op_log_softmax",
-        deps = [
-            "//executorch/kernels/portable/cpu/util:activation_ops_util",
-            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
-        ],
-    ),
-    op_target(
-        name = "op_mm",
-        deps = [
-            "//executorch/kernels/optimized:libblas",
-            "//executorch/kernels/portable/cpu/util:matmul_ops_util",
-        ],
-    ),
-    op_target(
-        name = "op_mul",
-        deps = [
-            ":binary_ops",
-            "//executorch/kernels/portable/cpu:scalar_utils",
-            "//executorch/kernels/portable/cpu/util:broadcast_util",
-            "//executorch/runtime/core/exec_aten/util:tensor_util",
-        ],
-    ),
-    op_target(
-        name = "op_native_layer_norm",
-        deps = [
-            ":moments_utils",
-            "//executorch/kernels/portable/cpu/util:normalization_ops_util",
-        ],
-    ),
-    op_target(name = "op_neg"),
-    op_target(
-        name = "op_sub",
-        deps = [
-            ":binary_ops",
-            ":add_sub_impl",
-            "//executorch/kernels/portable/cpu:scalar_utils",
-            "//executorch/kernels/portable/cpu/util:broadcast_util",
-        ],
-    ),
-    op_target(
-        name = "op_where",
-        deps = [
-            "//executorch/extension/threadpool:threadpool",
-            "//executorch/kernels/portable/cpu/util:elementwise_util",
-        ],
-    ),
-)
-
+load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "OPTIMIZED_ATEN_OPS", "define_op_target", "op_target")
 
 def get_sleef_preprocessor_flags():
     if runtime.is_oss:
@@ -137,10 +15,10 @@ def define_common_targets():
     """
 
     # Define build targets for all operators registered in the tables above.
-    for op in _OPTIMIZED_ATEN_OPS:
+    for op in OPTIMIZED_ATEN_OPS:
         define_op_target(**op)
 
-    aten_op_targets = [":{}".format(op["name"]) for op in _OPTIMIZED_ATEN_OPS]
+    aten_op_targets = [":{}".format(op["name"]) for op in OPTIMIZED_ATEN_OPS]
     all_op_targets = aten_op_targets
 
     runtime.cxx_library(
diff --git a/kernels/optimized/op_registration_util.bzl b/kernels/optimized/op_registration_util.bzl
index 3ac89132380..fac601637ed 100644
--- a/kernels/optimized/op_registration_util.bzl
+++ b/kernels/optimized/op_registration_util.bzl
@@ -137,3 +137,124 @@ def define_op_target(name, compiler_flags, deps):
         compiler_flags = compiler_flags,
         deps = deps,
     )
+
+OPTIMIZED_ATEN_OPS = (
+    op_target(
+        name = "op_add",
+        deps = [
+            ":binary_ops",
+            ":add_sub_impl",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+        ],
+    ),
+    op_target(
+        name = "op_bmm",
+        deps = [
+            "//executorch/kernels/optimized:libblas",
+            "//executorch/kernels/portable/cpu/util:matmul_ops_util",
+        ],
+    ),
+    op_target(
+        name = "op_div",
+        deps = [
+            ":binary_ops",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+        ],
+    ),
+    op_target(
+        name = "op_elu",
+        deps = [
+            "//executorch/extension/threadpool:threadpool",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
+        ],
+    ),
+    op_target(name = "op_exp"),
+    op_target(
+        name = "op_fft_c2r",
+        compiler_flags = [] if runtime.is_oss else [
+            "-Wno-global-constructors",
+            "-Wno-shadow",
+        ],
+        deps = [":fft_utils"],
+    ),
+    op_target(
+        name = "op_fft_r2c",
+        compiler_flags = [] if runtime.is_oss else [
+            "-Wno-global-constructors",
+            "-Wno-shadow",
+        ],
+        deps = [":fft_utils"],
+    ),
+    op_target(name = "op_sigmoid"),
+    op_target(
+        name = "op_gelu",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:activation_ops_util",
+            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
+        ],
+    ),
+    op_target(
+        name = "op_le",
+        deps = [
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+        ],
+    ),
+    op_target(
+        name = "op_linear",
+        deps = [
+            "//executorch/kernels/optimized:libblas",
+            "//executorch/kernels/portable/cpu/util:matmul_ops_util",
+        ],
+    ),
+    op_target(
+        name = "op_log_softmax",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:activation_ops_util",
+            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
+        ],
+    ),
+    op_target(
+        name = "op_mm",
+        deps = [
+            "//executorch/kernels/optimized:libblas",
+            "//executorch/kernels/portable/cpu/util:matmul_ops_util",
+        ],
+    ),
+    op_target(
+        name = "op_mul",
+        deps = [
+            ":binary_ops",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+            "//executorch/runtime/core/exec_aten/util:tensor_util",
+        ],
+    ),
+    op_target(
+        name = "op_native_layer_norm",
+        deps = [
+            ":moments_utils",
+            "//executorch/kernels/portable/cpu/util:normalization_ops_util",
+        ],
+    ),
+    op_target(name = "op_neg"),
+    op_target(
+        name = "op_sub",
+        deps = [
+            ":binary_ops",
+            ":add_sub_impl",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+        ],
+    ),
+    op_target(
+        name = "op_where",
+        deps = [
+            "//executorch/extension/threadpool:threadpool",
+            "//executorch/kernels/portable/cpu/util:elementwise_util",
+        ],
+    ),
+)

From 1ccd622058278e8631f67160771e819302de2e29 Mon Sep 17 00:00:00 2001
From: lucylq <lfq@meta.com>
Date: Wed, 14 May 2025 10:51:08 -0700
Subject: [PATCH 2/3] Update on "Move optimized target definitions to
 op_registration_util.bzl"

See the subject line for the change being updated.

This lets us pull these definitions into codegen.bzl (we can't pull in targets.bzl files).

Differential Revision: [D74741846](https://our.internmc.facebook.com/intern/diff/D74741846/)

[ghstack-poisoned]

From 538d81bdc5020b9c13c6e3caa156ee41ef5617e5 Mon Sep 17 00:00:00 2001
From: lucylq <lfq@meta.com>
Date: Wed, 14 May 2025 14:28:23 -0700
Subject: [PATCH 3/3] Update on "Move optimized target definitions to
 op_registration_util.bzl"

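Apply the same change to the shim_et copy of op_registration_util.bzl: add the
`OPTIMIZED_ATEN_OPS` table, pass `compiler_flags` through in
`define_op_library`, and drop the license header from that copy.

This lets us pull these definitions into codegen.bzl (we can't pull in targets.bzl files).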

Differential Revision: [D74741846](https://our.internmc.facebook.com/intern/diff/D74741846/)

[ghstack-poisoned]
---
 .../optimized/op_registration_util.bzl        | 130 ++++++++++++++++--
 1 file changed, 122 insertions(+), 8 deletions(-)

diff --git a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl
index d48a22cee37..fac601637ed 100644
--- a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl
+++ b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl
@@ -1,10 +1,3 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-#
-# This source code is licensed under both the MIT license found in the
-# LICENSE-MIT file in the root directory of this source tree and the Apache
-# License, Version 2.0 found in the LICENSE-APACHE file in the root directory
-# of this source tree.
-
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
 load(
@@ -106,7 +99,7 @@ def define_op_library(name, compiler_flags, deps):
             # pragma unroll fails with -Os, don't need to warn us and
             # fail Werror builds; see https://godbolt.org/z/zvf85vTsr
             "-Wno-pass-failed",
-        ] + get_compiler_optimization_flags(),
+        ] + compiler_flags + get_compiler_optimization_flags(),
         deps = [
             "//executorch/runtime/kernel:kernel_includes",
         ] + augmented_deps + get_vec_deps(),
@@ -144,3 +137,124 @@ def define_op_target(name, compiler_flags, deps):
         compiler_flags = compiler_flags,
         deps = deps,
     )
+
+OPTIMIZED_ATEN_OPS = (
+    op_target(
+        name = "op_add",
+        deps = [
+            ":binary_ops",
+            ":add_sub_impl",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+        ],
+    ),
+    op_target(
+        name = "op_bmm",
+        deps = [
+            "//executorch/kernels/optimized:libblas",
+            "//executorch/kernels/portable/cpu/util:matmul_ops_util",
+        ],
+    ),
+    op_target(
+        name = "op_div",
+        deps = [
+            ":binary_ops",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+        ],
+    ),
+    op_target(
+        name = "op_elu",
+        deps = [
+            "//executorch/extension/threadpool:threadpool",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
+        ],
+    ),
+    op_target(name = "op_exp"),
+    op_target(
+        name = "op_fft_c2r",
+        compiler_flags = [] if runtime.is_oss else [
+            "-Wno-global-constructors",
+            "-Wno-shadow",
+        ],
+        deps = [":fft_utils"],
+    ),
+    op_target(
+        name = "op_fft_r2c",
+        compiler_flags = [] if runtime.is_oss else [
+            "-Wno-global-constructors",
+            "-Wno-shadow",
+        ],
+        deps = [":fft_utils"],
+    ),
+    op_target(name = "op_sigmoid"),
+    op_target(
+        name = "op_gelu",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:activation_ops_util",
+            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
+        ],
+    ),
+    op_target(
+        name = "op_le",
+        deps = [
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+        ],
+    ),
+    op_target(
+        name = "op_linear",
+        deps = [
+            "//executorch/kernels/optimized:libblas",
+            "//executorch/kernels/portable/cpu/util:matmul_ops_util",
+        ],
+    ),
+    op_target(
+        name = "op_log_softmax",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:activation_ops_util",
+            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
+        ],
+    ),
+    op_target(
+        name = "op_mm",
+        deps = [
+            "//executorch/kernels/optimized:libblas",
+            "//executorch/kernels/portable/cpu/util:matmul_ops_util",
+        ],
+    ),
+    op_target(
+        name = "op_mul",
+        deps = [
+            ":binary_ops",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+            "//executorch/runtime/core/exec_aten/util:tensor_util",
+        ],
+    ),
+    op_target(
+        name = "op_native_layer_norm",
+        deps = [
+            ":moments_utils",
+            "//executorch/kernels/portable/cpu/util:normalization_ops_util",
+        ],
+    ),
+    op_target(name = "op_neg"),
+    op_target(
+        name = "op_sub",
+        deps = [
+            ":binary_ops",
+            ":add_sub_impl",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+        ],
+    ),
+    op_target(
+        name = "op_where",
+        deps = [
+            "//executorch/extension/threadpool:threadpool",
+            "//executorch/kernels/portable/cpu/util:elementwise_util",
+        ],
+    ),
+)