From 25268af2d915732986fa825ee5c87c5546070a3b Mon Sep 17 00:00:00 2001 From: lucylq Date: Wed, 14 May 2025 10:39:41 -0700 Subject: [PATCH 1/3] Move optimized target definitions to op_registration.bzl ^ So we can pull these definitions into codegen.bzl (we can't pull in targets.bzl files). Differential Revision: [D74741846](https://our.internmc.facebook.com/intern/diff/D74741846/) [ghstack-poisoned] --- kernels/optimized/cpu/targets.bzl | 128 +-------------------- kernels/optimized/op_registration_util.bzl | 121 +++++++++++++++++++ 2 files changed, 124 insertions(+), 125 deletions(-) diff --git a/kernels/optimized/cpu/targets.bzl b/kernels/optimized/cpu/targets.bzl index 4b9cdef5936..0d60d2c6bee 100644 --- a/kernels/optimized/cpu/targets.bzl +++ b/kernels/optimized/cpu/targets.bzl @@ -1,127 +1,5 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") -load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "define_op_target", "op_target") - -_OPTIMIZED_ATEN_OPS = ( - op_target( - name = "op_add", - deps = [ - ":binary_ops", - ":add_sub_impl", - "//executorch/kernels/portable/cpu:scalar_utils", - "//executorch/kernels/portable/cpu/util:broadcast_util", - ], - ), - op_target( - name = "op_bmm", - deps = [ - "//executorch/kernels/optimized:libblas", - "//executorch/kernels/portable/cpu/util:matmul_ops_util", - ], - ), - op_target( - name = "op_div", - deps = [ - ":binary_ops", - "//executorch/kernels/portable/cpu:scalar_utils", - "//executorch/kernels/portable/cpu/util:broadcast_util", - ], - ), - op_target( - name = "op_elu", - deps = [ - "//executorch/extension/threadpool:threadpool", - "//executorch/kernels/portable/cpu:scalar_utils", - "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", - ], - ), - op_target(name = "op_exp"), - op_target( - name = "op_fft_c2r", - compiler_flags = [] if runtime.is_oss else [ - "-Wno-global-constructors", - "-Wno-shadow", - ], - deps = [":fft_utils"], - ), - 
op_target( - name = "op_fft_r2c", - compiler_flags = [] if runtime.is_oss else [ - "-Wno-global-constructors", - "-Wno-shadow", - ], - deps = [":fft_utils"], - ), - op_target(name = "op_sigmoid"), - op_target( - name = "op_gelu", - deps = [ - "//executorch/kernels/portable/cpu/util:activation_ops_util", - "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", - ], - ), - op_target( - name = "op_le", - deps = [ - "//executorch/kernels/portable/cpu:scalar_utils", - "//executorch/kernels/portable/cpu/util:broadcast_util", - ], - ), - op_target( - name = "op_linear", - deps = [ - "//executorch/kernels/optimized:libblas", - "//executorch/kernels/portable/cpu/util:matmul_ops_util", - ], - ), - op_target( - name = "op_log_softmax", - deps = [ - "//executorch/kernels/portable/cpu/util:activation_ops_util", - "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", - ], - ), - op_target( - name = "op_mm", - deps = [ - "//executorch/kernels/optimized:libblas", - "//executorch/kernels/portable/cpu/util:matmul_ops_util", - ], - ), - op_target( - name = "op_mul", - deps = [ - ":binary_ops", - "//executorch/kernels/portable/cpu:scalar_utils", - "//executorch/kernels/portable/cpu/util:broadcast_util", - "//executorch/runtime/core/exec_aten/util:tensor_util", - ], - ), - op_target( - name = "op_native_layer_norm", - deps = [ - ":moments_utils", - "//executorch/kernels/portable/cpu/util:normalization_ops_util", - ], - ), - op_target(name = "op_neg"), - op_target( - name = "op_sub", - deps = [ - ":binary_ops", - ":add_sub_impl", - "//executorch/kernels/portable/cpu:scalar_utils", - "//executorch/kernels/portable/cpu/util:broadcast_util", - ], - ), - op_target( - name = "op_where", - deps = [ - "//executorch/extension/threadpool:threadpool", - "//executorch/kernels/portable/cpu/util:elementwise_util", - ], - ), -) - +load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "OPTIMIZED_ATEN_OPS", "define_op_target", 
"op_target") def get_sleef_preprocessor_flags(): if runtime.is_oss: @@ -137,10 +15,10 @@ def define_common_targets(): """ # Define build targets for all operators registered in the tables above. - for op in _OPTIMIZED_ATEN_OPS: + for op in OPTIMIZED_ATEN_OPS: define_op_target(**op) - aten_op_targets = [":{}".format(op["name"]) for op in _OPTIMIZED_ATEN_OPS] + aten_op_targets = [":{}".format(op["name"]) for op in OPTIMIZED_ATEN_OPS] all_op_targets = aten_op_targets runtime.cxx_library( diff --git a/kernels/optimized/op_registration_util.bzl b/kernels/optimized/op_registration_util.bzl index 3ac89132380..fac601637ed 100644 --- a/kernels/optimized/op_registration_util.bzl +++ b/kernels/optimized/op_registration_util.bzl @@ -137,3 +137,124 @@ def define_op_target(name, compiler_flags, deps): compiler_flags = compiler_flags, deps = deps, ) + +OPTIMIZED_ATEN_OPS = ( + op_target( + name = "op_add", + deps = [ + ":binary_ops", + ":add_sub_impl", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + ], + ), + op_target( + name = "op_bmm", + deps = [ + "//executorch/kernels/optimized:libblas", + "//executorch/kernels/portable/cpu/util:matmul_ops_util", + ], + ), + op_target( + name = "op_div", + deps = [ + ":binary_ops", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + ], + ), + op_target( + name = "op_elu", + deps = [ + "//executorch/extension/threadpool:threadpool", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", + ], + ), + op_target(name = "op_exp"), + op_target( + name = "op_fft_c2r", + compiler_flags = [] if runtime.is_oss else [ + "-Wno-global-constructors", + "-Wno-shadow", + ], + deps = [":fft_utils"], + ), + op_target( + name = "op_fft_r2c", + compiler_flags = [] if runtime.is_oss else [ + "-Wno-global-constructors", + "-Wno-shadow", + ], + deps = 
[":fft_utils"], + ), + op_target(name = "op_sigmoid"), + op_target( + name = "op_gelu", + deps = [ + "//executorch/kernels/portable/cpu/util:activation_ops_util", + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", + ], + ), + op_target( + name = "op_le", + deps = [ + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + ], + ), + op_target( + name = "op_linear", + deps = [ + "//executorch/kernels/optimized:libblas", + "//executorch/kernels/portable/cpu/util:matmul_ops_util", + ], + ), + op_target( + name = "op_log_softmax", + deps = [ + "//executorch/kernels/portable/cpu/util:activation_ops_util", + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", + ], + ), + op_target( + name = "op_mm", + deps = [ + "//executorch/kernels/optimized:libblas", + "//executorch/kernels/portable/cpu/util:matmul_ops_util", + ], + ), + op_target( + name = "op_mul", + deps = [ + ":binary_ops", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + "//executorch/runtime/core/exec_aten/util:tensor_util", + ], + ), + op_target( + name = "op_native_layer_norm", + deps = [ + ":moments_utils", + "//executorch/kernels/portable/cpu/util:normalization_ops_util", + ], + ), + op_target(name = "op_neg"), + op_target( + name = "op_sub", + deps = [ + ":binary_ops", + ":add_sub_impl", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + ], + ), + op_target( + name = "op_where", + deps = [ + "//executorch/extension/threadpool:threadpool", + "//executorch/kernels/portable/cpu/util:elementwise_util", + ], + ), +) From 1ccd622058278e8631f67160771e819302de2e29 Mon Sep 17 00:00:00 2001 From: lucylq Date: Wed, 14 May 2025 10:51:08 -0700 Subject: [PATCH 2/3] Update on "Move optimized target definitions to op_registration.bzl" ^ So we can pull these definitions into codegen.bzl (we 
can't pull in targets.bzl files). Differential Revision: [D74741846](https://our.internmc.facebook.com/intern/diff/D74741846/) [ghstack-poisoned] From 538d81bdc5020b9c13c6e3caa156ee41ef5617e5 Mon Sep 17 00:00:00 2001 From: lucylq Date: Wed, 14 May 2025 14:28:23 -0700 Subject: [PATCH 3/3] Update on "Move optimized target definitions to op_registration.bzl" Moving the definitions into op_registration_util.bzl lets us pull them into codegen.bzl (we can't pull in targets.bzl files). Differential Revision: [D74741846](https://our.internmc.facebook.com/intern/diff/D74741846/) [ghstack-poisoned] --- .../optimized/op_registration_util.bzl | 130 ++++++++++++++++-- 1 file changed, 122 insertions(+), 8 deletions(-) diff --git a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl index d48a22cee37..fac601637ed 100644 --- a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl +++ b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl @@ -1,10 +1,3 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under both the MIT license found in the -# LICENSE-MIT file in the root directory of this source tree and the Apache -# License, Version 2.0 found in the LICENSE-APACHE file in the root directory -# of this source tree. 
- load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") load("@fbsource//xplat/executorch/build:selects.bzl", "selects") load( @@ -106,7 +99,7 @@ def define_op_library(name, compiler_flags, deps): # pragma unroll fails with -Os, don't need to warn us and # fail Werror builds; see https://godbolt.org/z/zvf85vTsr "-Wno-pass-failed", - ] + get_compiler_optimization_flags(), + ] + compiler_flags + get_compiler_optimization_flags(), deps = [ "//executorch/runtime/kernel:kernel_includes", ] + augmented_deps + get_vec_deps(), @@ -144,3 +137,124 @@ def define_op_target(name, compiler_flags, deps): compiler_flags = compiler_flags, deps = deps, ) + +OPTIMIZED_ATEN_OPS = ( + op_target( + name = "op_add", + deps = [ + ":binary_ops", + ":add_sub_impl", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + ], + ), + op_target( + name = "op_bmm", + deps = [ + "//executorch/kernels/optimized:libblas", + "//executorch/kernels/portable/cpu/util:matmul_ops_util", + ], + ), + op_target( + name = "op_div", + deps = [ + ":binary_ops", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + ], + ), + op_target( + name = "op_elu", + deps = [ + "//executorch/extension/threadpool:threadpool", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", + ], + ), + op_target(name = "op_exp"), + op_target( + name = "op_fft_c2r", + compiler_flags = [] if runtime.is_oss else [ + "-Wno-global-constructors", + "-Wno-shadow", + ], + deps = [":fft_utils"], + ), + op_target( + name = "op_fft_r2c", + compiler_flags = [] if runtime.is_oss else [ + "-Wno-global-constructors", + "-Wno-shadow", + ], + deps = [":fft_utils"], + ), + op_target(name = "op_sigmoid"), + op_target( + name = "op_gelu", + deps = [ + "//executorch/kernels/portable/cpu/util:activation_ops_util", + 
"//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", + ], + ), + op_target( + name = "op_le", + deps = [ + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + ], + ), + op_target( + name = "op_linear", + deps = [ + "//executorch/kernels/optimized:libblas", + "//executorch/kernels/portable/cpu/util:matmul_ops_util", + ], + ), + op_target( + name = "op_log_softmax", + deps = [ + "//executorch/kernels/portable/cpu/util:activation_ops_util", + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", + ], + ), + op_target( + name = "op_mm", + deps = [ + "//executorch/kernels/optimized:libblas", + "//executorch/kernels/portable/cpu/util:matmul_ops_util", + ], + ), + op_target( + name = "op_mul", + deps = [ + ":binary_ops", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + "//executorch/runtime/core/exec_aten/util:tensor_util", + ], + ), + op_target( + name = "op_native_layer_norm", + deps = [ + ":moments_utils", + "//executorch/kernels/portable/cpu/util:normalization_ops_util", + ], + ), + op_target(name = "op_neg"), + op_target( + name = "op_sub", + deps = [ + ":binary_ops", + ":add_sub_impl", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/portable/cpu/util:broadcast_util", + ], + ), + op_target( + name = "op_where", + deps = [ + "//executorch/extension/threadpool:threadpool", + "//executorch/kernels/portable/cpu/util:elementwise_util", + ], + ), +)