From 35caf8766ff21535649100d8639c8bc7303cf998 Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Tue, 24 Jun 2025 10:53:31 -0700 Subject: [PATCH] Fix bug in le broadcast for single element broadcast Summary: Really comparison ops cannot be handled in the same way as other binary ops for broadcasting because output tensor dtype is different than input tensor dtype. As a result we just have to fall back to portable. Even the current vectorized impl for le, ge etc. assumes that the output type of compare is same as input type. That might actually be a bug. A potential way to handle this maybe via vectorized compare natively supporting binary output vector Test Plan: Tests added which fail before and passes after Reviewers: Subscribers: Tasks: Tags: --- kernels/optimized/cpu/op_le.cpp | 49 ------------------------ kernels/test/op_le_test.cpp | 67 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 49 deletions(-) diff --git a/kernels/optimized/cpu/op_le.cpp b/kernels/optimized/cpu/op_le.cpp index 4aeeb69323f..8e56e1ca4fc 100644 --- a/kernels/optimized/cpu/op_le.cpp +++ b/kernels/optimized/cpu/op_le.cpp @@ -30,57 +30,8 @@ Tensor& opt_le_tensor_out( (void)ctx; ScalarType a_type = a.scalar_type(); - ScalarType b_type = b.scalar_type(); ScalarType out_type = out.scalar_type(); - if (a.numel() == 1 || b.numel() == 1) { - const Tensor* tensor; - const Tensor* scalar; - ScalarType tensor_type; - ScalarType scalar_type; - if (a.numel() == 1) { - tensor = &b; - tensor_type = b_type; - scalar = &a; - scalar_type = a_type; - } else { - tensor = &a; - tensor_type = a_type; - scalar = &b; - scalar_type = b_type; - } - ET_KERNEL_CHECK( - ctx, - resize_to_broadcast_target_size(a, b, out) == Error::Ok, - InvalidArgument, - out); - - constexpr auto name = "le.Tensor_out"; - - ET_SWITCH_REALB_TYPES(tensor_type, ctx, name, CTYPE, [&]() { - ET_SWITCH_REALB_TYPES(scalar_type, ctx, name, CTYPE_SCALAR, [&]() { - CTYPE_SCALAR scalar_val = *scalar->const_data_ptr(); - CTYPE scalar_casted = static_cast(scalar_val); - - using Vec = at::vec::Vectorized; - if (a.numel() == 1) { - at::vec::map( - [scalar_casted](Vec x) { return Vec(scalar_casted).le(x); }, - out.mutable_data_ptr(), - tensor->const_data_ptr(), - out.numel()); - } else { - at::vec::map( - [scalar_casted](Vec x) { return x.le(Vec(scalar_casted)); }, - out.mutable_data_ptr(), - tensor->const_data_ptr(), - out.numel()); - } - }); - }); - return out; - } - // Check for optimized broadcast paths auto selected_optimized_path = select_optimized_path(a, b, out); if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) { diff --git a/kernels/test/op_le_test.cpp b/kernels/test/op_le_test.cpp index d8ecec11c46..4a9b97dfe8a 100644 --- a/kernels/test/op_le_test.cpp +++ b/kernels/test/op_le_test.cpp @@ -1112,3 +1112,70 @@ TEST_F(OpLeTensorOutTest, Broadcast22dBy1dReverseTest) { EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4}, expected_data)); } + +TEST_F(OpLeTensorOutTest, MonotonicIncreasingVsScalarBroadcastTest) { + TensorFactory tf; + TensorFactory tf_bool; + + // Test case: 1D tensor [0, 1, 2, ..., 63] vs 2D tensor [1, 1] with value 2 + std::vector lhs_data; + for (int i = 0; i < 64; ++i) { + lhs_data.push_back(i); + } + + Tensor lhs = tf.make({64}, lhs_data); + Tensor rhs = tf.make({1, 1}, {2}); + Tensor out = tf_bool.zeros({1, 64}); + + op_le_tensor_out(lhs, rhs, out); + + // Expected: [0, 1, 2] <= 2 should be [true, true, true], rest false + using ctype = + executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper< + ScalarType::Bool>::ctype; + std::vector expected_data; + for (int i = 0; i < 64; ++i) { + expected_data.push_back(i <= 2); + } + + EXPECT_TENSOR_EQ(out, tf_bool.make({1, 64}, expected_data)); + + // Test with rhs value 4 + rhs = tf.make({1, 1}, {4}); + out = tf_bool.zeros({1, 64}); + + op_le_tensor_out(lhs, rhs, out); + + expected_data.clear(); + for (int i = 0; i < 64; ++i) { + expected_data.push_back(i <= 4); + } + + EXPECT_TENSOR_EQ(out, tf_bool.make({1, 64}, expected_data)); + + // Test with rhs value 10 + rhs = tf.make({1, 1}, {10}); + out = tf_bool.zeros({1, 64}); + + op_le_tensor_out(lhs, rhs, out); + + expected_data.clear(); + for (int i = 0; i < 64; ++i) { + expected_data.push_back(i <= 10); + } + + EXPECT_TENSOR_EQ(out, tf_bool.make({1, 64}, expected_data)); + + // Test with rhs value 32 + rhs = tf.make({1, 1}, {32}); + out = tf_bool.zeros({1, 64}); + + op_le_tensor_out(lhs, rhs, out); + + expected_data.clear(); + for (int i = 0; i < 64; ++i) { + expected_data.push_back(i <= 32); + } + + EXPECT_TENSOR_EQ(out, tf_bool.make({1, 64}, expected_data)); +}