From 87cdae87509c4a23b0f12f2accace97bfea304f1 Mon Sep 17 00:00:00 2001 From: Dayuxiaoshui <792179245@qq.com> Date: Fri, 12 Dec 2025 03:59:28 +0000 Subject: [PATCH 1/7] =?UTF-8?q?Fix=20ACOS=20precision=20issue=20for=20boun?= =?UTF-8?q?dary=20values=20(x=3D=C2=B11.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ACOS operator was producing incorrect results for boundary values due to poor precision of ASIN's Taylor series expansion near x=±1.0. Root cause: - ASIN used a 6-term Taylor series that converges slowly near boundaries - ACOS was implemented as acos(x) = π/2 - asin(x), inheriting ASIN errors - At x=1.0, ASIN error of 0.354874 (22.6%) caused ACOS to output 0.354874 instead of 0.0 Solution: - Modified ASIN to use system library function (asinf) for |x| >= 0.9 - Modified ACOS to use system library function (acosf) for |x| >= 0.9 - For |x| < 0.9, continue using Taylor series (accurate in this range) This ensures high precision for boundary values while maintaining the existing behavior for values in the middle range. Fixes #18580 --- src/target/llvm/intrin_rule_llvm.cc | 37 +++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 4ce7ce9f2291..60fa34713a4f 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -167,9 +167,18 @@ TVM_REGISTER_OP("tir.sinh") TVM_REGISTER_OP("tir.asin") .set_attr("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr { using tir::make_const; + using namespace intrin; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; + + // Use system library function for values near boundaries where Taylor series + // has poor precision. Threshold chosen to keep error < 1% for Taylor series. + PrimExpr threshold = make_const(x.dtype(), 0.9); + PrimExpr abs_x = tir::abs(x); + PrimExpr use_lib = abs_x >= threshold; + + // Taylor series for values away from boundaries PrimExpr x2 = x * x; PrimExpr term1 = x; PrimExpr term3 = term1 * x2 / make_const(x.dtype(), 6); @@ -178,25 +187,45 @@ TVM_REGISTER_OP("tir.asin") PrimExpr term9 = term7 * x2 * make_const(x.dtype(), 1225) / make_const(x.dtype(), 3456); PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) / make_const(x.dtype(), 28160); PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11; + + // System library function for boundary values + PrimExpr lib_result = DispatchPureExtern(e); + /* --- domain limit check --- */ PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); - PrimExpr out_range = tir::Or(x upper); + PrimExpr out_range = tir::Or(x < lower, x > upper); // Use a quiet NaN constant PrimExpr nan_const = make_const(x.dtype(), std::numeric_limits::quiet_NaN()); - // select: if out of [-1,1] → NaN, else → series - return tir::Select(out_range, nan_const, series); + + // select: if out of [-1,1] → NaN, else if |x| >= threshold → lib, else → series + return tir::Select(out_range, nan_const, + tir::Select(use_lib, lib_result, series)); }); TVM_REGISTER_OP("tir.acos") .set_attr("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr { using tir::make_const; + using namespace intrin; const tir::CallNode* call = e.as(); ICHECK(call != nullptr) << "Invalid call node in acos legalization"; const PrimExpr& x = call->args[0]; + + // Use system library function for values near boundaries where ASIN Taylor series + // has poor precision, which would cause ACOS errors. + PrimExpr threshold = make_const(x.dtype(), 0.9); + PrimExpr abs_x = tir::abs(x); + PrimExpr use_lib = abs_x >= threshold; + + // For values away from boundaries, use π/2 - asin(x) PrimExpr half_pi = make_const(x.dtype(), M_PI / 2); PrimExpr asin_x = asin(x); - return half_pi - asin_x; + PrimExpr formula_result = half_pi - asin_x; + + // System library function for boundary values + PrimExpr lib_result = DispatchPureExtern(e); + + return tir::Select(use_lib, lib_result, formula_result); }); TVM_REGISTER_OP("tir.atan") From 38da5475a762678380a59e9494c8f53ff1797adc Mon Sep 17 00:00:00 2001 From: Dayuxiaoshui <792179245@qq.com> Date: Fri, 12 Dec 2025 04:49:56 +0000 Subject: [PATCH 2/7] Add explicit domain check for acos to match asin implementation --- src/target/llvm/intrin_rule_llvm.cc | 39 +++++++++++++---------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 60fa34713a4f..5837d074bc71 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -171,14 +171,11 @@ TVM_REGISTER_OP("tir.asin") const tir::CallNode* call = e.as(); ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; - - // Use system library function for values near boundaries where Taylor series - // has poor precision. Threshold chosen to keep error < 1% for Taylor series. + PrimExpr threshold = make_const(x.dtype(), 0.9); PrimExpr abs_x = tir::abs(x); PrimExpr use_lib = abs_x >= threshold; - - // Taylor series for values away from boundaries + PrimExpr x2 = x * x; PrimExpr term1 = x; PrimExpr term3 = term1 * x2 / make_const(x.dtype(), 6); @@ -187,20 +184,16 @@ TVM_REGISTER_OP("tir.asin") PrimExpr term9 = term7 * x2 * make_const(x.dtype(), 1225) / make_const(x.dtype(), 3456); PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) / make_const(x.dtype(), 28160); PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11; - - // System library function for boundary values + PrimExpr lib_result = DispatchPureExtern(e); - - /* --- domain limit check --- */ + PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); PrimExpr out_range = tir::Or(x < lower, x > upper); - // Use a quiet NaN constant PrimExpr nan_const = make_const(x.dtype(), std::numeric_limits::quiet_NaN()); - - // select: if out of [-1,1] → NaN, else if |x| >= threshold → lib, else → series + return tir::Select(out_range, nan_const, - tir::Select(use_lib, lib_result, series)); + tir::Select(use_lib, lib_result, series)); }); TVM_REGISTER_OP("tir.acos") @@ -210,22 +203,24 @@ TVM_REGISTER_OP("tir.acos") const tir::CallNode* call = e.as(); ICHECK(call != nullptr) << "Invalid call node in acos legalization"; const PrimExpr& x = call->args[0]; - - // Use system library function for values near boundaries where ASIN Taylor series - // has poor precision, which would cause ACOS errors. + PrimExpr threshold = make_const(x.dtype(), 0.9); PrimExpr abs_x = tir::abs(x); PrimExpr use_lib = abs_x >= threshold; - - // For values away from boundaries, use π/2 - asin(x) + PrimExpr half_pi = make_const(x.dtype(), M_PI / 2); PrimExpr asin_x = asin(x); PrimExpr formula_result = half_pi - asin_x; - - // System library function for boundary values + PrimExpr lib_result = DispatchPureExtern(e); - - return tir::Select(use_lib, lib_result, formula_result); + + PrimExpr lower = make_const(x.dtype(), -1.0); + PrimExpr upper = make_const(x.dtype(), 1.0); + PrimExpr out_range = tir::Or(x < lower, x > upper); + PrimExpr nan_const = make_const(x.dtype(), std::numeric_limits::quiet_NaN()); + + return tir::Select(out_range, nan_const, + tir::Select(use_lib, lib_result, formula_result)); }); TVM_REGISTER_OP("tir.atan") From e27cea07a39eb0d7acf4e0ffcc5575520405d3dc Mon Sep 17 00:00:00 2001 From: Dayuxiaoshui <792179245@qq.com> Date: Fri, 12 Dec 2025 05:08:01 +0000 Subject: [PATCH 3/7] Fix clang-format issues: adjust operator spacing and return statement formatting --- src/target/llvm/intrin_rule_llvm.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 5837d074bc71..ba8eac7122b7 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -189,11 +189,10 @@ TVM_REGISTER_OP("tir.asin") PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); - PrimExpr out_range = tir::Or(x < lower, x > upper); + PrimExpr out_range = tir::Or(x upper); PrimExpr nan_const = make_const(x.dtype(), std::numeric_limits::quiet_NaN()); - return tir::Select(out_range, nan_const, - tir::Select(use_lib, lib_result, series)); + return tir::Select(out_range, nan_const, tir::Select(use_lib, lib_result, series)); }); TVM_REGISTER_OP("tir.acos") @@ -216,11 +215,10 @@ TVM_REGISTER_OP("tir.acos") PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); - PrimExpr out_range = tir::Or(x < lower, x > upper); + PrimExpr out_range = tir::Or(x upper); PrimExpr nan_const = make_const(x.dtype(), std::numeric_limits::quiet_NaN()); - return tir::Select(out_range, nan_const, - tir::Select(use_lib, lib_result, formula_result)); + return tir::Select(out_range, nan_const, tir::Select(use_lib, lib_result, formula_result)); }); TVM_REGISTER_OP("tir.atan") From 7d1b8df953e6fde4a8974a1e0a1737a91ec71ff1 Mon Sep 17 00:00:00 2001 From: Dayuxiaoshui <792179245@qq.com> Date: Fri, 12 Dec 2025 06:30:22 +0000 Subject: [PATCH 4/7] Fix compilation errors: use tvm::abs and intrin:: namespace for template parameters --- src/target/llvm/intrin_rule_llvm.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index ba8eac7122b7..2cc3d2f2959d 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -173,7 +173,7 @@ TVM_REGISTER_OP("tir.asin") const PrimExpr& x = call->args[0]; PrimExpr threshold = make_const(x.dtype(), 0.9); - PrimExpr abs_x = tir::abs(x); + PrimExpr abs_x = tvm::abs(x); PrimExpr use_lib = abs_x >= threshold; PrimExpr x2 = x * x; @@ -185,7 +185,7 @@ TVM_REGISTER_OP("tir.asin") PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) / make_const(x.dtype(), 28160); PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11; - PrimExpr lib_result = DispatchPureExtern(e); + PrimExpr lib_result = intrin::DispatchPureExtern(e); PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); @@ -204,14 +204,14 @@ TVM_REGISTER_OP("tir.acos") const PrimExpr& x = call->args[0]; PrimExpr threshold = make_const(x.dtype(), 0.9); - PrimExpr abs_x = tir::abs(x); + PrimExpr abs_x = tvm::abs(x); PrimExpr use_lib = abs_x >= threshold; PrimExpr half_pi = make_const(x.dtype(), M_PI / 2); PrimExpr asin_x = asin(x); PrimExpr formula_result = half_pi - asin_x; - PrimExpr lib_result = DispatchPureExtern(e); + PrimExpr lib_result = intrin::DispatchPureExtern(e); PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); From 22d1b1457609eb60a0c59614261b185be40bc1d5 Mon Sep 17 00:00:00 2001 From: Dayuxiaoshui <792179245@qq.com> Date: Fri, 12 Dec 2025 06:55:06 +0000 Subject: [PATCH 5/7] Fix namespace resolution for DispatchPureExtern in LLVM intrinsic rules --- src/target/llvm/intrin_rule_llvm.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 2cc3d2f2959d..138df3871daf 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -185,7 +185,7 @@ TVM_REGISTER_OP("tir.asin") PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) / make_const(x.dtype(), 28160); PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11; - PrimExpr lib_result = intrin::DispatchPureExtern(e); + PrimExpr lib_result = ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); @@ -211,7 +211,7 @@ TVM_REGISTER_OP("tir.acos") PrimExpr asin_x = asin(x); PrimExpr formula_result = half_pi - asin_x; - PrimExpr lib_result = intrin::DispatchPureExtern(e); + PrimExpr lib_result = ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); From 07d8c2155f5561f6fbe11f916877bcd1d783016f Mon Sep 17 00:00:00 2001 From: Dayuxiaoshui <792179245@qq.com> Date: Fri, 12 Dec 2025 07:10:49 +0000 Subject: [PATCH 6/7] Fix line length lint errors in intrin_rule_llvm.cc --- src/target/llvm/intrin_rule_llvm.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 138df3871daf..14a2772a8af4 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -185,7 +185,8 @@ TVM_REGISTER_OP("tir.asin") PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) / make_const(x.dtype(), 28160); PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11; - PrimExpr lib_result = ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); + PrimExpr lib_result = + ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); @@ -211,7 +212,8 @@ TVM_REGISTER_OP("tir.acos") PrimExpr asin_x = asin(x); PrimExpr formula_result = half_pi - asin_x; - PrimExpr lib_result = ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); + PrimExpr lib_result = + ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e); PrimExpr lower = make_const(x.dtype(), -1.0); PrimExpr upper = make_const(x.dtype(), 1.0); From b584e10876f82f8267f0bfc061f64180d435d909 Mon Sep 17 00:00:00 2001 From: Dayuxiaoshui <792179245@qq.com> Date: Sun, 14 Dec 2025 09:21:00 +0000 Subject: [PATCH 7/7] Improve asin/acos precision by adjusting threshold and add boundary tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Lower threshold from 0.9 to 0.5 for asin/acos legalization * For |x| >= 0.5: use system library function for better accuracy * For |x| < 0.5: use Taylor series for efficiency * This improves precision for values near the threshold while maintaining performance for smaller values - Add test_asin_acos_boundary_values() test function * Tests boundary values (±1.0) * Tests threshold switching point (±0.5) * Tests values below threshold (±0.49, ±0.3, 0.0) * Tests out-of-domain values (should return NaN) This addresses precision issues with asin/acos near boundary values. --- src/target/llvm/intrin_rule_llvm.cc | 4 +- tests/python/tir-base/test_tir_intrin.py | 53 ++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 14a2772a8af4..a8a3d911ca8e 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -172,7 +172,7 @@ TVM_REGISTER_OP("tir.asin") ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; - PrimExpr threshold = make_const(x.dtype(), 0.9); + PrimExpr threshold = make_const(x.dtype(), 0.5); PrimExpr abs_x = tvm::abs(x); PrimExpr use_lib = abs_x >= threshold; @@ -204,7 +204,7 @@ TVM_REGISTER_OP("tir.acos") ICHECK(call != nullptr) << "Invalid call node in acos legalization"; const PrimExpr& x = call->args[0]; - PrimExpr threshold = make_const(x.dtype(), 0.9); + PrimExpr threshold = make_const(x.dtype(), 0.5); PrimExpr abs_x = tvm::abs(x); PrimExpr use_lib = abs_x >= threshold; diff --git a/tests/python/tir-base/test_tir_intrin.py b/tests/python/tir-base/test_tir_intrin.py index 8dabdbb344f3..1e8c88e08e65 100644 --- a/tests/python/tir-base/test_tir_intrin.py +++ b/tests/python/tir-base/test_tir_intrin.py @@ -135,6 +135,58 @@ def run_test(tvm_intrin, np_func, atol=1e-5, rtol=1e-5): run_test(*func, atol, rtol) +def test_asin_acos_boundary_values(): + """Test asin and acos with boundary values and threshold switching.""" + test_funcs = [ + (tvm.tir.asin, lambda x: np.arcsin(x)), + (tvm.tir.acos, lambda x: np.arccos(x)), + ] + + def run_test(tvm_intrin, np_func): + m = te.var("m") + A = te.placeholder((m,), name="A") + B = te.compute((m,), lambda *i: tvm_intrin(A(*i)), name="B") + + mod = te.create_prim_func([A, B]) + sch = tir.Schedule(mod) + func = tvm.compile(sch.mod, target="llvm") + + dev = tvm.cpu(0) + + # Test boundary values: ±1.0 (should use system library) + boundary_values = np.array([1.0, -1.0], dtype=np.float32) + a1 = tvm.runtime.tensor(boundary_values, dev) + b1 = tvm.runtime.tensor(np.empty_like(boundary_values), dev) + func(a1, b1) + tvm.testing.assert_allclose(b1.numpy(), np_func(boundary_values), atol=1e-5, rtol=1e-5) + + # Test values at threshold: ±0.5 (should use system library) + threshold_values = np.array([0.5, -0.5], dtype=np.float32) + a2 = tvm.runtime.tensor(threshold_values, dev) + b2 = tvm.runtime.tensor(np.empty_like(threshold_values), dev) + func(a2, b2) + tvm.testing.assert_allclose(b2.numpy(), np_func(threshold_values), atol=1e-4, rtol=1e-4) + + # Test values just below threshold: ±0.49 (should use Taylor series) + below_threshold_values = np.array([0.49, -0.49, 0.3, -0.3, 0.0], dtype=np.float32) + a3 = tvm.runtime.tensor(below_threshold_values, dev) + b3 = tvm.runtime.tensor(np.empty_like(below_threshold_values), dev) + func(a3, b3) + tvm.testing.assert_allclose( + b3.numpy(), np_func(below_threshold_values), atol=1e-3, rtol=1e-3 + ) + + # Test out-of-domain values: should return NaN + out_of_domain = np.array([1.1, -1.1, 2.0, -2.0], dtype=np.float32) + a4 = tvm.runtime.tensor(out_of_domain, dev) + b4 = tvm.runtime.tensor(np.empty_like(out_of_domain), dev) + func(a4, b4) + assert np.all(np.isnan(b4.numpy())), "Out-of-domain inputs should return NaN" + + for func in test_funcs: + run_test(*func) + + def test_binary_intrin(): test_funcs = [ (tvm.tir.atan2, lambda x1, x2: np.arctan2(x1, x2)), @@ -315,6 +367,7 @@ def test_fma(): test_nearbyint() test_unary_intrin() test_round_intrinsics_on_int() + test_asin_acos_boundary_values() test_binary_intrin() test_ldexp() test_clz()