From 80f174951804d0ab87bd4dc769dba11c8fe93075 Mon Sep 17 00:00:00 2001
From: Stephen Toub <stoub@microsoft.com>
Date: Wed, 1 Apr 2026 11:02:44 -0400
Subject: [PATCH 1/2] Vectorize Acosh for Vector64/128/256/512 and
 TensorPrimitives

Add vectorized Acosh implementations for float and double across all SIMD vector types.
- AcoshDouble: uses log identity (log(x + sqrt(x^2 - 1))) for vectorization
- AcoshSingle: widens to double and calls AcoshDouble
- Hook up TensorPrimitives.Acosh to use vectorized implementations

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../Tensors/netcore/TensorPrimitives.Acosh.cs | 65 +++++++++++++--
 .../tests/TensorPrimitives.Generic.cs         |  2 +-
 .../System/Runtime/Intrinsics/Vector128.cs    | 47 +++++++++++
 .../System/Runtime/Intrinsics/Vector256.cs    | 47 +++++++++++
 .../System/Runtime/Intrinsics/Vector512.cs    | 40 ++++++++++
 .../src/System/Runtime/Intrinsics/Vector64.cs | 55 +++++++++++++
 .../System/Runtime/Intrinsics/VectorMath.cs   | 79 +++++++++++++++++++
 .../ref/System.Runtime.Intrinsics.cs          |  8 ++
 8 files changed, 337 insertions(+), 6 deletions(-)
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acosh.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acosh.cs
index 7313cfe15f2d5d..bbc1f34ed93cdd 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acosh.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acosh.cs
@@ -1,6 +1,7 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics;
 using System.Runtime.Intrinsics;
 
 namespace System.Numerics.Tensors
@@ -26,14 +27,68 @@ public static void Acosh<T>(ReadOnlySpan<T> x, Span<T> destination)
             InvokeSpanIntoSpan<T, AcoshOperator<T>>(x, destination);
 
         /// <summary>T.Acosh(x)</summary>
-        private readonly struct AcoshOperator<T> : IUnaryOperator<T, T>
+        internal readonly struct AcoshOperator<T> : IUnaryOperator<T, T>
             where T : IHyperbolicFunctions<T>
         {
-            public static bool Vectorizable => false; // TODO: Vectorize
+            public static bool Vectorizable => // whether the vector Invoke overloads of this operator may be used
+#if NET11_0_OR_GREATER
+                typeof(T) == typeof(float) || typeof(T) == typeof(double); // the only element types the vector overloads handle
+#else
+                false; // on this build the vector overloads throw NotSupportedException
+#endif
+
             public static T Invoke(T x) => T.Acosh(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+
+            public static Vector128<T> Invoke(Vector128<T> x) // element-wise acosh of a 128-bit vector of float or double
+            {
+#if NET11_0_OR_GREATER
+                if (typeof(T) == typeof(double)) // dispatch on the element type
+                {
+                    return Vector128.Acosh(x.AsDouble()).As<double, T>(); // T is double here, so both casts only reinterpret the vector
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float)); // Vectorizable admits only float and double
+                    return Vector128.Acosh(x.AsSingle()).As<float, T>(); // T is float here, so both casts only reinterpret the vector
+                }
+#else
+                throw new NotSupportedException(); // Vectorizable is false on this build
+#endif
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x) // element-wise acosh of a 256-bit vector of float or double
+            {
+#if NET11_0_OR_GREATER
+                if (typeof(T) == typeof(double)) // dispatch on the element type
+                {
+                    return Vector256.Acosh(x.AsDouble()).As<double, T>(); // T is double here, so both casts only reinterpret the vector
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float)); // Vectorizable admits only float and double
+                    return Vector256.Acosh(x.AsSingle()).As<float, T>(); // T is float here, so both casts only reinterpret the vector
+                }
+#else
+                throw new NotSupportedException(); // Vectorizable is false on this build
+#endif
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x) // element-wise acosh of a 512-bit vector of float or double
+            {
+#if NET11_0_OR_GREATER
+                if (typeof(T) == typeof(double)) // dispatch on the element type
+                {
+                    return Vector512.Acosh(x.AsDouble()).As<double, T>(); // T is double here, so both casts only reinterpret the vector
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float)); // Vectorizable admits only float and double
+                    return Vector512.Acosh(x.AsSingle()).As<float, T>(); // T is float here, so both casts only reinterpret the vector
+                }
+#else
+                throw new NotSupportedException(); // Vectorizable is false on this build
+#endif
+            }
         }
     }
 }
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
index 771813177bb19c..16fa3052e9b7b5 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
@@ -457,7 +457,7 @@ public static IEnumerable<object[]> SpanDestinationFunctionsToTest()
             // The current trigonometric algorithm depends on hardware FMA support for best precision.
             T? trigTolerance = IsFmaSupported ? null : Helpers.DetermineTolerance<T>(doubleTolerance: 1e-10, floatTolerance: 1e-4f);
 
-            yield return Create(TensorPrimitives.Acosh, T.Acosh);
+            yield return Create(TensorPrimitives.Acosh, T.Acosh, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14, floatTolerance: 1e-6f));
             yield return Create(TensorPrimitives.AcosPi, T.AcosPi);
             yield return Create(TensorPrimitives.Acos, T.Acos);
             yield return Create(TensorPrimitives.Asinh, T.Asinh);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
index dc20ffbd556bf5..fc14df4cbc01fc 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
@@ -854,6 +854,53 @@ public static Vector128<float> Asin(Vector128<float> vector)
             }
         }
 
+        /// <summary>Computes the inverse hyperbolic cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose inverse hyperbolic cosine is to be computed.</param>
+        /// <returns>A vector whose elements are the inverse hyperbolic cosine of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <remarks>Each element should be greater than or equal to 1. On the hardware-accelerated path, any element less than 1 produces <see cref="double.NaN" />.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<double> Acosh(Vector128<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.AcoshDouble<Vector128<double>, Vector128<long>, Vector128<ulong>>(vector);
+            }
+            else // not accelerated at this width: evaluate the lower and upper halves separately and recombine
+            {
+                return Create(
+                    Vector64.Acosh(vector._lower),
+                    Vector64.Acosh(vector._upper)
+                );
+            }
+        }
+
+        /// <summary>Computes the inverse hyperbolic cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose inverse hyperbolic cosine is to be computed.</param>
+        /// <returns>A vector whose elements are the inverse hyperbolic cosine of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <remarks>Each element should be greater than or equal to 1. On the hardware-accelerated path, any element less than 1 produces <see cref="float.NaN" />.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> Acosh(Vector128<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                if (Vector256.IsHardwareAccelerated) // a Vector256<double> holds as many elements as the float input, so AcoshSingle widens it in one step
+                {
+                    return VectorMath.AcoshSingle<Vector128<float>, Vector128<int>, Vector128<uint>, Vector256<double>, Vector256<long>, Vector256<ulong>>(vector);
+                }
+                else // a same-width double vector holds half as many elements; AcoshSingle widens the lower and upper halves separately
+                {
+                    return VectorMath.AcoshSingle<Vector128<float>, Vector128<int>, Vector128<uint>, Vector128<double>, Vector128<long>, Vector128<ulong>>(vector);
+                }
+            }
+            else // not accelerated at this width: evaluate the lower and upper halves separately and recombine
+            {
+                return Create(
+                    Vector64.Acosh(vector._lower),
+                    Vector64.Acosh(vector._upper)
+                );
+            }
+        }
+
         /// <inheritdoc cref="Vector64.Cos(Vector64{double})" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector128<double> Cos(Vector128<double> vector)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index c51c1f5329ef74..69b699375d73a2 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -855,6 +855,53 @@ public static Vector256<float> Asin(Vector256<float> vector)
             }
         }
 
+        /// <summary>Computes the inverse hyperbolic cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose inverse hyperbolic cosine is to be computed.</param>
+        /// <returns>A vector whose elements are the inverse hyperbolic cosine of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <remarks>Each element should be greater than or equal to 1. On the hardware-accelerated path, any element less than 1 produces <see cref="double.NaN" />.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<double> Acosh(Vector256<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.AcoshDouble<Vector256<double>, Vector256<long>, Vector256<ulong>>(vector);
+            }
+            else // not accelerated at this width: evaluate the lower and upper halves separately and recombine
+            {
+                return Create(
+                    Vector128.Acosh(vector._lower),
+                    Vector128.Acosh(vector._upper)
+                );
+            }
+        }
+
+        /// <summary>Computes the inverse hyperbolic cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose inverse hyperbolic cosine is to be computed.</param>
+        /// <returns>A vector whose elements are the inverse hyperbolic cosine of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <remarks>Each element should be greater than or equal to 1. On the hardware-accelerated path, any element less than 1 produces <see cref="float.NaN" />.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> Acosh(Vector256<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                if (Vector512.IsHardwareAccelerated) // a Vector512<double> holds as many elements as the float input, so AcoshSingle widens it in one step
+                {
+                    return VectorMath.AcoshSingle<Vector256<float>, Vector256<int>, Vector256<uint>, Vector512<double>, Vector512<long>, Vector512<ulong>>(vector);
+                }
+                else // a same-width double vector holds half as many elements; AcoshSingle widens the lower and upper halves separately
+                {
+                    return VectorMath.AcoshSingle<Vector256<float>, Vector256<int>, Vector256<uint>, Vector256<double>, Vector256<long>, Vector256<ulong>>(vector);
+                }
+            }
+            else // not accelerated at this width: evaluate the lower and upper halves separately and recombine
+            {
+                return Create(
+                    Vector128.Acosh(vector._lower),
+                    Vector128.Acosh(vector._upper)
+                );
+            }
+        }
+
         /// <inheritdoc cref="Vector128.Cos(Vector128{double})" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<double> Cos(Vector256<double> vector)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
index 5bc4b5e0964a52..c2267540a49aa5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
@@ -758,6 +758,46 @@ public static Vector512<float> Asin(Vector512<float> vector)
             }
         }
 
+        /// <summary>Computes the inverse hyperbolic cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose inverse hyperbolic cosine is to be computed.</param>
+        /// <returns>A vector whose elements are the inverse hyperbolic cosine of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <remarks>Each element should be greater than or equal to 1. On the hardware-accelerated path, any element less than 1 produces <see cref="double.NaN" />.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector512<double> Acosh(Vector512<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.AcoshDouble<Vector512<double>, Vector512<long>, Vector512<ulong>>(vector);
+            }
+            else // not accelerated at this width: evaluate the lower and upper halves separately and recombine
+            {
+                return Create(
+                    Vector256.Acosh(vector._lower),
+                    Vector256.Acosh(vector._upper)
+                );
+            }
+        }
+
+        /// <summary>Computes the inverse hyperbolic cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose inverse hyperbolic cosine is to be computed.</param>
+        /// <returns>A vector whose elements are the inverse hyperbolic cosine of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <remarks>Each element should be greater than or equal to 1. On the hardware-accelerated path, any element less than 1 produces <see cref="float.NaN" />.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector512<float> Acosh(Vector512<float> vector)
+        {
+            if (IsHardwareAccelerated) // a Vector512<double> holds half as many elements; AcoshSingle widens the lower and upper halves separately
+            {
+                return VectorMath.AcoshSingle<Vector512<float>, Vector512<int>, Vector512<uint>, Vector512<double>, Vector512<long>, Vector512<ulong>>(vector);
+            }
+            else // not accelerated at this width: evaluate the lower and upper halves separately and recombine
+            {
+                return Create(
+                    Vector256.Acosh(vector._lower),
+                    Vector256.Acosh(vector._upper)
+                );
+            }
+        }
+
         /// <inheritdoc cref="Vector256.Cos(Vector256{double})" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector512<double> Cos(Vector512<double> vector)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
index 7077fe391347e6..6382ec6d536f30 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
@@ -819,6 +819,47 @@ public static Vector64<float> Asin(Vector64<float> vector)
             }
         }
 
+        /// <summary>Computes the inverse hyperbolic cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose inverse hyperbolic cosine is to be computed.</param>
+        /// <returns>A vector whose elements are the inverse hyperbolic cosine of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <remarks>Each element should be greater than or equal to 1. On the hardware-accelerated path, any element less than 1 produces <see cref="double.NaN" />.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<double> Acosh(Vector64<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.AcoshDouble<Vector64<double>, Vector64<long>, Vector64<ulong>>(vector);
+            }
+            else
+            {
+                return Acosh<double>(vector); // generic fallback: applies double.Acosh to each element in turn
+            }
+        }
+
+        /// <summary>Computes the inverse hyperbolic cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose inverse hyperbolic cosine is to be computed.</param>
+        /// <returns>A vector whose elements are the inverse hyperbolic cosine of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <remarks>Each element should be greater than or equal to 1. On the hardware-accelerated path, any element less than 1 produces <see cref="float.NaN" />.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<float> Acosh(Vector64<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                if (Vector128.IsHardwareAccelerated) // a Vector128<double> holds as many elements as the float input, so AcoshSingle widens it in one step
+                {
+                    return VectorMath.AcoshSingle<Vector64<float>, Vector64<int>, Vector64<uint>, Vector128<double>, Vector128<long>, Vector128<ulong>>(vector);
+                }
+                else // a same-width double vector holds half as many elements; AcoshSingle widens the lower and upper halves separately
+                {
+                    return VectorMath.AcoshSingle<Vector64<float>, Vector64<int>, Vector64<uint>, Vector64<double>, Vector64<long>, Vector64<ulong>>(vector);
+                }
+            }
+            else
+            {
+                return Acosh<float>(vector); // generic fallback: applies float.Acosh to each element in turn
+            }
+        }
+
         /// <summary>Computes the cos of each element in a vector.</summary>
         /// <param name="vector">The vector that will have its Cos computed.</param>
         /// <returns>A vector whose elements are the cos of the elements in <paramref name="vector" />.</returns>
@@ -3713,6 +3754,20 @@ internal static Vector64<T> Asin<T>(Vector64<T> vector)
             return result;
         }
 
+        internal static Vector64<T> Acosh<T>(Vector64<T> vector) // scalar fallback: applies T.Acosh to each element
+            where T : IHyperbolicFunctions<T>
+        {
+            Unsafe.SkipInit(out Vector64<T> result); // every element is overwritten by the loop below, so no initialization is needed
+
+            for (int index = 0; index < Vector64<T>.Count; index++)
+            {
+                T value = T.Acosh(vector.GetElementUnsafe(index)); // scalar acosh of the element at this index
+                result.SetElementUnsafe(index, value);
+            }
+
+            return result;
+        }
+
         internal static Vector64<T> Sin<T>(Vector64<T> vector)
             where T : ITrigonometricFunctions<T>
         {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index bdbe668efa326b..c179e6c0710e14 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -3221,5 +3221,84 @@ private static TVectorDouble AsinSingleCoreDouble<TVectorDouble>(TVectorDouble a
 
             return ax + ax * g * poly + (TVectorDouble.Create(PIBY2) & gtHalf);
         }
+
+        public static TVectorDouble AcoshDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(TVectorDouble x)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
+            where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
+        {
+            // The AMD AOCL-LibM scalar acosh implementation (acosh.c) uses range-based
+            // polynomial lookup tables which cannot be trivially vectorized due to the cost
+            // of gather instructions. Instead, this uses the mathematical identity:
+            //   acosh(x) = log(x + sqrt(x^2 - 1))
+            // with special handling for x near 1 and large x for improved accuracy.
+
+            const double LN2 = 0.693147180559945309417;
+            const double NEAR_ONE_THRESHOLD = 1.0 + 2.98023223876953125e-08; // 1 + 2^-25
+            const double LARGE_THRESHOLD = 268435456.0; // 2^28
+
+            // Return NaN for x < 1
+            TVectorDouble nanMask = TVectorDouble.LessThan(x, TVectorDouble.One);
+
+            // For x close to 1 (1 < x <= 1 + 2^-25), use sqrt(2 * (x - 1))
+            TVectorDouble nearOneMask = TVectorDouble.LessThanOrEqual(x, TVectorDouble.Create(NEAR_ONE_THRESHOLD));
+
+            // For large values (x > 2^28), use log(2) + log(x)
+            TVectorDouble largeMask = TVectorDouble.GreaterThan(x, TVectorDouble.Create(LARGE_THRESHOLD));
+
+            // Normal case: log(x + sqrt(x^2 - 1))
+            TVectorDouble x2 = x * x;
+            TVectorDouble sqrtArg = x2 - TVectorDouble.One;
+            TVectorDouble normal = LogDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(x + TVectorDouble.Sqrt(sqrtArg));
+
+            // Large value case: log(2) + log(x)
+            TVectorDouble large = TVectorDouble.Create(LN2) + LogDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(x);
+
+            // Near one case: sqrt(2 * (x - 1))
+            TVectorDouble nearOne = TVectorDouble.Sqrt(TVectorDouble.Create(2.0) * (x - TVectorDouble.One));
+
+            // Select appropriate result based on magnitude
+            TVectorDouble result = TVectorDouble.ConditionalSelect(largeMask, large, normal);
+            result = TVectorDouble.ConditionalSelect(nearOneMask, nearOne, result);
+            result = TVectorDouble.ConditionalSelect(nanMask, TVectorDouble.Create(double.NaN), result);
+
+            return result;
+        }
+
+        public static TVectorSingle AcoshSingle<TVectorSingle, TVectorInt32, TVectorUInt32, TVectorDouble, TVectorInt64, TVectorUInt64>(TVectorSingle x)
+            where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
+            where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
+            where TVectorUInt32 : unmanaged, ISimdVector<TVectorUInt32, uint>
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
+            where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
+        {
+            // This code is based on `acoshf` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // AMD acoshf.c uses mathematical identities (no polynomial approximation):
+            // For x > 1/sqrt(eps): acosh(x) = log(2) + log(x)
+            // For 2 < x <= 1/sqrt(eps): acosh(x) = log(x + sqrt(x^2 - 1))
+            // For sqrt(eps) <= x <= 2: t=x-1, acosh(x) = log1p(t + sqrt(2t + t^2))
+            // Widens to double for improved accuracy, matching AMD acoshf.c behavior.
+
+            if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
+            {
+                TVectorDouble dx = Widen<TVectorSingle, TVectorDouble>(x);
+                return Narrow<TVectorDouble, TVectorSingle>(AcoshDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(dx));
+            }
+            else
+            {
+                TVectorDouble dxLo = WidenLower<TVectorSingle, TVectorDouble>(x);
+                TVectorDouble dxHi = WidenUpper<TVectorSingle, TVectorDouble>(x);
+                return Narrow<TVectorDouble, TVectorSingle>(
+                    AcoshDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(dxLo),
+                    AcoshDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(dxHi)
+                );
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
index ada0999ed91e96..ef42ac72dcd810 100644
--- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
+++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
@@ -21,6 +21,8 @@ public static partial class Vector128
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> Asin(System.Runtime.Intrinsics.Vector128<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<float> Asin(System.Runtime.Intrinsics.Vector128<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> Acosh(System.Runtime.Intrinsics.Vector128<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> Acosh(System.Runtime.Intrinsics.Vector128<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
@@ -484,6 +486,8 @@ public static partial class Vector256
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<double> Asin(System.Runtime.Intrinsics.Vector256<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<float> Asin(System.Runtime.Intrinsics.Vector256<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<double> Acosh(System.Runtime.Intrinsics.Vector256<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<float> Acosh(System.Runtime.Intrinsics.Vector256<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
@@ -936,6 +940,8 @@ public static partial class Vector512
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> Asin(System.Runtime.Intrinsics.Vector512<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> Asin(System.Runtime.Intrinsics.Vector512<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> Acosh(System.Runtime.Intrinsics.Vector512<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Acosh(System.Runtime.Intrinsics.Vector512<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
@@ -1387,6 +1393,8 @@ public static partial class Vector64
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<double> Asin(System.Runtime.Intrinsics.Vector64<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<float> Asin(System.Runtime.Intrinsics.Vector64<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<double> Acosh(System.Runtime.Intrinsics.Vector64<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<float> Acosh(System.Runtime.Intrinsics.Vector64<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }

From 3544d6622a612e056dc01513f77f9028fc4150f4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 1 Apr 2026 19:07:47 +0000
Subject: [PATCH 2/2] Address PR feedback: fix alphabetical ordering, improve
 near-one accuracy, fix comment

Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/43d57cf9-0cfe-4d16-9977-8c3294328c02

Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
 .../System/Runtime/Intrinsics/VectorMath.cs   | 31 +++++++------------
 .../ref/System.Runtime.Intrinsics.cs          | 16 +++++-----
 2 files changed, 20 insertions(+), 27 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index c179e6c0710e14..0598aeca7260d9 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -3230,36 +3230,30 @@ public static TVectorDouble AcoshDouble<TVectorDouble, TVectorInt64, TVectorUInt
             // The AMD AOCL-LibM scalar acosh implementation (acosh.c) uses range-based
             // polynomial lookup tables which cannot be trivially vectorized due to the cost
             // of gather instructions. Instead, this uses the mathematical identity:
-            //   acosh(x) = log(x + sqrt(x^2 - 1))
-            // with special handling for x near 1 and large x for improved accuracy.
+            //   acosh(x) = log(x + sqrt((x - 1) * (x + 1)))
+            // using (x-1)*(x+1) instead of x^2-1 to avoid catastrophic cancellation near x=1,
+            // with special handling for large x for improved accuracy.
 
             const double LN2 = 0.693147180559945309417;
-            const double NEAR_ONE_THRESHOLD = 1.0 + 2.98023223876953125e-08; // 1 + 2^-25
             const double LARGE_THRESHOLD = 268435456.0; // 2^28
 
             // Return NaN for x < 1
             TVectorDouble nanMask = TVectorDouble.LessThan(x, TVectorDouble.One);
 
-            // For x close to 1 (1 < x <= 1 + 2^-25), use sqrt(2 * (x - 1))
-            TVectorDouble nearOneMask = TVectorDouble.LessThanOrEqual(x, TVectorDouble.Create(NEAR_ONE_THRESHOLD));
-
             // For large values (x > 2^28), use log(2) + log(x)
             TVectorDouble largeMask = TVectorDouble.GreaterThan(x, TVectorDouble.Create(LARGE_THRESHOLD));
 
-            // Normal case: log(x + sqrt(x^2 - 1))
-            TVectorDouble x2 = x * x;
-            TVectorDouble sqrtArg = x2 - TVectorDouble.One;
-            TVectorDouble normal = LogDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(x + TVectorDouble.Sqrt(sqrtArg));
+            // Normal case: log(x + sqrt((x - 1) * (x + 1)))
+            // Using (x-1)*(x+1) avoids catastrophic cancellation when x is near 1
+            TVectorDouble xm1 = x - TVectorDouble.One;
+            TVectorDouble xp1 = x + TVectorDouble.One;
+            TVectorDouble normal = LogDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(x + TVectorDouble.Sqrt(xm1 * xp1));
 
             // Large value case: log(2) + log(x)
             TVectorDouble large = TVectorDouble.Create(LN2) + LogDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(x);
 
-            // Near one case: sqrt(2 * (x - 1))
-            TVectorDouble nearOne = TVectorDouble.Sqrt(TVectorDouble.Create(2.0) * (x - TVectorDouble.One));
-
             // Select appropriate result based on magnitude
             TVectorDouble result = TVectorDouble.ConditionalSelect(largeMask, large, normal);
-            result = TVectorDouble.ConditionalSelect(nearOneMask, nearOne, result);
             result = TVectorDouble.ConditionalSelect(nanMask, TVectorDouble.Create(double.NaN), result);
 
             return result;
@@ -3279,11 +3273,10 @@ public static TVectorSingle AcoshSingle<TVectorSingle, TVectorInt32, TVectorUInt
             // Licensed under the BSD 3-Clause "New" or "Revised" License
             // See THIRD-PARTY-NOTICES.TXT for the full license text
 
-            // AMD acoshf.c uses mathematical identities (no polynomial approximation):
-            // For x > 1/sqrt(eps): acosh(x) = log(2) + log(x)
-            // For 2 < x <= 1/sqrt(eps): acosh(x) = log(x + sqrt(x^2 - 1))
-            // For sqrt(eps) <= x <= 2: t=x-1, acosh(x) = log1p(t + sqrt(2t + t^2))
-            // Widens to double for improved accuracy, matching AMD acoshf.c behavior.
+            // This implementation computes single-precision acosh by widening the
+            // input to double precision, calling AcoshDouble, and then narrowing
+            // the result back to single precision. The widening is what improves
+            // accuracy; AcoshDouble itself uses log identities, not a polynomial.
 
             if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
             {
diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
index ef42ac72dcd810..6c891ddd5f22cc 100644
--- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
+++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
@@ -19,10 +19,10 @@ public static partial class Vector128
         public static System.Runtime.Intrinsics.Vector128<T> AndNot<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) { throw null; }
         public static bool Any<T>(System.Runtime.Intrinsics.Vector128<T> vector, T value) { throw null; }
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> Asin(System.Runtime.Intrinsics.Vector128<double> vector) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> Asin(System.Runtime.Intrinsics.Vector128<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> Acosh(System.Runtime.Intrinsics.Vector128<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<float> Acosh(System.Runtime.Intrinsics.Vector128<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> Asin(System.Runtime.Intrinsics.Vector128<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> Asin(System.Runtime.Intrinsics.Vector128<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
@@ -484,10 +484,10 @@ public static partial class Vector256
         public static System.Runtime.Intrinsics.Vector256<T> AndNot<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) { throw null; }
         public static bool Any<T>(System.Runtime.Intrinsics.Vector256<T> vector, T value) { throw null; }
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<double> Asin(System.Runtime.Intrinsics.Vector256<double> vector) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<float> Asin(System.Runtime.Intrinsics.Vector256<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<double> Acosh(System.Runtime.Intrinsics.Vector256<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<float> Acosh(System.Runtime.Intrinsics.Vector256<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<double> Asin(System.Runtime.Intrinsics.Vector256<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<float> Asin(System.Runtime.Intrinsics.Vector256<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
@@ -938,10 +938,10 @@ public static partial class Vector512
         public static System.Runtime.Intrinsics.Vector512<T> AndNot<T>(System.Runtime.Intrinsics.Vector512<T> left, System.Runtime.Intrinsics.Vector512<T> right) { throw null; }
         public static bool Any<T>(System.Runtime.Intrinsics.Vector512<T> vector, T value) { throw null; }
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
-        public static System.Runtime.Intrinsics.Vector512<double> Asin(System.Runtime.Intrinsics.Vector512<double> vector) { throw null; }
-        public static System.Runtime.Intrinsics.Vector512<float> Asin(System.Runtime.Intrinsics.Vector512<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> Acosh(System.Runtime.Intrinsics.Vector512<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> Acosh(System.Runtime.Intrinsics.Vector512<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> Asin(System.Runtime.Intrinsics.Vector512<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Asin(System.Runtime.Intrinsics.Vector512<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
@@ -1391,10 +1391,10 @@ public static partial class Vector64
         public static System.Runtime.Intrinsics.Vector64<T> AndNot<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) { throw null; }
         public static bool Any<T>(System.Runtime.Intrinsics.Vector64<T> vector, T value) { throw null; }
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
-        public static System.Runtime.Intrinsics.Vector64<double> Asin(System.Runtime.Intrinsics.Vector64<double> vector) { throw null; }
-        public static System.Runtime.Intrinsics.Vector64<float> Asin(System.Runtime.Intrinsics.Vector64<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<double> Acosh(System.Runtime.Intrinsics.Vector64<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<float> Acosh(System.Runtime.Intrinsics.Vector64<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<double> Asin(System.Runtime.Intrinsics.Vector64<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<float> Asin(System.Runtime.Intrinsics.Vector64<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }