From a2461ab521e376a40f6552558c04c1b256a27433 Mon Sep 17 00:00:00 2001
From: Stephen Toub <stoub@microsoft.com>
Date: Wed, 1 Apr 2026 12:36:19 -0400
Subject: [PATCH 1/2] Add vectorized Atan2 implementations for
 Vector64/128/256/512

Adds vectorized Atan2 built on a vectorized port of the AMD AOCL-LibM atan
algorithm. The AMD scalar atan2 uses a 241-entry lookup table that cannot
be trivially vectorized, so atan2(y,x) is instead computed with the
vectorized AtanDouble plus quadrant adjustments.

Includes:
- VectorMath: AtanDouble (dependency), Atan2Double, Atan2Single
- Vector64/128/256/512: Atan2(double), Atan2(float) overloads
- TensorPrimitives.Atan2: Vectorized operator
- Test tolerance updates for Atan2 and Atan2Pi

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../Tensors/netcore/TensorPrimitives.Atan2.cs |  63 ++++-
 .../tests/TensorPrimitives.Generic.cs         |  12 +-
 .../tests/TensorTests.cs                      |  36 ++-
 .../System/Runtime/Intrinsics/Vector128.cs    |  49 ++++
 .../System/Runtime/Intrinsics/Vector256.cs    |  42 ++++
 .../System/Runtime/Intrinsics/Vector512.cs    |  42 ++++
 .../src/System/Runtime/Intrinsics/Vector64.cs |  57 +++++
 .../System/Runtime/Intrinsics/VectorMath.cs   | 225 ++++++++++++++++++
 .../ref/System.Runtime.Intrinsics.cs          |   8 +
 9 files changed, 522 insertions(+), 12 deletions(-)
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2.cs
index 310738623184f4..1b39d1afc0b222 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2.cs
@@ -1,6 +1,7 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics;
 using System.Runtime.Intrinsics;
 
 namespace System.Numerics.Tensors
@@ -70,11 +71,65 @@ public static void Atan2<T>(T y, ReadOnlySpan<T> x, Span<T> destination)
         private readonly struct Atan2Operator<T> : IBinaryOperator<T>
             where T : IFloatingPointIeee754<T>
         {
-            public static bool Vectorizable => false; // TODO: Vectorize
+            public static bool Vectorizable => // true only for float/double, and only in builds where NET11_0_OR_GREATER is defined
+#if NET11_0_OR_GREATER
+                typeof(T) == typeof(float) || typeof(T) == typeof(double);
+#else
+                false;
+#endif
+
             public static T Invoke(T y, T x) => T.Atan2(y, x);
-            public static Vector128<T> Invoke(Vector128<T> y, Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> y, Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> y, Vector512<T> x) => throw new NotSupportedException();
+
+            public static Vector128<T> Invoke(Vector128<T> y, Vector128<T> x)
+            {
+#if NET11_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector128.Atan2(y.AsDouble(), x.AsDouble()).As<double, T>(); // view the lanes as double, compute, then view the result as T again
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float)); // Vectorizable admits only float and double, so float is the only other case
+                    return Vector128.Atan2(y.AsSingle(), x.AsSingle()).As<float, T>();
+                }
+#else
+                throw new NotSupportedException(); // Vectorizable is false in this configuration
+#endif
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> y, Vector256<T> x)
+            {
+#if NET11_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector256.Atan2(y.AsDouble(), x.AsDouble()).As<double, T>(); // same dispatch as the Vector128 overload, on 256-bit vectors
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float)); // float is the only other type Vectorizable admits
+                    return Vector256.Atan2(y.AsSingle(), x.AsSingle()).As<float, T>();
+                }
+#else
+                throw new NotSupportedException(); // Vectorizable is false in this configuration
+#endif
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> y, Vector512<T> x)
+            {
+#if NET11_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector512.Atan2(y.AsDouble(), x.AsDouble()).As<double, T>(); // same dispatch as the Vector128 overload, on 512-bit vectors
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float)); // float is the only other type Vectorizable admits
+                    return Vector512.Atan2(y.AsSingle(), x.AsSingle()).As<float, T>();
+                }
+#else
+                throw new NotSupportedException(); // Vectorizable is false in this configuration
+#endif
+            }
         }
     }
 }
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
index 771813177bb19c..5298ca1815a343 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
@@ -624,8 +624,8 @@ public void SpanDestinationFunctions_ThrowsForOverlappingInputsWithOutputs(SpanD
         #region Span,Span -> Destination
         public static IEnumerable<object[]> SpanSpanDestinationFunctionsToTest()
         {
-            yield return Create(TensorPrimitives.Atan2, T.Atan2);
-            yield return Create(TensorPrimitives.Atan2Pi, T.Atan2Pi);
+            yield return Create(TensorPrimitives.Atan2, T.Atan2, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14, floatTolerance: 1e-6f));
+            yield return Create(TensorPrimitives.Atan2Pi, T.Atan2Pi, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14, floatTolerance: 1e-6f));
             yield return Create(TensorPrimitives.CopySign, T.CopySign);
             yield return Create(TensorPrimitives.Hypot, T.Hypot);
             yield return Create(TensorPrimitives.Ieee754Remainder, T.Ieee754Remainder);
@@ -763,8 +763,8 @@ public void SpanSpanDestination_ThrowsForOverlappingInputsWithOutputs(SpanSpanDe
         #region Span,Scalar -> Destination
         public static IEnumerable<object[]> SpanScalarDestinationFunctionsToTest()
         {
-            yield return Create(TensorPrimitives.Atan2, T.Atan2);
-            yield return Create(TensorPrimitives.Atan2Pi, T.Atan2Pi);
+            yield return Create(TensorPrimitives.Atan2, T.Atan2, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14, floatTolerance: 1e-6f));
+            yield return Create(TensorPrimitives.Atan2Pi, T.Atan2Pi, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14, floatTolerance: 1e-6f));
             yield return Create(TensorPrimitives.CopySign, T.CopySign);
             yield return Create(TensorPrimitives.Ieee754Remainder, T.Ieee754Remainder);
             yield return Create(TensorPrimitives.Pow, T.Pow, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-13, floatTolerance: 1e-5f));
@@ -873,8 +873,8 @@ public void SpanScalarDestination_ThrowsForOverlappingInputsWithOutputs(SpanScal
         #region Scalar,Span -> Destination
         public static IEnumerable<object[]> ScalarSpanFloatDestinationFunctionsToTest()
         {
-            yield return Create(TensorPrimitives.Atan2, T.Atan2);
-            yield return Create(TensorPrimitives.Atan2Pi, T.Atan2Pi);
+            yield return Create(TensorPrimitives.Atan2, T.Atan2, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14, floatTolerance: 1e-6f));
+            yield return Create(TensorPrimitives.Atan2Pi, T.Atan2Pi, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14, floatTolerance: 1e-6f));
             yield return Create(TensorPrimitives.Pow, T.Pow, Helpers.DetermineTolerance<T>(floatTolerance: 1e-5f));
             yield return Create(TensorPrimitives.Ieee754Remainder, T.Ieee754Remainder);
 
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorTests.cs
index 814ef0aa909223..6365e3f06828f8 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorTests.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorTests.cs
@@ -183,8 +183,6 @@ public void TensorExtensionsSpanInTOut<T>(PerformCalculationSpanInTOut<T> tensor
         public static IEnumerable<object[]> TwoSpanInSpanOutData()
         {
             yield return Create<float>(TensorPrimitives.Add, Tensor.Add);
-            yield return Create<float>(TensorPrimitives.Atan2, Tensor.Atan2);
-            yield return Create<float>(TensorPrimitives.Atan2Pi, Tensor.Atan2Pi);
             yield return Create<float>(TensorPrimitives.CopySign, Tensor.CopySign);
             yield return Create<float>(TensorPrimitives.Divide, Tensor.Divide);
             yield return Create<float>(TensorPrimitives.Hypot, Tensor.Hypot);
@@ -197,6 +195,12 @@ static object[] Create<T>(PerformCalculationTwoSpanInSpanOut<T> tensorPrimitives
                 => new object[] { tensorPrimitivesMethod, tensorOperation };
         }
 
+        public static IEnumerable<object[]> TwoSpanInSpanOutDataWithTolerance() // each row: { TensorPrimitives operation, Tensor operation, tolerance }
+        {
+            yield return [(PerformCalculationTwoSpanInSpanOut<float>)TensorPrimitives.Atan2, (PerformTwoSpanInSpanOut<float>)Tensor.Atan2, 1e-6f];
+            yield return [(PerformCalculationTwoSpanInSpanOut<float>)TensorPrimitives.Atan2Pi, (PerformTwoSpanInSpanOut<float>)Tensor.Atan2Pi, 1e-6f];
+        }
+
         [Theory, MemberData(nameof(TwoSpanInSpanOutData))]
         public void TensorExtensionsTwoSpanInSpanOut<T>(PerformCalculationTwoSpanInSpanOut<T> tensorPrimitivesOperation, PerformTwoSpanInSpanOut<T> tensorOperation)
             where T: INumberBase<T>, IComparisonOperators<T, T, bool>
@@ -226,6 +230,34 @@ public void TensorExtensionsTwoSpanInSpanOut<T>(PerformCalculationTwoSpanInSpanO
             });
         }
 
+        [Theory, MemberData(nameof(TwoSpanInSpanOutDataWithTolerance))]
+        public void TensorExtensionsTwoSpanInSpanOutWithTolerance(PerformCalculationTwoSpanInSpanOut<float> tensorPrimitivesOperation, PerformTwoSpanInSpanOut<float> tensorOperation, float tolerance)
+        {
+            // For every shape, the Tensor operation must match the span-based operation element by element, within the tolerance.
+            Assert.All(Helpers.TensorShapes, shape =>
+            {
+                nint elementCount = CalculateTotalLength(shape);
+                float[] first = new float[elementCount];
+                float[] second = new float[elementCount];
+                float[] expected = new float[elementCount];
+                FillTensor<float>(first);
+                FillTensor<float>(second);
+
+                // Expected values come from the span overload; the values under test come from the tensor overload.
+                Tensor<float> firstTensor = Tensor.Create<float>(first, shape, []);
+                Tensor<float> secondTensor = Tensor.Create<float>(second, shape, []);
+                tensorPrimitivesOperation((ReadOnlySpan<float>)first, second, expected);
+                Tensor<float> actualTensor = tensorOperation(firstTensor, secondTensor);
+                Assert.Equal(shape, actualTensor.Lengths);
+                nint[] origin = new nint[shape.Length];
+                ReadOnlySpan<float> actual = MemoryMarshal.CreateSpan(ref actualTensor[origin], (int)elementCount);
+                for (int i = 0; i < first.Length; i++)
+                {
+                    Assert.True(Helpers.IsEqualWithTolerance(expected[i], actual[i], tolerance), $"Expected: {expected[i]}, Actual: {actual[i]}");
+                }
+            });
+        }
+
         public delegate T PerformTwoSpanInFloatOut<T>(in ReadOnlyTensorSpan<T> input, in ReadOnlyTensorSpan<T> input2);
         public delegate T PerformCalculationTwoSpanInFloatOut<T>(ReadOnlySpan<T> input, ReadOnlySpan<T> inputTwo);
         public static IEnumerable<object[]> TwoSpanInFloatOutData()
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
index dc20ffbd556bf5..9c63998a60148b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
@@ -854,6 +854,55 @@ public static Vector128<float> Asin(Vector128<float> vector)
             }
         }
 
+        /// <summary>Computes the two-argument arc tangent for each pair of corresponding elements in two vectors.</summary>
+        /// <param name="y">The vector of numerators, each divided by the corresponding element of <paramref name="x" />.</param>
+        /// <param name="x">The vector of denominators, each dividing the corresponding element of <paramref name="y" />.</param>
+        /// <returns>A vector in which each element is the arc tangent of the quotient of the matching elements of <paramref name="y" /> and <paramref name="x" />.</returns>
+        /// <remarks>The resulting angles are expressed in radians.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<double> Atan2(Vector128<double> y, Vector128<double> x)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // No 128-bit acceleration: evaluate each 64-bit half on its own and stitch the halves together.
+                Vector64<double> lower = Vector64.Atan2(y._lower, x._lower);
+                Vector64<double> upper = Vector64.Atan2(y._upper, x._upper);
+                return Create(lower, upper);
+            }
+
+            // Accelerated: hand the full vectors to the
+            // shared VectorMath implementation.
+            return VectorMath.Atan2Double<Vector128<double>>(y, x);
+        }
+
+        /// <summary>Computes the two-argument arc tangent for each pair of corresponding elements in two vectors.</summary>
+        /// <param name="y">The vector of numerators, each divided by the corresponding element of <paramref name="x" />.</param>
+        /// <param name="x">The vector of denominators, each dividing the corresponding element of <paramref name="y" />.</param>
+        /// <returns>A vector in which each element is the arc tangent of the quotient of the matching elements of <paramref name="y" /> and <paramref name="x" />.</returns>
+        /// <remarks>The resulting angles are expressed in radians.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> Atan2(Vector128<float> y, Vector128<float> x)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // No 128-bit acceleration: evaluate each 64-bit half on its own and stitch the halves together.
+                Vector64<float> lower = Vector64.Atan2(y._lower, x._lower);
+                Vector64<float> upper = Vector64.Atan2(y._upper, x._upper);
+                return Create(lower, upper);
+            }
+
+            // The second type argument is the double vector type given to the VectorMath
+            // implementation (presumably for widened intermediate math - confirm in Atan2Single).
+            // Prefer the 256-bit one when accelerated: it has as many lanes as Vector128<float>.
+            if (Vector256.IsHardwareAccelerated)
+            {
+                return VectorMath.Atan2Single<Vector128<float>, Vector256<double>>(y, x);
+            }
+
+            // Otherwise stay within 128-bit vectors.
+            return VectorMath.Atan2Single<Vector128<float>, Vector128<double>>(y, x);
+        }
+
         /// <inheritdoc cref="Vector64.Cos(Vector64{double})" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector128<double> Cos(Vector128<double> vector)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index c51c1f5329ef74..d0449c26d92802 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -855,6 +855,48 @@ public static Vector256<float> Asin(Vector256<float> vector)
             }
         }
 
+        /// <summary>Computes the two-argument arc tangent for each pair of corresponding elements in two vectors.</summary>
+        /// <param name="y">The vector of numerators, each divided by the corresponding element of <paramref name="x" />.</param>
+        /// <param name="x">The vector of denominators, each dividing the corresponding element of <paramref name="y" />.</param>
+        /// <returns>A vector in which each element is the arc tangent of the quotient of the matching elements of <paramref name="y" /> and <paramref name="x" />.</returns>
+        /// <remarks>The resulting angles are expressed in radians.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<double> Atan2(Vector256<double> y, Vector256<double> x)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // No 256-bit acceleration: evaluate each 128-bit half on its own and stitch the halves together.
+                Vector128<double> lower = Vector128.Atan2(y._lower, x._lower);
+                Vector128<double> upper = Vector128.Atan2(y._upper, x._upper);
+                return Create(lower, upper);
+            }
+
+            // Accelerated: hand the full vectors to the
+            // shared VectorMath implementation.
+            return VectorMath.Atan2Double<Vector256<double>>(y, x);
+        }
+
+        /// <summary>Computes the two-argument arc tangent for each pair of corresponding elements in two vectors.</summary>
+        /// <param name="y">The vector of numerators, each divided by the corresponding element of <paramref name="x" />.</param>
+        /// <param name="x">The vector of denominators, each dividing the corresponding element of <paramref name="y" />.</param>
+        /// <returns>A vector whose elements are the arc tangent of the quotient of the corresponding elements in <paramref name="y" /> and <paramref name="x" />.</returns>
+        /// <remarks>The angles are returned in radians.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> Atan2(Vector256<float> y, Vector256<float> x)
+        {
+            if (IsHardwareAccelerated)
+            {
+                if (Vector512.IsHardwareAccelerated) // use 512-bit doubles only when accelerated, mirroring the Vector64/Vector128 overloads
+                {
+                    return VectorMath.Atan2Single<Vector256<float>, Vector512<double>>(y, x);
+                }
+
+                return VectorMath.Atan2Single<Vector256<float>, Vector256<double>>(y, x);
+            }
+
+            return Create(Vector128.Atan2(y._lower, x._lower), Vector128.Atan2(y._upper, x._upper));
+        }
+
         /// <inheritdoc cref="Vector128.Cos(Vector128{double})" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<double> Cos(Vector256<double> vector)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
index 5bc4b5e0964a52..f927d8ea747aa3 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
@@ -758,6 +758,48 @@ public static Vector512<float> Asin(Vector512<float> vector)
             }
         }
 
+        /// <summary>Computes the two-argument arc tangent for each pair of corresponding elements in two vectors.</summary>
+        /// <param name="y">The vector of numerators, each divided by the corresponding element of <paramref name="x" />.</param>
+        /// <param name="x">The vector of denominators, each dividing the corresponding element of <paramref name="y" />.</param>
+        /// <returns>A vector in which each element is the arc tangent of the quotient of the matching elements of <paramref name="y" /> and <paramref name="x" />.</returns>
+        /// <remarks>The resulting angles are expressed in radians.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector512<double> Atan2(Vector512<double> y, Vector512<double> x)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // No 512-bit acceleration: evaluate each 256-bit half on its own and stitch the halves together.
+                Vector256<double> lower = Vector256.Atan2(y._lower, x._lower);
+                Vector256<double> upper = Vector256.Atan2(y._upper, x._upper);
+                return Create(lower, upper);
+            }
+
+            // Accelerated: hand the full vectors to the
+            // shared VectorMath implementation.
+            return VectorMath.Atan2Double<Vector512<double>>(y, x);
+        }
+
+        /// <summary>Computes the two-argument arc tangent for each pair of corresponding elements in two vectors.</summary>
+        /// <param name="y">The vector of numerators, each divided by the corresponding element of <paramref name="x" />.</param>
+        /// <param name="x">The vector of denominators, each dividing the corresponding element of <paramref name="y" />.</param>
+        /// <returns>A vector in which each element is the arc tangent of the quotient of the matching elements of <paramref name="y" /> and <paramref name="x" />.</returns>
+        /// <remarks>The resulting angles are expressed in radians.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector512<float> Atan2(Vector512<float> y, Vector512<float> x)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // No 512-bit acceleration: evaluate each 256-bit half on its own and stitch the halves together.
+                Vector256<float> lower = Vector256.Atan2(y._lower, x._lower);
+                Vector256<float> upper = Vector256.Atan2(y._upper, x._upper);
+                return Create(lower, upper);
+            }
+
+            // Accelerated: the double vector type passed alongside is the same
+            // 512-bit width, as this file uses no wider vector type.
+            return VectorMath.Atan2Single<Vector512<float>, Vector512<double>>(y, x);
+        }
+
         /// <inheritdoc cref="Vector256.Cos(Vector256{double})" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector512<double> Cos(Vector512<double> vector)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
index 7077fe391347e6..db8ae37e1cb006 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
@@ -819,6 +819,49 @@ public static Vector64<float> Asin(Vector64<float> vector)
             }
         }
 
+        /// <summary>Computes the two-argument arc tangent for each pair of corresponding elements in two vectors.</summary>
+        /// <param name="y">The vector of numerators, each divided by the corresponding element of <paramref name="x" />.</param>
+        /// <param name="x">The vector of denominators, each dividing the corresponding element of <paramref name="y" />.</param>
+        /// <returns>A vector in which each element is the arc tangent of the quotient of the matching elements of <paramref name="y" /> and <paramref name="x" />.</returns>
+        /// <remarks>The resulting angles are expressed in radians.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<double> Atan2(Vector64<double> y, Vector64<double> x)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // No 64-bit acceleration: fall back to the element-by-element scalar helper.
+                return Atan2<double>(y, x);
+            }
+
+            // Accelerated: hand the vectors to the shared VectorMath implementation.
+            return VectorMath.Atan2Double<Vector64<double>>(y, x);
+        }
+
+        /// <summary>Computes the two-argument arc tangent for each pair of corresponding elements in two vectors.</summary>
+        /// <param name="y">The vector of numerators, each divided by the corresponding element of <paramref name="x" />.</param>
+        /// <param name="x">The vector of denominators, each dividing the corresponding element of <paramref name="y" />.</param>
+        /// <returns>A vector in which each element is the arc tangent of the quotient of the matching elements of <paramref name="y" /> and <paramref name="x" />.</returns>
+        /// <remarks>The resulting angles are expressed in radians.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<float> Atan2(Vector64<float> y, Vector64<float> x)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // No 64-bit acceleration: fall back to the element-by-element scalar helper.
+                return Atan2<float>(y, x);
+            }
+
+            // The second type argument is the double vector type given to the VectorMath implementation.
+            // Prefer the 128-bit one when accelerated: it has as many lanes as Vector64<float>.
+            if (Vector128.IsHardwareAccelerated)
+            {
+                return VectorMath.Atan2Single<Vector64<float>, Vector128<double>>(y, x);
+            }
+
+            // Otherwise stay within 64-bit vectors.
+            return VectorMath.Atan2Single<Vector64<float>, Vector64<double>>(y, x);
+        }
+
         /// <summary>Computes the cos of each element in a vector.</summary>
         /// <param name="vector">The vector that will have its Cos computed.</param>
         /// <returns>A vector whose elements are the cos of the elements in <paramref name="vector" />.</returns>
@@ -3713,6 +3756,20 @@ internal static Vector64<T> Asin<T>(Vector64<T> vector)
             return result;
         }
 
+        internal static Vector64<T> Atan2<T>(Vector64<T> y, Vector64<T> x)
+            where T : IFloatingPointIeee754<T>
+        {
+            // Scalar fallback: apply T.Atan2 to each pair of corresponding elements.
+            Unsafe.SkipInit(out Vector64<T> elements);
+            for (int i = 0; i < Vector64<T>.Count; i++)
+            {
+                T numerator = y.GetElementUnsafe(i);
+                T denominator = x.GetElementUnsafe(i);
+                elements.SetElementUnsafe(i, T.Atan2(numerator, denominator));
+            }
+            return elements;
+        }
+
         internal static Vector64<T> Sin<T>(Vector64<T> vector)
             where T : ITrigonometricFunctions<T>
         {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index bdbe668efa326b..0b922807e2ee55 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -3221,5 +3221,230 @@ private static TVectorDouble AsinSingleCoreDouble<TVectorDouble>(TVectorDouble a
 
             return ax + ax * g * poly + (TVectorDouble.Create(PIBY2) & gtHalf);
         }
+
+        public static TVectorDouble AtanDouble<TVectorDouble>(TVectorDouble x)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {
+            // This code is based on `atan` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2023 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation Notes
+            // --------------------
+            // Computes atan(x) for every element. Argument reduction to range [-7/16,7/16]
+            // Use the following identities on v = |x| (the sign of x is reapplied to the result at the end):
+            // atan(v) = pi/2 - atan(1/v)                when v > 39/16
+            //         = arctan(1.5) + atan((v-1.5)/(1+1.5*v))  when 19/16 < v <= 39/16
+            //         = pi/4 + atan((v-1)/(1+v))        when 11/16 < v <= 19/16
+            //         = arctan(0.5) + atan((2v-1)/(2+v))       when 7/16 < v <= 11/16
+            //         = atan(v)                         when v <= 7/16
+            //
+            // Core approximation: Remez(4,4) on [-7/16,7/16], evaluated as r - r*s*P(s)/Q(s) with s = r^2
+
+            // Range boundaries
+            const double R7_16 = 0.4375;    // 7/16
+            const double R11_16 = 0.6875;   // 11/16
+            const double R19_16 = 1.1875;   // 19/16
+            const double R39_16 = 2.4375;   // 39/16
+
+            // (chi, clo) pairs: high and low parts of each region's additive constant, for high-precision addition
+            const double CHI_0 = 0.0;
+            const double CLO_0 = 0.0;
+            const double CHI_HALF = 4.63647609000806093515e-01;  // arctan(0.5)
+            const double CLO_HALF = 2.26987774529616809294e-17;
+            const double CHI_1 = 7.85398163397448278999e-01;     // arctan(1.0) = pi/4
+            const double CLO_1 = 3.06161699786838240164e-17;
+            const double CHI_1_5 = 9.82793723247329054082e-01;   // arctan(1.5)
+            const double CLO_1_5 = 1.39033110312309953701e-17;
+            const double CHI_INF = 1.57079632679489655800e+00;   // arctan(inf) = pi/2
+            const double CLO_INF = 6.12323399573676480327e-17;
+
+            // Remez(4,4) polynomial coefficients for numerator P
+            const double P0 = 0.268297920532545909e0;
+            const double P1 = 0.447677206805497472e0;
+            const double P2 = 0.220638780716667420e0;
+            const double P3 = 0.304455919504853031e-1;
+            const double P4 = 0.142316903342317766e-3;
+
+            // Remez(4,4) polynomial coefficients for denominator Q
+            const double Q0 = 0.804893761597637733e0;
+            const double Q1 = 0.182596787737507063e1;
+            const double Q2 = 0.141254259931958921e1;
+            const double Q3 = 0.424602594203847109e0;
+            const double Q4 = 0.389525873944742195e-1;
+
+            TVectorDouble sign = x & TVectorDouble.Create(-0.0); // keep only the sign bit of each element
+            TVectorDouble v = TVectorDouble.Abs(x);
+
+            // Determine which region each element falls into (the masks nest: v > 39/16 also sets the three lower masks)
+            TVectorDouble gtR39_16 = TVectorDouble.GreaterThan(v, TVectorDouble.Create(R39_16));
+            TVectorDouble gtR19_16 = TVectorDouble.GreaterThan(v, TVectorDouble.Create(R19_16));
+            TVectorDouble gtR11_16 = TVectorDouble.GreaterThan(v, TVectorDouble.Create(R11_16));
+            TVectorDouble gtR7_16 = TVectorDouble.GreaterThan(v, TVectorDouble.Create(R7_16));
+
+            // Compute reduced argument for each region (every candidate is computed for all elements; one is selected per element below)
+
+            // Region 5: v > 39/16: reduced = -1/v (negated, so pi/2 + atan(reduced) = pi/2 - atan(1/v))
+            TVectorDouble reduced5 = -TVectorDouble.One / v;
+
+            // Region 4: 19/16 < v <= 39/16: reduced = (v-1.5)/(1+1.5*v)
+            TVectorDouble reduced4 = (v - TVectorDouble.Create(1.5)) / (TVectorDouble.One + TVectorDouble.Create(1.5) * v);
+
+            // Region 3: 11/16 < v <= 19/16: reduced = (v-1)/(1+v)
+            TVectorDouble reduced3 = (v - TVectorDouble.One) / (TVectorDouble.One + v);
+
+            // Region 2: 7/16 < v <= 11/16: reduced = (2*v-1)/(2+v)
+            TVectorDouble reduced2 = (TVectorDouble.Create(2.0) * v - TVectorDouble.One) / (TVectorDouble.Create(2.0) + v);
+
+            // Region 1: v <= 7/16: reduced = v
+            TVectorDouble reduced1 = v;
+
+            // Select reduced argument (largest threshold is tested first, so the highest matching region wins)
+            TVectorDouble reduced = TVectorDouble.ConditionalSelect(gtR39_16, reduced5,
+                                    TVectorDouble.ConditionalSelect(gtR19_16, reduced4,
+                                    TVectorDouble.ConditionalSelect(gtR11_16, reduced3,
+                                    TVectorDouble.ConditionalSelect(gtR7_16, reduced2, reduced1))));
+
+            // Select chi (high part of the region's additive constant), in the same priority order
+            TVectorDouble chi = TVectorDouble.ConditionalSelect(gtR39_16, TVectorDouble.Create(CHI_INF),
+                               TVectorDouble.ConditionalSelect(gtR19_16, TVectorDouble.Create(CHI_1_5),
+                               TVectorDouble.ConditionalSelect(gtR11_16, TVectorDouble.Create(CHI_1),
+                               TVectorDouble.ConditionalSelect(gtR7_16, TVectorDouble.Create(CHI_HALF), TVectorDouble.Create(CHI_0)))));
+
+            // Select clo (low part of the region's additive constant), in the same priority order
+            TVectorDouble clo = TVectorDouble.ConditionalSelect(gtR39_16, TVectorDouble.Create(CLO_INF),
+                               TVectorDouble.ConditionalSelect(gtR19_16, TVectorDouble.Create(CLO_1_5),
+                               TVectorDouble.ConditionalSelect(gtR11_16, TVectorDouble.Create(CLO_1),
+                               TVectorDouble.ConditionalSelect(gtR7_16, TVectorDouble.Create(CLO_HALF), TVectorDouble.Create(CLO_0)))));
+
+            // Compute s = reduced^2
+            TVectorDouble s = reduced * reduced;
+
+            // Evaluate numerator polynomial by Horner's scheme: P0 + s*(P1 + s*(P2 + s*(P3 + s*P4)))
+            TVectorDouble num = TVectorDouble.Create(P4);
+            num = TVectorDouble.MultiplyAddEstimate(num, s, TVectorDouble.Create(P3));
+            num = TVectorDouble.MultiplyAddEstimate(num, s, TVectorDouble.Create(P2));
+            num = TVectorDouble.MultiplyAddEstimate(num, s, TVectorDouble.Create(P1));
+            num = TVectorDouble.MultiplyAddEstimate(num, s, TVectorDouble.Create(P0));
+
+            // Evaluate denominator polynomial by Horner's scheme: Q0 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4)))
+            TVectorDouble denom = TVectorDouble.Create(Q4);
+            denom = TVectorDouble.MultiplyAddEstimate(denom, s, TVectorDouble.Create(Q3));
+            denom = TVectorDouble.MultiplyAddEstimate(denom, s, TVectorDouble.Create(Q2));
+            denom = TVectorDouble.MultiplyAddEstimate(denom, s, TVectorDouble.Create(Q1));
+            denom = TVectorDouble.MultiplyAddEstimate(denom, s, TVectorDouble.Create(Q0));
+
+            // q = reduced * s * num / denom (the correction term subtracted from reduced below)
+            TVectorDouble q = reduced * s * num / denom;
+
+            // result = chi - ((q - clo) - reduced), algebraically (chi + clo) + (reduced - q)
+            TVectorDouble result = chi - ((q - clo) - reduced);
+
+            // Restore sign: XOR the sign bit saved from x into the result computed for |x|
+            result ^= sign;
+
+            return result;
+        }
+
+        public static TVectorDouble Atan2Double<TVectorDouble>(TVectorDouble y, TVectorDouble x)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {
+            // The AMD AOCL-LibM scalar atan2 (atan2.c) relies on a 241-entry lookup table
+            // (ATAN_TABLE), which is not vectorized here because of the cost of gather
+            // instructions. Instead this computes AtanDouble(y / x) and then corrects the
+            // quadrant from the signs of x and y. Results for infinite inputs (y finite in the last two):
+            // - atan2(±∞, +∞) = ±pi/4
+            // - atan2(±∞, -∞) = ±3pi/4
+            // - atan2(±y, +∞) = ±0
+            // - atan2(±y, -∞) = ±pi
+
+            const double PI = 3.141592653589793;           // 0x1.921fb54442d18p1
+
+            // Check for x being negative using standard comparison
+            TVectorDouble xLessThanZero = TVectorDouble.LessThan(x, TVectorDouble.Zero);
+
+            // For signed zero handling: check if x is -0 specifically
+            // We detect -0 by checking: x == 0 AND 1/x < 0 (since 1/-0 = -∞)
+            TVectorDouble xIsZero = TVectorDouble.Equals(x, TVectorDouble.Zero);
+            TVectorDouble recipX = TVectorDouble.One / x;
+            TVectorDouble recipXNegative = TVectorDouble.LessThan(recipX, TVectorDouble.Zero);
+            TVectorDouble xIsNegativeOrNegZero = xLessThanZero | (xIsZero & recipXNegative);
+
+            // Check for y sign using same technique
+            TVectorDouble yLessThanZero = TVectorDouble.LessThan(y, TVectorDouble.Zero);
+            TVectorDouble yIsZero = TVectorDouble.Equals(y, TVectorDouble.Zero);
+            TVectorDouble recipY = TVectorDouble.One / y;
+            TVectorDouble recipYNegative = TVectorDouble.LessThan(recipY, TVectorDouble.Zero);
+            TVectorDouble yIsNegativeOrNegZero = yLessThanZero | (yIsZero & recipYNegative);
+
+            // Compute atan(y/x) for the general case
+            TVectorDouble ratio = y / x;
+            TVectorDouble atanResult = AtanDouble<TVectorDouble>(ratio);
+
+            // For x < 0 (including x = -0) the angle shifts by π, taking its sign from y
+            TVectorDouble piAdjust = TVectorDouble.ConditionalSelect(
+                yIsNegativeOrNegZero,
+                TVectorDouble.Create(-PI),
+                TVectorDouble.Create(PI)
+            );
+
+            // Only lanes with negative x (or -0) receive the shift; other lanes keep atan(y/x)
+            TVectorDouble result = TVectorDouble.ConditionalSelect(
+                xIsNegativeOrNegZero,
+                atanResult + piAdjust,
+                atanResult
+            );
+
+            // Special case: x and y are both ±0, so y / x is NaN and the result is chosen directly
+            // atan2(±0, +0) = ±0
+            // atan2(±0, -0) = ±π
+            TVectorDouble zeroResult = TVectorDouble.ConditionalSelect(yIsNegativeOrNegZero, TVectorDouble.Create(-0.0), TVectorDouble.Zero);
+            TVectorDouble piResult = TVectorDouble.ConditionalSelect(yIsNegativeOrNegZero, TVectorDouble.Create(-PI), TVectorDouble.Create(PI));
+
+            TVectorDouble bothZero = xIsZero & yIsZero;
+            TVectorDouble zeroXResult = TVectorDouble.ConditionalSelect(xIsNegativeOrNegZero, piResult, zeroResult);
+            result = TVectorDouble.ConditionalSelect(bothZero, zeroXResult, result);
+
+            // Special case: x and y are both ±∞, so y / x is NaN and the result is chosen directly
+            // atan2(±∞, +∞) = ±π/4
+            // atan2(±∞, -∞) = ±3π/4
+            const double PI_OVER_4 = 0.78539816339744830961;   // 0x1.921fb54442d18p-1
+            const double THREE_PI_OVER_4 = 2.3561944901923449; // 0x1.2d97c7f3321d2p+1
+            TVectorDouble xIsInf = TVectorDouble.Equals(TVectorDouble.Abs(x), TVectorDouble.Create(double.PositiveInfinity));
+            TVectorDouble yIsInf = TVectorDouble.Equals(TVectorDouble.Abs(y), TVectorDouble.Create(double.PositiveInfinity));
+            TVectorDouble bothInf = xIsInf & yIsInf;
+            TVectorDouble infBaseAngle = TVectorDouble.ConditionalSelect(xIsNegativeOrNegZero, TVectorDouble.Create(THREE_PI_OVER_4), TVectorDouble.Create(PI_OVER_4));
+            TVectorDouble infResult = TVectorDouble.ConditionalSelect(yIsNegativeOrNegZero, -infBaseAngle, infBaseAngle);
+            result = TVectorDouble.ConditionalSelect(bothInf, infResult, result);
+
+            return result;
+        }
+
+        public static TVectorSingle Atan2Single<TVectorSingle, TVectorDouble>(TVectorSingle y, TVectorSingle x)
+            where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {
+            // Float inputs are evaluated in double precision via Atan2Double and narrowed back.
+
+            if (TVectorSingle.ElementCount != TVectorDouble.ElementCount)
+            {
+                // Lane counts differ, so widen and evaluate the lower and upper halves separately.
+                TVectorDouble lowerY = WidenLower<TVectorSingle, TVectorDouble>(y);
+                TVectorDouble lowerX = WidenLower<TVectorSingle, TVectorDouble>(x);
+                TVectorDouble lower = Atan2Double<TVectorDouble>(lowerY, lowerX);
+
+                TVectorDouble upperY = WidenUpper<TVectorSingle, TVectorDouble>(y);
+                TVectorDouble upperX = WidenUpper<TVectorSingle, TVectorDouble>(x);
+                TVectorDouble upper = Atan2Double<TVectorDouble>(upperY, upperX);
+
+                return Narrow<TVectorDouble, TVectorSingle>(lower, upper);
+            }
+
+            TVectorDouble wideY = Widen<TVectorSingle, TVectorDouble>(y);
+            TVectorDouble wideX = Widen<TVectorSingle, TVectorDouble>(x);
+            return Narrow<TVectorDouble, TVectorSingle>(Atan2Double<TVectorDouble>(wideY, wideX));
+        }
     }
 }
diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
index ada0999ed91e96..a12882312e5a9b 100644
--- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
+++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
@@ -21,6 +21,8 @@ public static partial class Vector128
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> Asin(System.Runtime.Intrinsics.Vector128<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<float> Asin(System.Runtime.Intrinsics.Vector128<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> Atan2(System.Runtime.Intrinsics.Vector128<double> y, System.Runtime.Intrinsics.Vector128<double> x) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> Atan2(System.Runtime.Intrinsics.Vector128<float> y, System.Runtime.Intrinsics.Vector128<float> x) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
@@ -484,6 +486,8 @@ public static partial class Vector256
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<double> Asin(System.Runtime.Intrinsics.Vector256<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<float> Asin(System.Runtime.Intrinsics.Vector256<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<double> Atan2(System.Runtime.Intrinsics.Vector256<double> y, System.Runtime.Intrinsics.Vector256<double> x) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<float> Atan2(System.Runtime.Intrinsics.Vector256<float> y, System.Runtime.Intrinsics.Vector256<float> x) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
@@ -936,6 +940,8 @@ public static partial class Vector512
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> Asin(System.Runtime.Intrinsics.Vector512<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> Asin(System.Runtime.Intrinsics.Vector512<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> Atan2(System.Runtime.Intrinsics.Vector512<double> y, System.Runtime.Intrinsics.Vector512<double> x) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Atan2(System.Runtime.Intrinsics.Vector512<float> y, System.Runtime.Intrinsics.Vector512<float> x) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
@@ -1387,6 +1393,8 @@ public static partial class Vector64
         public static bool AnyWhereAllBitsSet<T>(System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<double> Asin(System.Runtime.Intrinsics.Vector64<double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<float> Asin(System.Runtime.Intrinsics.Vector64<float> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<double> Atan2(System.Runtime.Intrinsics.Vector64<double> y, System.Runtime.Intrinsics.Vector64<double> x) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<float> Atan2(System.Runtime.Intrinsics.Vector64<float> y, System.Runtime.Intrinsics.Vector64<float> x) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Byte> AsByte<T>(this System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Double> AsDouble<T>(this System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Int16> AsInt16<T>(this System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }

From ab9e1e14de60fa5d345e689335859901ed98ad56 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 1 Apr 2026 19:45:25 +0000
Subject: [PATCH 2/2] Address PR feedback: use IsNegative/IsZero, add Vector512
 check, add Atan2 tests

Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/c0c9223f-43e5-459d-82fd-210039fb48f8

Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
 .../tests/System/GenericMathTestMemberData.cs | 68 +++++++++++++++++++
 .../System/Runtime/Intrinsics/Vector256.cs    |  9 ++-
 .../System/Runtime/Intrinsics/VectorMath.cs   | 22 ++----
 .../tests/Vectors/Vector128Tests.cs           | 16 +++++
 .../tests/Vectors/Vector256Tests.cs           | 16 +++++
 .../tests/Vectors/Vector512Tests.cs           | 16 +++++
 .../tests/Vectors/Vector64Tests.cs            | 16 +++++
 7 files changed, 146 insertions(+), 17 deletions(-)

diff --git a/src/libraries/Common/tests/System/GenericMathTestMemberData.cs b/src/libraries/Common/tests/System/GenericMathTestMemberData.cs
index 49feea063c84d7..5e4191c0c534fb 100644
--- a/src/libraries/Common/tests/System/GenericMathTestMemberData.cs
+++ b/src/libraries/Common/tests/System/GenericMathTestMemberData.cs
@@ -250,6 +250,74 @@ public static IEnumerable<object[]> AsinSingle
             }
         }
 
+        public static IEnumerable<object[]> Atan2Double
+        {
+            get
+            {
+                yield return new object[] {  double.NaN,               double.NaN,              double.NaN,                0.0 };                                      // atan2(NaN, NaN) = NaN
+                yield return new object[] {  double.NaN,               1.0,                     double.NaN,                0.0 };                                      // atan2(NaN, x) = NaN
+                yield return new object[] {  1.0,                      double.NaN,              double.NaN,                0.0 };                                      // atan2(y, NaN) = NaN
+                yield return new object[] {  0.0,                      0.0,                     0.0,                       0.0 };                                      // atan2(+0, +0) = +0
+                yield return new object[] {  0.0,                     -0.0,                     3.1415926535897932,         0.0 };                                       // atan2(+0, -0) = +pi
+                yield return new object[] { -0.0,                      0.0,                    -0.0,                       0.0 };                                      // atan2(-0, +0) = -0
+                yield return new object[] { -0.0,                     -0.0,                    -3.1415926535897932,         0.0 };                                       // atan2(-0, -0) = -pi
+                yield return new object[] {  0.0,                      1.0,                     0.0,                       0.0 };                                      // atan2(+0, +x) = +0
+                yield return new object[] { -0.0,                      1.0,                    -0.0,                       0.0 };                                      // atan2(-0, +x) = -0
+                yield return new object[] {  0.0,                     -1.0,                     3.1415926535897932,         DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(+0, -x) = +pi
+                yield return new object[] { -0.0,                     -1.0,                    -3.1415926535897932,         DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(-0, -x) = -pi
+                yield return new object[] {  1.0,                      0.0,                     1.5707963267948966,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(+y, +0) = +pi/2
+                yield return new object[] { -1.0,                      0.0,                    -1.5707963267948966,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(-y, +0) = -pi/2
+                yield return new object[] {  1.0,                      1.0,                     0.78539816339744831,       DoubleCrossPlatformMachineEpsilon };         // atan2(1, 1) = pi/4
+                yield return new object[] { -1.0,                      1.0,                    -0.78539816339744831,       DoubleCrossPlatformMachineEpsilon };         // atan2(-1, 1) = -pi/4
+                yield return new object[] {  1.0,                     -1.0,                     2.3561944901923449,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(1, -1) = 3pi/4
+                yield return new object[] { -1.0,                     -1.0,                    -2.3561944901923449,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(-1, -1) = -3pi/4
+                yield return new object[] {  double.PositiveInfinity,  double.PositiveInfinity, 0.78539816339744831,       DoubleCrossPlatformMachineEpsilon };         // atan2(+inf, +inf) = pi/4
+                yield return new object[] {  double.PositiveInfinity,  double.NegativeInfinity, 2.3561944901923449,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(+inf, -inf) = 3pi/4
+                yield return new object[] {  double.NegativeInfinity,  double.PositiveInfinity,-0.78539816339744831,       DoubleCrossPlatformMachineEpsilon };         // atan2(-inf, +inf) = -pi/4
+                yield return new object[] {  double.NegativeInfinity,  double.NegativeInfinity,-2.3561944901923449,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(-inf, -inf) = -3pi/4
+                yield return new object[] {  1.0,                      double.PositiveInfinity, 0.0,                       0.0 };                                      // atan2(+y, +inf) = +0
+                yield return new object[] { -1.0,                      double.PositiveInfinity,-0.0,                       0.0 };                                      // atan2(-y, +inf) = -0
+                yield return new object[] {  1.0,                      double.NegativeInfinity, 3.1415926535897932,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(+y, -inf) = +pi
+                yield return new object[] { -1.0,                      double.NegativeInfinity,-3.1415926535897932,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(-y, -inf) = -pi
+                yield return new object[] {  double.PositiveInfinity,  1.0,                     1.5707963267948966,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(+inf, x) = +pi/2
+                yield return new object[] {  double.NegativeInfinity,  1.0,                    -1.5707963267948966,        DoubleCrossPlatformMachineEpsilon * 10 };    // atan2(-inf, x) = -pi/2
+            }
+        }
+
+        public static IEnumerable<object[]> Atan2Single
+        {
+            get
+            {
+                yield return new object[] {  float.NaN,                float.NaN,               float.NaN,                 0.0f };                                     // atan2(NaN, NaN) = NaN
+                yield return new object[] {  float.NaN,                1.0f,                    float.NaN,                 0.0f };                                     // atan2(NaN, x) = NaN
+                yield return new object[] {  1.0f,                     float.NaN,               float.NaN,                 0.0f };                                     // atan2(y, NaN) = NaN
+                yield return new object[] {  0.0f,                     0.0f,                    0.0f,                      0.0f };                                     // atan2(+0, +0) = +0
+                yield return new object[] {  0.0f,                    -0.0f,                    3.14159274f,               0.0f };                                       // atan2(+0, -0) = +pi
+                yield return new object[] { -0.0f,                     0.0f,                   -0.0f,                      0.0f };                                     // atan2(-0, +0) = -0
+                yield return new object[] { -0.0f,                    -0.0f,                   -3.14159274f,               0.0f };                                       // atan2(-0, -0) = -pi
+                yield return new object[] {  0.0f,                     1.0f,                    0.0f,                      0.0f };                                     // atan2(+0, +x) = +0
+                yield return new object[] { -0.0f,                     1.0f,                   -0.0f,                      0.0f };                                     // atan2(-0, +x) = -0
+                yield return new object[] {  0.0f,                    -1.0f,                    3.14159274f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(+0, -x) = +pi
+                yield return new object[] { -0.0f,                    -1.0f,                   -3.14159274f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(-0, -x) = -pi
+                yield return new object[] {  1.0f,                     0.0f,                    1.57079637f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(+y, +0) = +pi/2
+                yield return new object[] { -1.0f,                     0.0f,                   -1.57079637f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(-y, +0) = -pi/2
+                yield return new object[] {  1.0f,                     1.0f,                    0.785398163f,              SingleCrossPlatformMachineEpsilon };         // atan2(1, 1) = pi/4
+                yield return new object[] { -1.0f,                     1.0f,                   -0.785398163f,              SingleCrossPlatformMachineEpsilon };         // atan2(-1, 1) = -pi/4
+                yield return new object[] {  1.0f,                    -1.0f,                    2.35619450f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(1, -1) = 3pi/4
+                yield return new object[] { -1.0f,                    -1.0f,                   -2.35619450f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(-1, -1) = -3pi/4
+                yield return new object[] {  float.PositiveInfinity,   float.PositiveInfinity,  0.785398163f,              SingleCrossPlatformMachineEpsilon };         // atan2(+inf, +inf) = pi/4
+                yield return new object[] {  float.PositiveInfinity,   float.NegativeInfinity,  2.35619450f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(+inf, -inf) = 3pi/4
+                yield return new object[] {  float.NegativeInfinity,   float.PositiveInfinity, -0.785398163f,              SingleCrossPlatformMachineEpsilon };         // atan2(-inf, +inf) = -pi/4
+                yield return new object[] {  float.NegativeInfinity,   float.NegativeInfinity, -2.35619450f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(-inf, -inf) = -3pi/4
+                yield return new object[] {  1.0f,                     float.PositiveInfinity,  0.0f,                      0.0f };                                      // atan2(+y, +inf) = +0
+                yield return new object[] { -1.0f,                     float.PositiveInfinity, -0.0f,                      0.0f };                                      // atan2(-y, +inf) = -0
+                yield return new object[] {  1.0f,                     float.NegativeInfinity,  3.14159274f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(+y, -inf) = +pi
+                yield return new object[] { -1.0f,                     float.NegativeInfinity, -3.14159274f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(-y, -inf) = -pi
+                yield return new object[] {  float.PositiveInfinity,   1.0f,                    1.57079637f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(+inf, x) = +pi/2
+                yield return new object[] {  float.NegativeInfinity,   1.0f,                   -1.57079637f,               SingleCrossPlatformMachineEpsilon * 10 };    // atan2(-inf, x) = -pi/2
+            }
+        }
+
         public static IEnumerable<object[]> CosDouble
         {
             get
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index d0449c26d92802..dd0c01f6ee73bb 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -886,7 +886,14 @@ public static Vector256<float> Atan2(Vector256<float> y, Vector256<float> x)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.Atan2Single<Vector256<float>, Vector512<double>>(y, x);
+                if (Vector512.IsHardwareAccelerated)
+                {
+                    return VectorMath.Atan2Single<Vector256<float>, Vector512<double>>(y, x);
+                }
+                else
+                {
+                    return VectorMath.Atan2Single<Vector256<float>, Vector256<double>>(y, x);
+                }
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index 0b922807e2ee55..af39efc6dbbe64 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -3362,22 +3362,12 @@ public static TVectorDouble Atan2Double<TVectorDouble>(TVectorDouble y, TVectorD
 
             const double PI = 3.141592653589793;           // 0x1.921fb54442d18p1
 
-            // Check for x being negative using standard comparison
-            TVectorDouble xLessThanZero = TVectorDouble.LessThan(x, TVectorDouble.Zero);
-
-            // For signed zero handling: check if x is -0 specifically
-            // We detect -0 by checking: x == 0 AND 1/x < 0 (since 1/-0 = -∞)
-            TVectorDouble xIsZero = TVectorDouble.Equals(x, TVectorDouble.Zero);
-            TVectorDouble recipX = TVectorDouble.One / x;
-            TVectorDouble recipXNegative = TVectorDouble.LessThan(recipX, TVectorDouble.Zero);
-            TVectorDouble xIsNegativeOrNegZero = xLessThanZero | (xIsZero & recipXNegative);
-
-            // Check for y sign using same technique
-            TVectorDouble yLessThanZero = TVectorDouble.LessThan(y, TVectorDouble.Zero);
-            TVectorDouble yIsZero = TVectorDouble.Equals(y, TVectorDouble.Zero);
-            TVectorDouble recipY = TVectorDouble.One / y;
-            TVectorDouble recipYNegative = TVectorDouble.LessThan(recipY, TVectorDouble.Zero);
-            TVectorDouble yIsNegativeOrNegZero = yLessThanZero | (yIsZero & recipYNegative);
+            // Use ISimdVector helpers which treat -0 as negative and detect zero
+            TVectorDouble xIsZero = TVectorDouble.IsZero(x);
+            TVectorDouble xIsNegativeOrNegZero = TVectorDouble.IsNegative(x);
+
+            TVectorDouble yIsZero = TVectorDouble.IsZero(y);
+            TVectorDouble yIsNegativeOrNegZero = TVectorDouble.IsNegative(y);
 
             // Compute atan(y/x) for the general case
             TVectorDouble ratio = y / x;
diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs
index 6246c1231c32d2..91d883e89a1b9a 100644
--- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs
+++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs
@@ -5321,6 +5321,22 @@ public void AsinSingleTest(float value, float expectedResult, float variance)
             AssertEqual(Vector128.Create(expectedResult), actualResult, Vector128.Create(variance));
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.Atan2Double), MemberType = typeof(GenericMathTestMemberData))]
+        public void Atan2DoubleTest(double y, double x, double expectedResult, double variance)
+        {
+            // Broadcasts y and x to every lane, evaluates Atan2, and compares with the broadcast expected value using the supplied variance.
+            AssertEqual(Vector128.Create(expectedResult), Vector128.Atan2(Vector128.Create(y), Vector128.Create(x)), Vector128.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.Atan2Single), MemberType = typeof(GenericMathTestMemberData))]
+        public void Atan2SingleTest(float y, float x, float expectedResult, float variance)
+        {
+            // Broadcasts y and x to every lane, evaluates Atan2, and compares with the broadcast expected value using the supplied variance.
+            AssertEqual(Vector128.Create(expectedResult), Vector128.Atan2(Vector128.Create(y), Vector128.Create(x)), Vector128.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void CosDoubleTest(double value, double expectedResult, double variance)
diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs
index 728a87900f314a..9d895597a7f9ce 100644
--- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs
+++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs
@@ -6497,6 +6497,22 @@ public void AsinSingleTest(float value, float expectedResult, float variance)
             AssertEqual(Vector256.Create(expectedResult), actualResult, Vector256.Create(variance));
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.Atan2Double), MemberType = typeof(GenericMathTestMemberData))]
+        public void Atan2DoubleTest(double y, double x, double expectedResult, double variance)
+        {
+            // Broadcasts y and x to every lane, evaluates Atan2, and compares with the broadcast expected value using the supplied variance.
+            AssertEqual(Vector256.Create(expectedResult), Vector256.Atan2(Vector256.Create(y), Vector256.Create(x)), Vector256.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.Atan2Single), MemberType = typeof(GenericMathTestMemberData))]
+        public void Atan2SingleTest(float y, float x, float expectedResult, float variance)
+        {
+            // Broadcasts y and x to every lane, evaluates Atan2, and compares with the broadcast expected value using the supplied variance.
+            AssertEqual(Vector256.Create(expectedResult), Vector256.Atan2(Vector256.Create(y), Vector256.Create(x)), Vector256.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void CosDoubleTest(double value, double expectedResult, double variance)
diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs
index d3c430020b5225..5b2d95c30f57c7 100644
--- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs
+++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs
@@ -6280,6 +6280,22 @@ public void AsinSingleTest(float value, float expectedResult, float variance)
             AssertEqual(Vector512.Create(expectedResult), actualResult, Vector512.Create(variance));
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.Atan2Double), MemberType = typeof(GenericMathTestMemberData))]
+        public void Atan2DoubleTest(double y, double x, double expectedResult, double variance)
+        {
+            // Broadcasts y and x to every lane, evaluates Atan2, and compares with the broadcast expected value using the supplied variance.
+            AssertEqual(Vector512.Create(expectedResult), Vector512.Atan2(Vector512.Create(y), Vector512.Create(x)), Vector512.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.Atan2Single), MemberType = typeof(GenericMathTestMemberData))]
+        public void Atan2SingleTest(float y, float x, float expectedResult, float variance)
+        {
+            // Broadcasts y and x to every lane, evaluates Atan2, and compares with the broadcast expected value using the supplied variance.
+            AssertEqual(Vector512.Create(expectedResult), Vector512.Atan2(Vector512.Create(y), Vector512.Create(x)), Vector512.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void CosDoubleTest(double value, double expectedResult, double variance)
diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs
index be91c3325549fb..5d68cb9de7df27 100644
--- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs
+++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs
@@ -4595,6 +4595,22 @@ public void AsinSingleTest(float value, float expectedResult, float variance)
             AssertEqual(Vector64.Create(expectedResult), actualResult, Vector64.Create(variance));
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.Atan2Double), MemberType = typeof(GenericMathTestMemberData))]
+        public void Atan2DoubleTest(double y, double x, double expectedResult, double variance)
+        {
+            // Broadcasts y and x to every lane, evaluates Atan2, and compares with the broadcast expected value using the supplied variance.
+            AssertEqual(Vector64.Create(expectedResult), Vector64.Atan2(Vector64.Create(y), Vector64.Create(x)), Vector64.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.Atan2Single), MemberType = typeof(GenericMathTestMemberData))]
+        public void Atan2SingleTest(float y, float x, float expectedResult, float variance)
+        {
+            // Broadcasts y and x to every lane, evaluates Atan2, and compares with the broadcast expected value using the supplied variance.
+            AssertEqual(Vector64.Create(expectedResult), Vector64.Atan2(Vector64.Create(y), Vector64.Create(x)), Vector64.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void CosDoubleTest(double value, double expectedResult, double variance)