From 1c723b2d4f124845f848c722c7ad4928a22360d0 Mon Sep 17 00:00:00 2001 From: kzrnm Date: Sun, 29 Mar 2026 19:34:11 +0900 Subject: [PATCH 1/6] Use Math.BigMul instead of UInt128.Multiply --- .../src/System/Number.BigInteger.cs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs b/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs index 5d5acec3f395bd..1865bf6491eac5 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs @@ -549,9 +549,14 @@ static nuint MultiplyAdd(Span bits, nuint multiplier, nuint addValue) { for (int i = 0; i < bits.Length; i++) { - UInt128 p = (UInt128)bits[i] * multiplier + carry; - bits[i] = (nuint)(ulong)p; - carry = (nuint)(ulong)(p >> 64); + ulong high = Math.BigMul(bits[i], multiplier, out ulong low); + ulong lowWithAdd = low + carry; + bits[i] = (nuint)lowWithAdd; + if (lowWithAdd < low) + { + high++; + } + carry = (nuint)high; } } else From 3c5a378838b6da06d09b5822c49e2a988307c8ca Mon Sep 17 00:00:00 2001 From: kzrnm Date: Wed, 1 Apr 2026 01:12:13 +0900 Subject: [PATCH 2/6] MulAdd & BigMul --- .../src/System/Number.BigInteger.cs | 41 ++----------------- .../Numerics/BigIntegerCalculator.PowMod.cs | 19 +++++---- .../Numerics/BigIntegerCalculator.SquMul.cs | 4 +- .../Numerics/BigIntegerCalculator.Utils.cs | 23 +++++++++++ 4 files changed, 39 insertions(+), 48 deletions(-) diff --git a/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs b/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs index 1865bf6491eac5..bbf1f420e3ceb9 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs @@ -545,28 +545,9 @@ static nuint MultiplyAdd(Span bits, nuint multiplier, nuint addValue) { nuint carry = 
addValue; - if (nint.Size == 8) + for (int i = 0; i < bits.Length; i++) { - for (int i = 0; i < bits.Length; i++) - { - ulong high = Math.BigMul(bits[i], multiplier, out ulong low); - ulong lowWithAdd = low + carry; - bits[i] = (nuint)lowWithAdd; - if (lowWithAdd < low) - { - high++; - } - carry = (nuint)high; - } - } - else - { - for (int i = 0; i < bits.Length; i++) - { - ulong p = (ulong)multiplier * bits[i] + carry; - bits[i] = (uint)p; - carry = (uint)(p >> 32); - } + carry = BigIntegerCalculator.MulAdd(bits[i], multiplier, carry, out bits[i]); } return carry; @@ -1495,23 +1476,9 @@ public void MultiplyPowerOfTen(ReadOnlySpan left, int trailingZeroCount, { nuint multiplier = UInt32PowersOfTen[remainingTrailingZeroCount]; nuint carry = 0; - if (nint.Size == 8) - { - for (int i = 0; i < bits2.Length; i++) - { - UInt128 p = (UInt128)multiplier * bits2[i] + carry; - bits2[i] = (nuint)(ulong)p; - carry = (nuint)(ulong)(p >> 64); - } - } - else + for (int i = 0; i < bits2.Length; i++) { - for (int i = 0; i < bits2.Length; i++) - { - ulong p = (ulong)multiplier * bits2[i] + carry; - bits2[i] = (uint)p; - carry = (uint)(p >> 32); - } + carry = BigIntegerCalculator.MulAdd(bits2[i], multiplier, carry, out bits2[i]); } if (carry != 0) diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.PowMod.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.PowMod.cs index ae20c61a1eb536..124a2467617478 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.PowMod.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.PowMod.cs @@ -187,12 +187,12 @@ private static nuint PowCore(nuint value, ReadOnlySpan power, nuint modul { if ((p & 1) == 1) { - UInt128 prod = (UInt128)(ulong)result * (ulong)value; + UInt128 prod = Math.BigMul(result, value); result = (nuint)(ulong)(prod % (ulong)modulus); } { - UInt128 sq = (UInt128)(ulong)value * (ulong)value; 
+ UInt128 sq = Math.BigMul(value, value); value = (nuint)(ulong)(sq % (ulong)modulus); } } @@ -231,13 +231,13 @@ private static nuint PowCore(nuint value, nuint power, nuint modulus, nuint resu { if ((power & 1) == 1) { - UInt128 prod = (UInt128)(ulong)result * (ulong)value; + UInt128 prod = Math.BigMul(result, value); result = (nuint)(ulong)(prod % (ulong)modulus); } if (power != 1) { - UInt128 sq = (UInt128)(ulong)value * (ulong)value; + UInt128 sq = Math.BigMul(value, value); value = (nuint)(ulong)(sq % (ulong)modulus); } } @@ -684,19 +684,20 @@ private static int MontgomeryReduce(Span value, ReadOnlySpan modul { nuint m = value[i] * n0inv; nuint carry = 0; + Span v2 = value.Slice(i, k); - for (int j = 0; j < k; j++) + for (int j = 0; j < v2.Length; j++) { if (nint.Size == 8) { - UInt128 p = (UInt128)m * modulus[j] + value[i + j] + carry; - value[i + j] = (nuint)(ulong)p; + UInt128 p = Math.BigMul(m, modulus[j]) + v2[j] + carry; + v2[j] = (nuint)(ulong)p; carry = (nuint)(ulong)(p >> 64); } else { - ulong p = (ulong)m * modulus[j] + value[i + j] + carry; - value[i + j] = (uint)p; + ulong p = (ulong)m * modulus[j] + v2[j] + carry; + v2[j] = (uint)p; carry = (uint)(p >> 32); } } diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs index ee501d657048ab..29a56636d4986f 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs @@ -166,7 +166,7 @@ static void Naive(ReadOnlySpan value, Span bits) for (int j = 0; j < i; j++) { UInt128 digit1 = (UInt128)(ulong)bits[i + j] + carry; - UInt128 digit2 = (UInt128)(ulong)value[j] * (ulong)v; + UInt128 digit2 = Math.BigMul(value[j], v); bits[i + j] = (nuint)(ulong)(digit1 + (digit2 << 1)); // We need digit1 + 2*digit2, but that could overflow UInt128. 
// Instead, compute (digit2 + digit1/2) >> 63 which gives the @@ -174,7 +174,7 @@ static void Naive(ReadOnlySpan value, Span bits) carry = (digit2 + (digit1 >> 1)) >> 63; } - UInt128 digits = (UInt128)(ulong)v * (ulong)v + carry; + UInt128 digits = Math.BigMul(v, v) + carry; bits[i + i] = (nuint)(ulong)digits; bits[i + i + 1] = (nuint)(ulong)(digits >> 64); } diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs index 024d30f148ee50..b3ccfe7174d961 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs @@ -210,6 +210,29 @@ internal static nuint DivRem(nuint hi, nuint lo, nuint divisor, out nuint remain } } + /// + /// Widening multiply-add: left * right + addend -> (return: high word, out low: low word). + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static nuint MulAdd(nuint left, nuint right, nuint addend, out nuint low) + { + if (nint.Size == 8) + { + ulong high = Math.BigMul(left, right, out ulong lowUL); + low = (nuint)lowUL + addend; + if (low < addend) + { + high++; + } + return (nuint)high; + } + else + { + ulong p = (ulong)left * right + addend; + low = (uint)p; + return (uint)(p >> 32); + } + } /// /// Multiply by scalar: result[0..left.Length] = left * multiplier. /// Returns the carry out. Unrolled by 4 on 64-bit. 
From d3b67cda1670066b51608b19235c652d20a4d028 Mon Sep 17 00:00:00 2001 From: kzrnm Date: Wed, 1 Apr 2026 01:20:24 +0900 Subject: [PATCH 3/6] Mul1 --- .../Numerics/BigIntegerCalculator.SquMul.cs | 33 +++++++++++- .../Numerics/BigIntegerCalculator.Utils.cs | 51 ------------------- 2 files changed, 32 insertions(+), 52 deletions(-) diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs index 29a56636d4986f..2fe40950a39d09 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs @@ -201,11 +201,42 @@ static void Naive(ReadOnlySpan value, Span bits) } } + /// + /// Multiply by scalar: bits[0..left.Length] = left * right. + /// The carry out is stored in bits[left.Length]. Unrolled by 4 on 64-bit. + /// public static void Multiply(ReadOnlySpan left, nuint right, Span bits) { Debug.Assert(bits.Length == left.Length + 1); - nuint carry = Mul1(bits, left, right); + int i = 0; + nuint carry = 0; + + if (nint.Size == 8) + { + for (; i + 3 < left.Length; i += 4) + { + carry = MulAdd(left[i ], right, carry, out bits[i ]); + carry = MulAdd(left[i + 1], right, carry, out bits[i + 1]); + carry = MulAdd(left[i + 2], right, carry, out bits[i + 2]); + carry = MulAdd(left[i + 3], right, carry, out bits[i + 3]); + } + + for (; i < left.Length; i++) + { + carry = MulAdd(left[i], right, carry, out bits[i]); + } + } + else + { + for (; i < left.Length; i++) + { + ulong product = (ulong)left[i] * right + carry; + bits[i] = (uint)product; + carry = (uint)(product >> 32); + } + } + bits[left.Length] = carry; } diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs index b3ccfe7174d961..8906b06a6ddbb0 100644 
--- a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs @@ -233,57 +233,6 @@ internal static nuint MulAdd(nuint left, nuint right, nuint addend, out nuint lo return (uint)(p >> 32); } } - /// - /// Multiply by scalar: result[0..left.Length] = left * multiplier. - /// Returns the carry out. Unrolled by 4 on 64-bit. - /// Unlike MulAdd1, this writes to result rather than accumulating. - /// - internal static nuint Mul1(Span result, ReadOnlySpan left, nuint multiplier) - { - Debug.Assert(result.Length >= left.Length); - - int length = left.Length; - int i = 0; - nuint carry = 0; - - if (nint.Size == 8) - { - for (; i + 3 < length; i += 4) - { - UInt128 p0 = (UInt128)(ulong)left[i] * (ulong)multiplier + (ulong)carry; - result[i] = (nuint)(ulong)p0; - - UInt128 p1 = (UInt128)(ulong)left[i + 1] * (ulong)multiplier + (ulong)(p0 >> 64); - result[i + 1] = (nuint)(ulong)p1; - - UInt128 p2 = (UInt128)(ulong)left[i + 2] * (ulong)multiplier + (ulong)(p1 >> 64); - result[i + 2] = (nuint)(ulong)p2; - - UInt128 p3 = (UInt128)(ulong)left[i + 3] * (ulong)multiplier + (ulong)(p2 >> 64); - result[i + 3] = (nuint)(ulong)p3; - - carry = (nuint)(ulong)(p3 >> 64); - } - - for (; i < length; i++) - { - UInt128 product = (UInt128)(ulong)left[i] * (ulong)multiplier + (ulong)carry; - result[i] = (nuint)(ulong)product; - carry = (nuint)(ulong)(product >> 64); - } - } - else - { - for (; i < length; i++) - { - ulong product = (ulong)left[i] * multiplier + carry; - result[i] = (uint)product; - carry = (uint)(product >> 32); - } - } - - return carry; - } /// /// Fused multiply-accumulate by scalar: result[0..left.Length] += left * multiplier. 
From 0efb32078a4d295518e7712fd383fd2dd9712c71 Mon Sep 17 00:00:00 2001 From: kzrnm Date: Wed, 1 Apr 2026 01:39:30 +0900 Subject: [PATCH 4/6] SubMul1 --- .../Numerics/BigIntegerCalculator.DivRem.cs | 69 ++++++++++++++++- .../Numerics/BigIntegerCalculator.Utils.cs | 77 ------------------- 2 files changed, 67 insertions(+), 79 deletions(-) diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.DivRem.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.DivRem.cs index 119c241460e192..d0af540d2372c4 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.DivRem.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.DivRem.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Runtime.CompilerServices; namespace System.Numerics { @@ -282,11 +283,75 @@ private static nuint AddDivisor(Span left, ReadOnlySpan right) return carry; } - private static nuint SubtractDivisor(Span left, ReadOnlySpan right, nuint q) + /// + /// Fused subtract-multiply by scalar: left[0..right.Length] -= right * multiplier. + /// Returns the borrow out. Unrolled by 4 on 64-bit. 
+ /// + private static nuint SubtractDivisor(Span left, ReadOnlySpan right, nuint multiplier) { Debug.Assert(left.Length >= right.Length); - return SubMul1(left, right, q); + int i = 0; + nuint carry = 0; + + if (nint.Size == 8) + { + for (; i + 3 < right.Length; i += 4) + { + carry = SubtractMul(ref left[i], right[i], multiplier, carry); + carry = SubtractMul(ref left[i + 1], right[i + 1], multiplier, carry); + carry = SubtractMul(ref left[i + 2], right[i + 2], multiplier, carry); + carry = SubtractMul(ref left[i + 3], right[i + 3], multiplier, carry); + } + + for (; i < right.Length; i++) + { + carry = SubtractMul(ref left[i], right[i], multiplier, carry); + } + } + else + { + for (; i < right.Length; i++) + { + ulong product = (ulong)right[i] * multiplier + carry; + uint lo = (uint)product; + uint hi = (uint)(product >> 32); + + uint orig = (uint)left[i]; + left[i] = orig - lo; + hi += (orig < lo) ? 1u : 0; + + carry = hi; + } + } + + return carry; + + static nuint SubtractMul(ref nuint left, nuint right, nuint multiplier, nuint addend) + { + if (nint.Size == 8) + { + UInt128 prod = Math.BigMul(right, multiplier) + (ulong)addend; + nuint lo = (nuint)(ulong)prod; + nuint hi = (nuint)(ulong)(prod >> 64); + hi += (left < lo) ? 
(nuint)1 : 0; + left -= lo; + return hi; + } + else + { + ulong product = (ulong)right * multiplier + addend; + uint lo = (uint)product; + uint hi = (uint)(product >> 32); + + if (left < lo) + { + ++hi; + } + left -= lo; + return hi; + } + } } private static bool DivideGuessTooBig(nuint q, nuint valHi1, nuint valHi0, diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs index 8906b06a6ddbb0..f974ad0deaa69b 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs @@ -288,82 +288,5 @@ internal static nuint MulAdd1(Span result, ReadOnlySpan left, nuin return carry; } - - /// - /// Fused subtract-multiply by scalar: result[0..right.Length] -= right * multiplier. - /// Returns the borrow out. Unrolled by 4 on 64-bit. - /// - internal static nuint SubMul1(Span result, ReadOnlySpan right, nuint multiplier) - { - Debug.Assert(result.Length >= right.Length); - - int length = right.Length; - int i = 0; - nuint carry = 0; - - if (nint.Size == 8) - { - for (; i + 3 < length; i += 4) - { - UInt128 prod0 = (UInt128)(ulong)right[i] * (ulong)multiplier + (ulong)carry; - nuint lo0 = (nuint)(ulong)prod0; - nuint hi0 = (nuint)(ulong)(prod0 >> 64); - nuint orig0 = result[i]; - result[i] = orig0 - lo0; - hi0 += (orig0 < lo0) ? (nuint)1 : 0; - - UInt128 prod1 = (UInt128)(ulong)right[i + 1] * (ulong)multiplier + (ulong)hi0; - nuint lo1 = (nuint)(ulong)prod1; - nuint hi1 = (nuint)(ulong)(prod1 >> 64); - nuint orig1 = result[i + 1]; - result[i + 1] = orig1 - lo1; - hi1 += (orig1 < lo1) ? 
(nuint)1 : 0; - - UInt128 prod2 = (UInt128)(ulong)right[i + 2] * (ulong)multiplier + (ulong)hi1; - nuint lo2 = (nuint)(ulong)prod2; - nuint hi2 = (nuint)(ulong)(prod2 >> 64); - nuint orig2 = result[i + 2]; - result[i + 2] = orig2 - lo2; - hi2 += (orig2 < lo2) ? (nuint)1 : 0; - - UInt128 prod3 = (UInt128)(ulong)right[i + 3] * (ulong)multiplier + (ulong)hi2; - nuint lo3 = (nuint)(ulong)prod3; - nuint hi3 = (nuint)(ulong)(prod3 >> 64); - nuint orig3 = result[i + 3]; - result[i + 3] = orig3 - lo3; - hi3 += (orig3 < lo3) ? (nuint)1 : 0; - - carry = hi3; - } - - for (; i < length; i++) - { - UInt128 product = (UInt128)(ulong)right[i] * (ulong)multiplier + (ulong)carry; - nuint lo = (nuint)(ulong)product; - nuint hi = (nuint)(ulong)(product >> 64); - nuint orig = result[i]; - result[i] = orig - lo; - hi += (orig < lo) ? (nuint)1 : 0; - carry = hi; - } - } - else - { - for (; i < length; i++) - { - ulong product = (ulong)right[i] * multiplier + carry; - uint lo = (uint)product; - uint hi = (uint)(product >> 32); - - uint orig = (uint)result[i]; - result[i] = orig - lo; - hi += (orig < lo) ? 
1u : 0; - - carry = hi; - } - } - - return carry; - } } } From e1a6debd1dc8c36f42f03d059ebd133fcb90294f Mon Sep 17 00:00:00 2001 From: kzrnm Date: Wed, 1 Apr 2026 02:59:35 +0900 Subject: [PATCH 5/6] MulAdd1 --- .../Numerics/BigIntegerCalculator.SquMul.cs | 59 ++++++++++++++++++- .../Numerics/BigIntegerCalculator.Utils.cs | 55 ----------------- 2 files changed, 56 insertions(+), 58 deletions(-) diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs index 2fe40950a39d09..7c49136091e352 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.SquMul.cs @@ -216,7 +216,7 @@ public static void Multiply(ReadOnlySpan left, nuint right, Span b { for (; i + 3 < left.Length; i += 4) { - carry = MulAdd(left[i ], right, carry, out bits[i ]); + carry = MulAdd(left[i], right, carry, out bits[i]); carry = MulAdd(left[i + 1], right, carry, out bits[i + 1]); carry = MulAdd(left[i + 2], right, carry, out bits[i + 2]); carry = MulAdd(left[i + 3], right, carry, out bits[i + 3]); @@ -559,9 +559,62 @@ static void Naive(ReadOnlySpan left, ReadOnlySpan right, Span result = bits.Slice(i); + nuint carry = AddProduct(result, left, right[i]); + result[left.Length] = carry; + } + } + + static nuint AddProduct(Span result, ReadOnlySpan left, nuint multiplier) + { + // Fused multiply-accumulate by scalar: result[0..left.Length] += left * multiplier. + // Returns the carry out. Unrolled by 4 on 64-bit to overlap multiply latencies. + Debug.Assert(result.Length >= left.Length); + + int i = 0; + nuint carry = 0; + + if (nint.Size == 8) + { + // Unroll by 4: mulx has 3-5 cycle latency but 1 cycle throughput, + // so issuing 4 multiplies allows the CPU to pipeline them while + // carry chains complete sequentially behind. 
+ for (; i + 3 < left.Length; i += 4) + { + UInt128 p0 = Math.BigMul(left[i], multiplier) + (ulong)result[i] + (ulong)carry; + result[i] = (nuint)(ulong)p0; + + UInt128 p1 = Math.BigMul(left[i + 1], multiplier) + (ulong)result[i + 1] + (ulong)(p0 >> 64); + result[i + 1] = (nuint)(ulong)p1; + + UInt128 p2 = Math.BigMul(left[i + 2], multiplier) + (ulong)result[i + 2] + (ulong)(p1 >> 64); + result[i + 2] = (nuint)(ulong)p2; + + UInt128 p3 = Math.BigMul(left[i + 3], multiplier) + (ulong)result[i + 3] + (ulong)(p2 >> 64); + result[i + 3] = (nuint)(ulong)p3; + + carry = (nuint)(ulong)(p3 >> 64); + } + + for (; i < left.Length; i++) + { + UInt128 product = Math.BigMul(left[i], multiplier) + (ulong)result[i] + (ulong)carry; + result[i] = (nuint)(ulong)product; + carry = (nuint)(ulong)(product >> 64); + } + } + else + { + for (; i < left.Length; i++) + { + ulong product = (ulong)left[i] * multiplier + + result[i] + carry; + result[i] = (uint)product; + carry = (uint)(product >> 32); + } } + + return carry; } } diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs index f974ad0deaa69b..e5c4ba99ae10c3 100644 --- a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.Utils.cs @@ -233,60 +233,5 @@ internal static nuint MulAdd(nuint left, nuint right, nuint addend, out nuint lo return (uint)(p >> 32); } } - - /// - /// Fused multiply-accumulate by scalar: result[0..left.Length] += left * multiplier. - /// Returns the carry out. Unrolled by 4 on 64-bit to overlap multiply latencies. 
- /// - internal static nuint MulAdd1(Span result, ReadOnlySpan left, nuint multiplier) - { - Debug.Assert(result.Length >= left.Length); - - int length = left.Length; - int i = 0; - nuint carry = 0; - - if (nint.Size == 8) - { - // Unroll by 4: mulx has 3-5 cycle latency but 1 cycle throughput, - // so issuing 4 multiplies allows the CPU to pipeline them while - // carry chains complete sequentially behind. - for (; i + 3 < length; i += 4) - { - UInt128 p0 = (UInt128)(ulong)left[i] * (ulong)multiplier + (ulong)result[i] + (ulong)carry; - result[i] = (nuint)(ulong)p0; - - UInt128 p1 = (UInt128)(ulong)left[i + 1] * (ulong)multiplier + (ulong)result[i + 1] + (ulong)(p0 >> 64); - result[i + 1] = (nuint)(ulong)p1; - - UInt128 p2 = (UInt128)(ulong)left[i + 2] * (ulong)multiplier + (ulong)result[i + 2] + (ulong)(p1 >> 64); - result[i + 2] = (nuint)(ulong)p2; - - UInt128 p3 = (UInt128)(ulong)left[i + 3] * (ulong)multiplier + (ulong)result[i + 3] + (ulong)(p2 >> 64); - result[i + 3] = (nuint)(ulong)p3; - - carry = (nuint)(ulong)(p3 >> 64); - } - - for (; i < length; i++) - { - UInt128 product = (UInt128)(ulong)left[i] * (ulong)multiplier + (ulong)result[i] + (ulong)carry; - result[i] = (nuint)(ulong)product; - carry = (nuint)(ulong)(product >> 64); - } - } - else - { - for (; i < length; i++) - { - ulong product = (ulong)left[i] * multiplier - + result[i] + carry; - result[i] = (uint)product; - carry = (uint)(product >> 32); - } - } - - return carry; - } } } From e612f5d603f26375382719c7f74c2b6355fe26b3 Mon Sep 17 00:00:00 2001 From: kzrnm Date: Sat, 4 Apr 2026 04:34:53 +0900 Subject: [PATCH 6/6] Update comment --- .../src/System/Numerics/BigIntegerCalculator.DivRem.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.DivRem.cs b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.DivRem.cs index d0af540d2372c4..f0c8366a262eb9 100644 --- 
a/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.DivRem.cs +++ b/src/libraries/System.Runtime.Numerics/src/System/Numerics/BigIntegerCalculator.DivRem.cs @@ -284,7 +284,7 @@ private static nuint AddDivisor(Span left, ReadOnlySpan right) } /// - /// Fused subtract-multiply by scalar: left[0..right.Length] -= right * multiplier. + /// Subtract the product of right and multiplier from left: left[0..right.Length] -= right * multiplier. /// Returns the borrow out. Unrolled by 4 on 64-bit. /// private static nuint SubtractDivisor(Span left, ReadOnlySpan right, nuint multiplier)