// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// NOTE(review): this file was recovered from a patch whose generic type
// arguments (everything between angle brackets) had been stripped by markup
// processing. All type-argument lists below were reconstructed from the
// surviving `where` clauses, intrinsic signatures, and dispatch naming —
// verify against the upstream change before relying on them.

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

namespace System.IO.Hashing;

public sealed partial class Adler32
{
    /// <summary>
    /// True when the SIMD implementation is usable for this input: hardware-accelerated
    /// 128-bit vectors and at least one full vector (16 bytes) of data.
    /// </summary>
    private static bool IsVectorizable(ReadOnlySpan<byte> source)
        => Vector128.IsHardwareAccelerated && source.Length >= Vector128<byte>.Count;

    /// <summary>Forwards to the file-local SIMD implementation.</summary>
    private static uint UpdateVectorized(uint adler, ReadOnlySpan<byte> source)
        => Adler32Simd.UpdateVectorized(adler, source);
}

file static class Adler32Simd
{
    // VMax represents the maximum number of 16-byte vectors we can process before reducing
    // mod 65521. This is analogous to NMax in the scalar code, however because the accumulated
    // values are distributed across vector elements, we can process more bytes before possible
    // overflow in any individual element. For this implementation, the max is actually 460
    // vectors, but we choose 448, because it divides evenly by any reasonable block size.
    public const uint VMax = 448;

    // 32-byte sliding mask for the final partial vector: loading 16 bytes at offset
    // `tailLength` yields (16 - tailLength) zero lanes followed by `tailLength` 0xff
    // lanes, which zeroes the stale prefix of a backward-overlapping load.
    private static ReadOnlySpan<byte> MaskBytes => [
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
    ];

    /// <summary>
    /// Dispatches to the best available ISA-specific instantiation of <see cref="UpdateCore"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static uint UpdateVectorized(uint adler, ReadOnlySpan<byte> source)
    {
        if (Vector256.IsHardwareAccelerated && Avx2.IsSupported)
        {
            return UpdateCore<AdlerVector256, AccumulateX86, DotProductX86>(adler, source);
        }

        if (Ssse3.IsSupported)
        {
            return UpdateCore<AdlerVector128, AccumulateX86, DotProductX86>(adler, source);
        }

        if (AdvSimd.Arm64.IsSupported)
        {
            if (Dp.IsSupported)
            {
                return UpdateCore<AdlerVector128, AccumulateArm64, DotProductArm64Dp>(adler, source);
            }

            return UpdateCore<AdlerVector128, AccumulateArm64, DotProductArm64>(adler, source);
        }

        return UpdateCore<AdlerVector128, AccumulateXplat, DotProductXplat>(adler, source);
    }

    /// <summary>
    /// Shared driver: runs the strategy's two-vector main loop, then handles one possible
    /// leftover whole vector and one possible partial vector, and folds the lane sums
    /// back into a scalar Adler-32 state.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static uint UpdateCore<TSimdStrategy, TAccumulate, TDotProduct>(uint adler, ReadOnlySpan<byte> source)
        where TSimdStrategy : struct, ISimdStrategy
        where TAccumulate : struct, ISimdAccumulate
        where TDotProduct : struct, ISimdDotProduct
    {
        Debug.Assert(source.Length >= Vector128<byte>.Count);

        ref byte bufRef = ref MemoryMarshal.GetReference(source);
        uint totalLength = (uint)source.Length;
        uint totalVectors = totalLength / (uint)Vector128<byte>.Count;

        // Main loop consumes an even number of vectors; at most one whole vector and
        // one partial vector remain afterwards.
        uint loopVectors = totalVectors & ~1u;
        uint tailVectors = totalVectors - loopVectors;
        uint tailLength = totalLength - totalVectors * (uint)Vector128<byte>.Count;

        uint s1 = (ushort)adler;
        uint s2 = adler >>> 16;

        Vector128<uint> vs1 = Vector128.CreateScalar(s1);
        Vector128<uint> vs2 = Vector128.CreateScalar(s2);

        (vs1, vs2) = TSimdStrategy.VectorLoop<TAccumulate, TDotProduct>(vs1, vs2, ref bufRef, loopVectors);
        bufRef = ref Unsafe.Add(ref bufRef, loopVectors * (uint)Vector128<byte>.Count);

        // Weights 16..1 — position weights for a single 16-byte vector.
        Vector128<byte> weights = Vector128.CreateSequence((byte)16, unchecked((byte)-1));

        if (tailVectors != 0)
        {
            Debug.Assert(tailVectors == 1);

            Vector128<byte> bytes = Vector128.LoadUnsafe(ref bufRef);
            bufRef = ref Unsafe.Add(ref bufRef, (uint)Vector128<byte>.Count);

            Vector128<uint> vps = vs1;

            vs1 = TAccumulate.Accumulate(vs1, bytes);
            vs2 = TDotProduct.DotProduct(vs2, bytes, weights);

            // Previous s1 contributes 16x (one whole vector) to s2.
            vs2 += vps << 4;
        }

        if (tailLength != 0)
        {
            Debug.Assert(tailLength < (uint)Vector128<byte>.Count);

            // Load the last 16 bytes of the buffer (overlapping already-processed data)
            // and mask off the stale prefix so only the real tail bytes contribute.
            Vector128<byte> bytes = Vector128.LoadUnsafe(ref Unsafe.Subtract(ref bufRef, (uint)Vector128<byte>.Count - tailLength));
            bytes &= Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(MaskBytes), tailLength);

            Vector128<uint> vps = vs1;

            vs1 = TAccumulate.Accumulate(vs1, bytes);
            vs2 = TDotProduct.DotProduct(vs2, bytes, weights);

            // Previous s1 contributes tailLength times to s2.
            vs2 += vps * Vector128.Create(tailLength);
        }

        s1 = Vector128.Sum(vs1) % Adler32.ModBase;
        s2 = Vector128.Sum(vs2) % Adler32.ModBase;

        return s1 | (s2 << 16);
    }
}

file struct AdlerVector128 : ISimdStrategy
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector128<uint> QuickModBase(Vector128<uint> values)
    {
        // Calculating the residual mod 65521 is impractical in SIMD, however we can reduce by
        // enough to prevent overflow without changing the final result of a modulo performed later.
        //
        // Essentially, the high word of the accumulator represents the number of times it has
        // wrapped to 65536.
        // 65536 % 65521 = 15, which is what would be carried forward from the high word.
        // We can simply multiply the high word by 15 and add that to the low word to perform
        // the reduction, resulting in a maximum possible residual of 0xFFFF0.
        //
        // This is further optimized to: `high * 16 - high + low`
        // and implemented as: `(high << 4) + (low - high)`.

        Vector128<uint> vlo = values & (Vector128<uint>.AllBitsSet >>> 16);
        Vector128<uint> vhi = values >>> 16;
        return (vhi << 4) + (vlo - vhi);
    }

    /// <summary>
    /// Processes <paramref name="vectors"/> (even) 16-byte vectors, two per iteration,
    /// reducing with <see cref="QuickModBase"/> every <see cref="Adler32Simd.VMax"/> vectors.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static (Vector128<uint> vs1, Vector128<uint> vs2) VectorLoop<TAccumulate, TDotProduct>(Vector128<uint> vs1, Vector128<uint> vs2, ref byte sourceRef, uint vectors)
        where TAccumulate : struct, ISimdAccumulate
        where TDotProduct : struct, ISimdDotProduct
    {
        Debug.Assert(uint.IsEvenInteger(vectors));

        const uint blockSize = 2;

        // Position weights 32..17 for the first vector, 16..1 for the second.
        Vector128<byte> weights1 = Vector128.CreateSequence((byte)32, unchecked((byte)-1));
        Vector128<byte> weights2 = Vector128.CreateSequence((byte)16, unchecked((byte)-1));

        while (vectors >= blockSize)
        {
            Vector128<uint> vs3 = Vector128<uint>.Zero;
            Vector128<uint> vps = Vector128<uint>.Zero;

            uint blocks = uint.Min(vectors, Adler32Simd.VMax) / blockSize;
            vectors -= blocks * blockSize;

            do
            {
                Vector128<byte> bytes1 = Vector128.LoadUnsafe(ref sourceRef);
                Vector128<byte> bytes2 = Vector128.LoadUnsafe(ref sourceRef, (uint)Vector128<byte>.Count);
                sourceRef = ref Unsafe.Add(ref sourceRef, (uint)Vector128<byte>.Count * 2);

                // Accumulate the running s1 once per 32-byte block; scaled by 32 below.
                vps += vs1;

                vs1 = TAccumulate.Accumulate(vs1, bytes1, bytes2);
                vs2 = TDotProduct.DotProduct(vs2, bytes1, weights1);
                vs3 = TDotProduct.DotProduct(vs3, bytes2, weights2);
            }
            while (--blocks != 0);

            vs2 += vps << 5;
            vs2 += vs3;

            vs1 = QuickModBase(vs1);
            vs2 = QuickModBase(vs2);
        }

        return (vs1, vs2);
    }
}

file struct AdlerVector256 : ISimdStrategy
{
    // Same partial reduction as AdlerVector128.QuickModBase, widened to 256 bits.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector256<uint> QuickModBase(Vector256<uint> values)
    {
        Vector256<uint> vlo = values & (Vector256<uint>.AllBitsSet >>> 16);
        Vector256<uint> vhi = values >>> 16;
        return (vhi << 4) + (vlo - vhi);
    }

    // Byte sum via PSADBW against zero (64-bit lane sums, reinterpreted as uint lanes).
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector256<uint> Accumulate(Vector256<uint> sums, Vector256<byte> bytes)
        => Avx2.SumAbsoluteDifferences(bytes, Vector256<byte>.Zero).AsUInt32() + sums;

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector256<uint> Accumulate(Vector256<uint> sums, Vector256<byte> bytes1, Vector256<byte> bytes2)
    {
        Vector256<byte> zero = Vector256<byte>.Zero;
        Vector256<uint> sad = Avx2.SumAbsoluteDifferences(bytes1, zero).AsUInt32();
        return sad + Avx2.SumAbsoluteDifferences(bytes2, zero).AsUInt32() + sums;
    }

    // Weighted byte sum via PMADDUBSW + PMADDWD. Weights are <= 64, so the sbyte
    // reinterpretation of the weight vector is safe here only for weights <= 127.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector256<uint> DotProduct(Vector256<uint> addend, Vector256<byte> left, Vector256<byte> right)
    {
        Vector256<short> mad = Avx2.MultiplyAddAdjacent(left, right.AsSByte());
        return Avx2.MultiplyAddAdjacent(mad, Vector256<short>.One).AsUInt32() + addend;
    }

    /// <summary>
    /// 256-bit main loop: four 16-byte vectors (two 32-byte loads) per iteration,
    /// with a single leftover 32-byte step, then folds back to 128-bit lanes.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static (Vector128<uint> vs1, Vector128<uint> vs2) VectorLoop<TAccumulate, TDotProduct>(Vector128<uint> vs1, Vector128<uint> vs2, ref byte sourceRef, uint vectors)
        where TAccumulate : struct, ISimdAccumulate
        where TDotProduct : struct, ISimdDotProduct
    {
        Debug.Assert(uint.IsEvenInteger(vectors));

        const uint blockSize = 4;

        Vector256<byte> weights1 = Vector256.CreateSequence((byte)64, unchecked((byte)-1));
        Vector256<byte> weights2 = Vector256.CreateSequence((byte)32, unchecked((byte)-1));

        Vector256<uint> ws1 = vs1.ToVector256Unsafe();
        Vector256<uint> ws2 = vs2.ToVector256Unsafe();

        while (vectors >= blockSize)
        {
            Vector256<uint> ws3 = Vector256<uint>.Zero;
            Vector256<uint> wps = Vector256<uint>.Zero;

            uint blocks = uint.Min(vectors, Adler32Simd.VMax) / blockSize;
            vectors -= blocks * blockSize;

            do
            {
                Vector256<byte> bytes1 = Vector256.LoadUnsafe(ref sourceRef);
                Vector256<byte> bytes2 = Vector256.LoadUnsafe(ref sourceRef, (uint)Vector256<byte>.Count);
                sourceRef = ref Unsafe.Add(ref sourceRef, (uint)Vector256<byte>.Count * 2);

                wps += ws1;

                ws1 = Accumulate(ws1, bytes1, bytes2);
                ws2 = DotProduct(ws2, bytes1, weights1);
                ws3 = DotProduct(ws3, bytes2, weights2);
            }
            while (--blocks != 0);

            // 64 bytes per block => running s1 scaled by 64.
            ws2 += wps << 6;
            ws2 += ws3;

            ws1 = QuickModBase(ws1);
            ws2 = QuickModBase(ws2);
        }

        if (vectors != 0)
        {
            Debug.Assert(vectors == 2);

            Vector256<byte> bytes = Vector256.LoadUnsafe(ref sourceRef);
            Vector256<uint> wps = ws1;

            ws1 = Accumulate(ws1, bytes);
            ws2 = DotProduct(ws2, bytes, weights2);

            // 32 leftover bytes => previous s1 scaled by 32.
            ws2 += wps << 5;
        }

        vs1 = ws1.GetLower() + ws1.GetUpper();
        vs2 = ws2.GetLower() + ws2.GetUpper();

        return (vs1, vs2);
    }
}

file struct AccumulateX86 : ISimdAccumulate
{
    // Byte sum via PSADBW against zero.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> Accumulate(Vector128<uint> sums, Vector128<byte> bytes)
        => Sse2.SumAbsoluteDifferences(bytes, Vector128<byte>.Zero).AsUInt32() + sums;

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> Accumulate(Vector128<uint> sums, Vector128<byte> bytes1, Vector128<byte> bytes2)
    {
        Vector128<byte> zero = Vector128<byte>.Zero;
        Vector128<uint> sad = Sse2.SumAbsoluteDifferences(bytes1, zero).AsUInt32();
        return sad + Sse2.SumAbsoluteDifferences(bytes2, zero).AsUInt32() + sums;
    }
}

file struct AccumulateArm64 : ISimdAccumulate
{
    // NOTE(review): ToVector128Unsafe leaves the upper 64 bits undefined; this relies on
    // AddAcrossWidening zeroing the non-result lanes — confirm against the upstream source.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> Accumulate(Vector128<uint> sums, Vector128<byte> bytes)
        => AdvSimd.Arm64.AddAcrossWidening(bytes).AsUInt32().ToVector128Unsafe() + sums;

    // Pairwise-widening add chain: byte pairs -> ushort, folded with bytes2, then into uint sums.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> Accumulate(Vector128<uint> sums, Vector128<byte> bytes1, Vector128<byte> bytes2)
        => AdvSimd.AddPairwiseWideningAndAdd(sums, AdvSimd.AddPairwiseWideningAndAdd(AdvSimd.AddPairwiseWidening(bytes1), bytes2));
}

file struct AccumulateXplat : ISimdAccumulate
{
    // Portable fallback: widen bytes to ushort, then to uint, summing at each step.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> Accumulate(Vector128<uint> sums, Vector128<byte> bytes)
    {
        (Vector128<ushort> bl, Vector128<ushort> bh) = Vector128.Widen(bytes);
        (Vector128<uint> sl, Vector128<uint> sh) = Vector128.Widen(bl + bh);
        return sums + sl + sh;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> Accumulate(Vector128<uint> sums, Vector128<byte> bytes1, Vector128<byte> bytes2)
    {
        (Vector128<ushort> b1l, Vector128<ushort> b1h) = Vector128.Widen(bytes1);
        (Vector128<ushort> b2l, Vector128<ushort> b2h) = Vector128.Widen(bytes2);
        (Vector128<uint> sl, Vector128<uint> sh) = Vector128.Widen((b1l + b1h) + (b2l + b2h));
        return sums + sl + sh;
    }
}

file struct DotProductX86 : ISimdDotProduct
{
    // Weighted byte sum via PMADDUBSW + PMADDWD.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> DotProduct(Vector128<uint> addend, Vector128<byte> left, Vector128<byte> right)
    {
        Vector128<short> mad = Ssse3.MultiplyAddAdjacent(left, right.AsSByte());
        return Sse2.MultiplyAddAdjacent(mad, Vector128<short>.One).AsUInt32() + addend;
    }
}

file struct DotProductArm64 : ISimdDotProduct
{
    // Widening multiply then pairwise-widening fold into the uint accumulator.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> DotProduct(Vector128<uint> addend, Vector128<byte> left, Vector128<byte> right)
    {
        Vector128<ushort> mad = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower());
        mad = AdvSimd.MultiplyWideningUpperAndAdd(mad, left, right);
        return AdvSimd.AddPairwiseWideningAndAdd(addend, mad);
    }
}

file struct DotProductArm64Dp : ISimdDotProduct
{
    // Single UDOT instruction when the DotProd extension is available.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> DotProduct(Vector128<uint> addend, Vector128<byte> left, Vector128<byte> right)
        => Dp.DotProduct(addend, left, right);
}

file struct DotProductXplat : ISimdDotProduct
{
    // Portable fallback: widen both operands, multiply in ushort, widen products to uint.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<uint> DotProduct(Vector128<uint> addend, Vector128<byte> left, Vector128<byte> right)
    {
        (Vector128<ushort> ll, Vector128<ushort> lh) = Vector128.Widen(left);
        (Vector128<ushort> rl, Vector128<ushort> rh) = Vector128.Widen(right);
        (Vector128<uint> ml, Vector128<uint> mh) = Vector128.Widen(ll * rl + lh * rh);
        return addend + ml + mh;
    }
}

file interface ISimdAccumulate
{
    // Adds the byte sum of one 16-byte vector into the uint lane accumulator.
    static abstract Vector128<uint> Accumulate(Vector128<uint> sums, Vector128<byte> bytes);

    // Adds the combined byte sum of two 16-byte vectors into the uint lane accumulator.
    static abstract Vector128<uint> Accumulate(Vector128<uint> sums, Vector128<byte> bytes1, Vector128<byte> bytes2);
}

file interface ISimdDotProduct
{
    // addend + sum over lanes of left[i] * right[i], accumulated into uint lanes.
    static abstract Vector128<uint> DotProduct(Vector128<uint> addend, Vector128<byte> left, Vector128<byte> right);
}

file interface ISimdStrategy
{
    // Consumes an even number of 16-byte vectors from sourceRef, returning updated accumulators.
    static abstract (Vector128<uint> vs1, Vector128<uint> vs2) VectorLoop<TAccumulate, TDotProduct>(Vector128<uint> vs1, Vector128<uint> vs2, ref byte sourceRef, uint vectors)
        where TAccumulate : struct, ISimdAccumulate
        where TDotProduct : struct, ISimdDotProduct;
}
--git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs index da54cdb3372b19..e6e9956d56c190 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs @@ -5,11 +5,6 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -#if NET -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.Intrinsics.X86; -#endif namespace System.IO.Hashing { @@ -30,7 +25,7 @@ public sealed partial class Adler32 : NonCryptographicHashAlgorithm private uint _adler = InitialState; /// Largest prime smaller than 65536. - private const uint ModBase = 65521; + internal const uint ModBase = 65521; /// NMax is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 private const int NMax = 5552; @@ -192,21 +187,9 @@ private static uint Update(uint adler, ReadOnlySpan source) } #if NET - if (BitConverter.IsLittleEndian && - Vector128.IsHardwareAccelerated && - source.Length >= Vector128.Count * 2) + if (IsVectorizable(source)) { - if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && source.Length >= Vector512.Count) - { - return UpdateVector512(adler, source); - } - - if (Vector256.IsHardwareAccelerated && Avx2.IsSupported && source.Length >= Vector256.Count) - { - return UpdateVector256(adler, source); - } - - return UpdateVector128(adler, source); + return UpdateVectorized(adler, source); } #endif @@ -236,250 +219,5 @@ private static uint UpdateScalar(uint adler, ReadOnlySpan source) return (s2 << 16) | s1; } - -#if NET - [MethodImpl(MethodImplOptions.NoInlining)] - private static uint UpdateVector128(uint adler, ReadOnlySpan source) - { - Debug.Assert(source.Length >= Vector128.Count * 2); - - const int BlockSize = 32; // two Vector128 loads - - uint s1 = adler & 0xFFFF; - uint s2 = (adler >> 16) & 0xFFFF; - - ref 
byte sourceRef = ref MemoryMarshal.GetReference(source); - int length = source.Length; - - Vector128 tap1 = Vector128.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17); - Vector128 tap2 = Vector128.Create(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - - do - { - int n = Math.Min(length, NMax); - int blocks = n / BlockSize; - n = blocks * BlockSize; - length -= n; - - Vector128 vs1 = Vector128.Zero; - Vector128 vs2 = Vector128.CreateScalar(s2); - Vector128 vps = Vector128.CreateScalar(s1 * (uint)blocks); - - do - { - Vector128 bytes1 = Vector128.LoadUnsafe(ref sourceRef); - Vector128 bytes2 = Vector128.LoadUnsafe(ref sourceRef, 16); - sourceRef = ref Unsafe.Add(ref sourceRef, BlockSize); - - vps += vs1; - - if (Ssse3.IsSupported) - { - vs1 += Sse2.SumAbsoluteDifferences(bytes1, Vector128.Zero).AsUInt32(); - vs1 += Sse2.SumAbsoluteDifferences(bytes2, Vector128.Zero).AsUInt32(); - - vs2 += Sse2.MultiplyAddAdjacent(Ssse3.MultiplyAddAdjacent(bytes1, tap1), Vector128.One).AsUInt32(); - vs2 += Sse2.MultiplyAddAdjacent(Ssse3.MultiplyAddAdjacent(bytes2, tap2), Vector128.One).AsUInt32(); - } - else if (AdvSimd.IsSupported) - { - // Widening byte sum (equivalent of SumAbsoluteDifferences against zero) - vs1 = AdvSimd.AddPairwiseWideningAndAdd( - vs1, - AdvSimd.AddPairwiseWideningAndAdd( - AdvSimd.AddPairwiseWidening(bytes1), - bytes2)); - - // Widening multiply + horizontal add (equivalent of MultiplyAddAdjacent chain). - // Because weights are all positive (1-32), unsigned byte * unsigned byte multiply is valid. 
- Vector128 wprod1 = AdvSimd.MultiplyWideningLower(bytes1.GetLower(), tap1.AsByte().GetLower()); - wprod1 = AdvSimd.MultiplyWideningUpperAndAdd(wprod1, bytes1, tap1.AsByte()); - vs2 = AdvSimd.AddPairwiseWideningAndAdd(vs2, wprod1); - - Vector128 wprod2 = AdvSimd.MultiplyWideningLower(bytes2.GetLower(), tap2.AsByte().GetLower()); - wprod2 = AdvSimd.MultiplyWideningUpperAndAdd(wprod2, bytes2, tap2.AsByte()); - vs2 = AdvSimd.AddPairwiseWideningAndAdd(vs2, wprod2); - } - else - { - (Vector128 lo1, Vector128 hi1) = Vector128.Widen(bytes1); - (Vector128 lo2, Vector128 hi2) = Vector128.Widen(bytes2); - (Vector128 sumLo, Vector128 sumHi) = Vector128.Widen(lo1 + hi1 + lo2 + hi2); - vs1 += sumLo + sumHi; - vs2 += WeightedSumWidening128(bytes1, tap1) + WeightedSumWidening128(bytes2, tap2); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static Vector128 WeightedSumWidening128(Vector128 data, Vector128 weights) - { - (Vector128 dLo, Vector128 dHi) = Vector128.Widen(data); - (Vector128 wLo, Vector128 wHi) = Vector128.Widen(weights); - - (Vector128 pLo1, Vector128 pHi1) = Vector128.Widen(dLo.AsInt16() * wLo); - (Vector128 pLo2, Vector128 pHi2) = Vector128.Widen(dHi.AsInt16() * wHi); - - return (pLo1 + pHi1 + pLo2 + pHi2).AsUInt32(); - } - } - } - while (--blocks > 0); - - vs2 += vps << 5; - - s1 += Vector128.Sum(vs1); - s2 = Vector128.Sum(vs2); - - s1 %= ModBase; - s2 %= ModBase; - } - while (length >= BlockSize); - - if (length > 0) - { - UpdateScalarTail(ref sourceRef, length, ref s1, ref s2); - } - - return (s2 << 16) | s1; - } - - [MethodImpl(MethodImplOptions.NoInlining)] - private static uint UpdateVector256(uint adler, ReadOnlySpan source) - { - Debug.Assert(source.Length >= Vector256.Count); - - const int BlockSize = 32; - - uint s1 = adler & 0xFFFF; - uint s2 = (adler >> 16) & 0xFFFF; - - ref byte sourceRef = ref MemoryMarshal.GetReference(source); - int length = source.Length; - - Vector256 weights = Vector256.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 
21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - - do - { - int n = Math.Min(length, NMax); - int blocks = n / BlockSize; - n = blocks * BlockSize; - length -= n; - - Vector256 vs1 = Vector256.CreateScalar(s1); - Vector256 vs2 = Vector256.CreateScalar(s2); - Vector256 vs3 = Vector256.Zero; - - do - { - Vector256 data = Vector256.LoadUnsafe(ref sourceRef); - sourceRef = ref Unsafe.Add(ref sourceRef, BlockSize); - - Vector256 vs1_0 = vs1; - vs1 += Avx2.SumAbsoluteDifferences(data, Vector256.Zero).AsUInt32(); - vs3 += vs1_0; - - Vector256 mad = Avx2.MultiplyAddAdjacent(data, weights); - vs2 += Avx2.MultiplyAddAdjacent(mad, Vector256.One).AsUInt32(); - } - while (--blocks > 0); - - vs3 <<= 5; - vs2 += vs3; - - s1 = (uint)Vector256.Sum(vs1.AsUInt64()); // SumAbsoluteDifferences stores the results in the even lanes - s2 = Vector256.Sum(vs2); - - s1 %= ModBase; - s2 %= ModBase; - } - while (length >= BlockSize); - - if (length > 0) - { - UpdateScalarTail(ref sourceRef, length, ref s1, ref s2); - } - - return (s2 << 16) | s1; - } - - [MethodImpl(MethodImplOptions.NoInlining)] - private static uint UpdateVector512(uint adler, ReadOnlySpan source) - { - Debug.Assert(source.Length >= Vector512.Count); - - const int BlockSize = 64; - - uint s1 = adler & 0xFFFF; - uint s2 = (adler >> 16) & 0xFFFF; - - ref byte sourceRef = ref MemoryMarshal.GetReference(source); - int length = source.Length; - - Vector512 weights = Vector512.Create( - 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, - 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - - do - { - int n = Math.Min(length, NMax); - int blocks = n / BlockSize; - n = blocks * BlockSize; - length -= n; - - Vector512 vs1 = Vector512.CreateScalar(s1); - Vector512 vs2 = Vector512.CreateScalar(s2); - Vector512 vs3 = Vector512.Zero; - - do - { - Vector512 data = 
Vector512.LoadUnsafe(ref sourceRef); - sourceRef = ref Unsafe.Add(ref sourceRef, BlockSize); - - Vector512 vs1_0 = vs1; - vs1 += Avx512BW.SumAbsoluteDifferences(data, Vector512.Zero).AsUInt32(); - vs3 += vs1_0; - vs2 += Avx512BW.MultiplyAddAdjacent(Avx512BW.MultiplyAddAdjacent(data, weights), Vector512.One).AsUInt32(); - - Vector256 sumLo = Avx2.SumAbsoluteDifferences(data.GetLower(), Vector256.Zero).AsUInt32(); - vs2 += Vector512.Create(sumLo << 5, Vector256.Zero); - } - while (--blocks > 0); - - vs3 <<= 6; - vs2 += vs3; - - s1 = (uint)Vector512.Sum(vs1.AsUInt64()); - s2 = Vector512.Sum(vs2); - - s1 %= ModBase; - s2 %= ModBase; - } - while (length >= BlockSize); - - if (length >= Vector256.Count) - { - return UpdateVector256((s2 << 16) | s1, MemoryMarshal.CreateReadOnlySpan(ref sourceRef, length)); - } - - if (length > 0) - { - UpdateScalarTail(ref sourceRef, length, ref s1, ref s2); - } - - return (s2 << 16) | s1; - } - - private static void UpdateScalarTail(ref byte sourceRef, int length, ref uint s1, ref uint s2) - { - Debug.Assert(length is > 0 and < NMax); - - foreach (byte b in MemoryMarshal.CreateReadOnlySpan(ref sourceRef, length)) - { - s1 += b; - s2 += s1; - } - - s1 %= ModBase; - s2 %= ModBase; - } -#endif } } diff --git a/src/libraries/System.IO.Hashing/tests/Adler32Tests.cs b/src/libraries/System.IO.Hashing/tests/Adler32Tests.cs index 12ec92693d3d51..bc365c9b2ffeee 100644 --- a/src/libraries/System.IO.Hashing/tests/Adler32Tests.cs +++ b/src/libraries/System.IO.Hashing/tests/Adler32Tests.cs @@ -152,34 +152,55 @@ public void VerifyHashToUInt32(TestCase testCase) } [Theory] - [InlineData(5553, 0xAA40476Bu)] - [InlineData(11104, 0xA2778E87u)] - public void LargeInput_ExceedsNMax(int length, uint expected) + [InlineData(5553)] + [InlineData(11104)] + [InlineData(65536)] + public void LargeInput_ExceedsNMax(int length) { - byte[] data = new byte[length]; - for (int i = 0; i < data.Length; i++) - { - data[i] = (byte)('a' + (i % 26)); - } - - 
Assert.Equal(expected, Adler32.HashToUInt32(data)); + // This test ensures that Adler32 optimizations involving delayed modulo + // do not overflow a 32-bit intermediate at any point. var alg = new Adler32(); + + // The maximum possible value of an Adler32 checksum is 0xFFF0FFF0, + // which has both components just below the modulo value (0xFFF0 == 65520). + // A sequence of 65519 ones will generate this value. + + byte[] primer = new byte[65519]; + primer.AsSpan().Fill(1); + alg.Append(primer); + + Assert.Equal(0xFFF0FFF0, alg.GetCurrentHashAsUInt32()); + + // Starting from an already-maxed checksum, a stream of 5553 max value + // bytes will overflow if not reduced by mod 65521 before the last byte. + // Of course, once overflowed, the result will be incorrect for any larger + // input as well. + + byte[] data = new byte[length]; + data.AsSpan().Fill(byte.MaxValue); alg.Append(data); + + uint expected = ReferenceAdler32(data, 0xFFF0FFF0); Assert.Equal(expected, alg.GetCurrentHashAsUInt32()); } /// - /// Tests a wide variety of lengths to exercise scalar, Vector128, Vector256, and Vector512 + /// Tests a wide variety of lengths to exercise scalar, Vector128, and Vector256 /// code paths as well as their transitions and tail handling. /// [Theory] [InlineData(1)] [InlineData(2)] [InlineData(7)] + [InlineData(8)] + [InlineData(9)] [InlineData(15)] [InlineData(16)] [InlineData(17)] + [InlineData(23)] + [InlineData(24)] + [InlineData(25)] [InlineData(31)] [InlineData(32)] [InlineData(33)] @@ -223,25 +244,6 @@ public void VariousLengths_MatchesReference(int length) Assert.Equal(expected, alg.GetCurrentHashAsUInt32()); } - /// - /// Tests with all-0xFF bytes, which maximizes accumulator values and stresses - /// overflow-safe behavior in the vectorized paths. 
- /// - [Theory] - [InlineData(32)] - [InlineData(64)] - [InlineData(128)] - [InlineData(256)] - [InlineData(5552)] - [InlineData(5553)] - public void AllMaxBytes_MatchesReference(int length) - { - byte[] data = new byte[length]; - data.AsSpan().Fill(0xFF); - - Assert.Equal(ReferenceAdler32(data), Adler32.HashToUInt32(data)); - } - /// /// Tests incremental appending with various chunk sizes to verify that the /// vectorized paths produce the same result regardless of how data is fed in.