diff --git a/src/libraries/System.IO.Hashing/src/System.IO.Hashing.csproj b/src/libraries/System.IO.Hashing/src/System.IO.Hashing.csproj
index 06555e0e92fc41..14970fa6744bf5 100644
--- a/src/libraries/System.IO.Hashing/src/System.IO.Hashing.csproj
+++ b/src/libraries/System.IO.Hashing/src/System.IO.Hashing.csproj
@@ -37,7 +37,10 @@ System.IO.Hashing.XxHash32
+
+
+
diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Table.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Table.cs
index ad5970a1fddf23..db156dde0054e0 100644
--- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Table.cs
+++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Table.cs
@@ -2,6 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics;
+#if NET
+using System.Runtime.Intrinsics;
+#endif
namespace System.IO.Hashing
{
@@ -59,18 +62,33 @@ private static uint[] GenerateLookupTable(uint polynomial, bool reflectInput)
return table;
}
- private sealed class ReflectedTableBasedCrc32 : Crc32ParameterSet
+ private sealed partial class ReflectedTableBasedCrc32 : Crc32ParameterSet
{
private readonly uint[] _lookupTable;
+ // Optional hook. The implementing part is in Crc32ParameterSet.Vectorized.cs, which is wrapped in #if NET.
+ // When no implementing part is compiled, the C# compiler removes this declaration and every call to it.
+ partial void InitializeVectorized();
+
+ // Builds the reflected lookup table used by the scalar path, then invokes the optional
+ // InitializeVectorized hook.
internal ReflectedTableBasedCrc32(uint polynomial, uint initialValue, uint finalXorValue)
: base(polynomial, initialValue, finalXorValue, reflectValues: true)
{
_lookupTable = GenerateLookupTable(polynomial, reflectInput: true);
+ InitializeVectorized();
}
+ // Dispatcher: uses UpdateVectorized when _canVectorize is set and the input holds at least one
+ // full vector; in every other case (including non-NET builds) it uses UpdateScalar.
internal override uint Update(uint value, ReadOnlySpan source)
{
+#if NET
+ if (_canVectorize && source.Length >= Vector128.Count)
+ {
+ return UpdateVectorized(value, source);
+ }
+#endif
+
+ return UpdateScalar(value, source);
+ }
+
+ private uint UpdateScalar(uint value, ReadOnlySpan source)
+ {
uint[] lookupTable = _lookupTable;
uint crc = value;
@@ -86,18 +104,33 @@ internal override uint Update(uint value, ReadOnlySpan source)
}
}
- private sealed class ForwardTableBasedCrc32 : Crc32ParameterSet
+ private sealed partial class ForwardTableBasedCrc32 : Crc32ParameterSet
{
private readonly uint[] _lookupTable;
+ // Optional hook. The implementing part is in Crc32ParameterSet.Vectorized.cs, which is wrapped in #if NET.
+ // When no implementing part is compiled, the C# compiler removes this declaration and every call to it.
+ partial void InitializeVectorized();
+
+ // Builds the forward (non-reflected) lookup table used by the scalar path, then invokes the
+ // optional InitializeVectorized hook.
internal ForwardTableBasedCrc32(uint polynomial, uint initialValue, uint finalXorValue)
: base(polynomial, initialValue, finalXorValue, reflectValues: false)
{
_lookupTable = GenerateLookupTable(polynomial, reflectInput: false);
+ InitializeVectorized();
}
+ // Dispatcher: uses UpdateVectorized when _canVectorize is set and the input holds at least one
+ // full vector; in every other case (including non-NET builds) it uses UpdateScalar.
internal override uint Update(uint value, ReadOnlySpan source)
{
+#if NET
+ if (_canVectorize && source.Length >= Vector128.Count)
+ {
+ return UpdateVectorized(value, source);
+ }
+#endif
+
+ return UpdateScalar(value, source);
+ }
+
+ private uint UpdateScalar(uint value, ReadOnlySpan source)
+ {
uint[] lookupTable = _lookupTable;
uint crc = value;
diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Vectorized.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Vectorized.cs
new file mode 100644
index 00000000000000..b38bc0a93905f1
--- /dev/null
+++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Vectorized.cs
@@ -0,0 +1,277 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#if NET
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using static System.IO.Hashing.VectorHelper;
+
+namespace System.IO.Hashing
+{
+ public partial class Crc32ParameterSet
+ {
+ private partial class ReflectedTableBasedCrc32
+ {
+            // Precomputed constants for PCLMULQDQ-based folding.
+
+            // True once InitializeVectorized has filled in every constant below.
+            private bool _canVectorize;
+
+            // 4-way fold constants.
+            private ulong _k1;
+            private ulong _k2;
+
+            // 1-way fold constants.
+            private ulong _k3;
+            private ulong _k4;
+
+            // 128-to-64 fold constant.
+            private ulong _k5;
+
+            // Barrett reduction constants.
+            private ulong _pStar;
+            private ulong _mu;
+
+            /// <summary>
+            /// Derives the reflected folding and Barrett constants from <c>Polynomial</c> and enables
+            /// the vectorized path. Leaves <c>_canVectorize</c> false on big-endian hosts or when
+            /// <c>VectorHelper.IsSupported</c> is false.
+            /// </summary>
+            partial void InitializeVectorized()
+            {
+                const int PolyDegree = 32;
+                const int ReflectedWidth = PolyDegree + 1;
+
+                bool usable = BitConverter.IsLittleEndian && VectorHelper.IsSupported;
+                if (!usable)
+                {
+                    return;
+                }
+
+                // 33-bit polynomial including the leading x^32 term.
+                ulong polyWithLeadingTerm = (1UL << PolyDegree) | Polynomial;
+                CrcPolynomialHelper.UInt640 divisor = new(polyWithLeadingTerm);
+
+                // Each folding constant is x^power mod P, bit-reversed across 33 bits.
+                _k1 = Reflect(CrcPolynomialHelper.ComputeFoldingConstant(divisor, (4 * 128) + PolyDegree));
+                _k2 = Reflect(CrcPolynomialHelper.ComputeFoldingConstant(divisor, (4 * 128) - PolyDegree));
+                _k3 = Reflect(CrcPolynomialHelper.ComputeFoldingConstant(divisor, 128 + PolyDegree));
+                _k4 = Reflect(CrcPolynomialHelper.ComputeFoldingConstant(divisor, 128 - PolyDegree));
+                _k5 = Reflect(CrcPolynomialHelper.ComputeFoldingConstant(divisor, 2 * PolyDegree));
+
+                // Barrett pair: the reflected polynomial and the reflected quotient of x^64 / P.
+                _pStar = Reflect(polyWithLeadingTerm);
+                _mu = Reflect(CrcPolynomialHelper.ComputeBarrettConstant(divisor, 2 * PolyDegree));
+
+                _canVectorize = true;
+
+                static ulong Reflect(ulong bits) => CrcPolynomialHelper.ReverseBits(bits, ReflectedWidth);
+            }
+
+ /// <summary>
+ /// Vectorized CRC update for reflected parameter sets: folds <paramref name="source"/> four vectors
+ /// (or one vector) at a time using the constants set up by InitializeVectorized, reduces the folded
+ /// state to 32 bits, and passes any tail shorter than one vector to UpdateScalar.
+ /// </summary>
+ /// <param name="crc">The running CRC value to continue from.</param>
+ /// <param name="source">The input; must hold at least one full vector (asserted below).</param>
+ /// <returns>The updated running CRC value.</returns>
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private uint UpdateVectorized(uint crc, ReadOnlySpan source)
+ {
+ Debug.Assert(_canVectorize);
+ Debug.Assert(source.Length >= Vector128.Count);
+
+ // srcRef and length are advanced together as input is consumed.
+ ref byte srcRef = ref MemoryMarshal.GetReference(source);
+ int length = source.Length;
+
+ Vector128 kConstants;
+ Vector128 x1;
+ Vector128 x2;
+
+ // Long input (at least eight vectors): run four accumulators in parallel.
+ if (length >= Vector128.Count * 8)
+ {
+ // Seed the accumulators from the first four vectors (byte offsets 0/16/32/48).
+ x1 = Vector128.LoadUnsafe(ref srcRef).AsUInt64();
+ x2 = Vector128.LoadUnsafe(ref srcRef, 16).AsUInt64();
+ Vector128 x3 = Vector128.LoadUnsafe(ref srcRef, 32).AsUInt64();
+ Vector128 x4 = Vector128.LoadUnsafe(ref srcRef, 48).AsUInt64();
+
+ srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4);
+ length -= Vector128.Count * 4;
+
+ // CreateScalar puts crc in element 0 and zeroes the rest, so this XORs the incoming
+ // CRC into the lowest 32 bits of the first accumulator.
+ x1 ^= Vector128.CreateScalar(crc).AsUInt64();
+
+ kConstants = Vector128.Create(_k1, _k2);
+
+ // The branch condition guarantees at least four more vectors remain, so the body runs
+ // at least once. Each pass folds the next four vectors into the four accumulators.
+ do
+ {
+ Vector128 y5 = Vector128.LoadUnsafe(ref srcRef).AsUInt64();
+ Vector128 y6 = Vector128.LoadUnsafe(ref srcRef, 16).AsUInt64();
+ Vector128 y7 = Vector128.LoadUnsafe(ref srcRef, 32).AsUInt64();
+ Vector128 y8 = Vector128.LoadUnsafe(ref srcRef, 48).AsUInt64();
+
+ x1 = FoldPolynomialPair(y5, x1, kConstants);
+ x2 = FoldPolynomialPair(y6, x2, kConstants);
+ x3 = FoldPolynomialPair(y7, x3, kConstants);
+ x4 = FoldPolynomialPair(y8, x4, kConstants);
+
+ srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4);
+ length -= Vector128.Count * 4;
+ } while (length >= Vector128.Count * 4);
+
+ // Collapse the four accumulators into x1 by chaining one-vector folds:
+ // x1 into x2, that result into x3, then into x4.
+ kConstants = Vector128.Create(_k3, _k4);
+ x1 = FoldPolynomialPair(x2, x1, kConstants);
+ x1 = FoldPolynomialPair(x3, x1, kConstants);
+ x1 = FoldPolynomialPair(x4, x1, kConstants);
+ }
+ else
+ {
+ // Short input: start from a single vector with the incoming CRC mixed in.
+ Debug.Assert(length >= 16);
+
+ x1 = Vector128.LoadUnsafe(ref srcRef).AsUInt64();
+ x1 ^= Vector128.CreateScalar(crc).AsUInt64();
+
+ srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count);
+ length -= Vector128.Count;
+ }
+
+ // One-vector constants; this assignment is the only one on the short-input path.
+ kConstants = Vector128.Create(_k3, _k4);
+
+ // Fold any remaining whole vectors one at a time.
+ while (length >= Vector128.Count)
+ {
+ x1 = FoldPolynomialPair(Vector128.LoadUnsafe(ref srcRef).AsUInt64(), x1, kConstants);
+
+ srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count);
+ length -= Vector128.Count;
+ }
+
+ // Fold 128 bits to 64 bits.
+ // bitmask keeps the low 32 bits of each 64-bit lane (this path only runs on little-endian hosts).
+ Vector128 bitmask = Vector128.Create(~0, 0, ~0, 0).AsUInt64();
+ x1 = ShiftRightBytesInVector(x1, 8) ^
+ CarrylessMultiplyLower(x1, Vector128.CreateScalar(_k4));
+ x1 = CarrylessMultiplyLower(x1 & bitmask, Vector128.CreateScalar(_k5)) ^
+ ShiftRightBytesInVector(x1, 4);
+
+ // Reduce to 32 bits via Barrett reduction.
+ kConstants = Vector128.Create(_pStar, _mu);
+ x2 = CarrylessMultiplyLeftLowerRightUpper(x1 & bitmask, kConstants) & bitmask;
+ x2 = CarrylessMultiplyLower(x2, kConstants);
+ x1 ^= x2;
+
+ // The reduced CRC is read from 32-bit element 1; fewer than one vector of input may
+ // remain, and the scalar path finishes it.
+ uint result = x1.AsUInt32().GetElement(1);
+ return length > 0
+ ? UpdateScalar(result, MemoryMarshal.CreateReadOnlySpan(ref srcRef, length))
+ : result;
+ }
+ }
+
+ private partial class ForwardTableBasedCrc32
+ {
+            // Precomputed constants for PCLMULQDQ-based folding.
+
+            // True once InitializeVectorized has filled in every constant below.
+            private bool _canVectorize;
+
+            // 4-way fold constants.
+            private ulong _k1;
+            private ulong _k2;
+
+            // 1-way fold constants.
+            private ulong _k3;
+            private ulong _k4;
+
+            // 128-to-64 fold constant.
+            private ulong _k5;
+
+            // Barrett reduction constants.
+            private ulong _poly;
+            private ulong _mu;
+
+            /// <summary>
+            /// Derives the forward (non-reflected) folding and Barrett constants from <c>Polynomial</c>
+            /// and enables the vectorized path. Leaves <c>_canVectorize</c> false when
+            /// <c>VectorHelper.IsSupported</c> is false.
+            /// </summary>
+            partial void InitializeVectorized()
+            {
+                if (VectorHelper.IsSupported)
+                {
+                    const int PolyDegree = 32;
+
+                    ulong lowTerms = Polynomial;
+
+                    // 33-bit polynomial including the leading x^32 term.
+                    CrcPolynomialHelper.UInt640 divisor = new((1UL << PolyDegree) | lowTerms);
+
+                    // Forward folding constants are plain remainders: x^power mod P.
+                    _k1 = CrcPolynomialHelper.ComputeFoldingConstant(divisor, 4 * 128);
+                    _k2 = CrcPolynomialHelper.ComputeFoldingConstant(divisor, (4 * 128) + 64);
+                    _k3 = CrcPolynomialHelper.ComputeFoldingConstant(divisor, 128);
+                    _k4 = CrcPolynomialHelper.ComputeFoldingConstant(divisor, 128 + 64);
+
+                    // Same power as _k3.
+                    _k5 = CrcPolynomialHelper.ComputeFoldingConstant(divisor, 128);
+
+                    // Barrett pair: the polynomial without its leading term, and the low 32 bits
+                    // of the quotient x^64 / P.
+                    _poly = lowTerms;
+                    ulong quotient = CrcPolynomialHelper.ComputeBarrettConstant(divisor, 2 * PolyDegree);
+                    _mu = quotient & 0xFFFFFFFF;
+
+                    _canVectorize = true;
+                }
+            }
+
+ /// <summary>
+ /// Loads one vector from <paramref name="source"/> at <paramref name="elementOffset"/> and, on
+ /// little-endian hosts, reverses the order of its 16 bytes before reinterpreting it as 64-bit lanes.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128 LoadFromSourceByteSwapped(ref byte source, nuint elementOffset)
+ {
+ Vector128 vector = Vector128.LoadUnsafe(ref source, elementOffset);
+
+ if (BitConverter.IsLittleEndian)
+ {
+ // Shuffle indices 15 down to 0 select the bytes in reverse order.
+ vector = Vector128.Shuffle(vector,
+ Vector128.Create((byte)0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
+ 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00));
+ }
+
+ return vector.AsUInt64();
+ }
+
+            /// <summary>
+            /// Vectorized CRC update for forward (non-reflected) parameter sets: folds byte-reversed
+            /// vectors of <paramref name="source"/> four (or one) at a time using the constants set up
+            /// by InitializeVectorized, reduces the folded state, and passes any tail shorter than one
+            /// vector to UpdateScalar.
+            /// </summary>
+            /// <param name="crc">The running CRC value to continue from.</param>
+            /// <param name="source">The input; must hold at least one full vector (asserted below).</param>
+            /// <returns>The updated running CRC value.</returns>
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            private uint UpdateVectorized(uint crc, ReadOnlySpan source)
+            {
+                Debug.Assert(_canVectorize);
+                Debug.Assert(source.Length >= Vector128.Count);
+
+                // srcRef and length are advanced together as input is consumed.
+                ref byte srcRef = ref MemoryMarshal.GetReference(source);
+                int length = source.Length;
+
+                Vector128 x7;
+                Vector128 kConstants;
+
+                // Long input (at least eight vectors): run four accumulators in parallel.
+                if (length >= Vector128.Count * 8)
+                {
+                    // x0, x1, x2, x7 hold the first, second, third and fourth input vectors.
+                    Vector128 x0 = LoadFromSourceByteSwapped(ref srcRef, 0);
+                    Vector128 x1 = LoadFromSourceByteSwapped(ref srcRef, 16);
+                    Vector128 x2 = LoadFromSourceByteSwapped(ref srcRef, 32);
+                    x7 = LoadFromSourceByteSwapped(ref srcRef, 48);
+
+                    srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4);
+                    length -= Vector128.Count * 4;
+
+                    // NOTE(review): ShiftLowerToUpper presumably moves the low 64-bit lane into the
+                    // high lane, which would place the 32-bit CRC in bits 64..95 of the first vector
+                    // rather than the top 32 bits - confirm this is the intended alignment for CRC-32.
+                    x0 ^= ShiftLowerToUpper(Vector128.CreateScalar((ulong)crc));
+
+                    kConstants = Vector128.Create(_k1, _k2);
+
+                    // The branch condition guarantees at least four more vectors remain, so the body
+                    // runs at least once. Each pass folds the next four vectors into the accumulators.
+                    do
+                    {
+                        Vector128 y1 = LoadFromSourceByteSwapped(ref srcRef, 0);
+                        Vector128 y2 = LoadFromSourceByteSwapped(ref srcRef, 16);
+                        Vector128 y3 = LoadFromSourceByteSwapped(ref srcRef, 32);
+                        Vector128 y4 = LoadFromSourceByteSwapped(ref srcRef, 48);
+
+                        x0 = FoldPolynomialPair(y1, x0, kConstants);
+                        x1 = FoldPolynomialPair(y2, x1, kConstants);
+                        x2 = FoldPolynomialPair(y3, x2, kConstants);
+                        x7 = FoldPolynomialPair(y4, x7, kConstants);
+
+                        srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4);
+                        length -= Vector128.Count * 4;
+                    } while (length >= Vector128.Count * 4);
+
+                    // Collapse the four accumulators into x7. (_k3, _k4) advance a value by exactly
+                    // one vector, so the folds must be chained: x0 into x1, that result into x2, and
+                    // that result into x7. Folding x0, x1 and x2 straight into x7 with the same
+                    // constants would advance all three by one vector although they are three, two
+                    // and one vectors ahead of x7.
+                    kConstants = Vector128.Create(_k3, _k4);
+                    x1 = FoldPolynomialPair(x1, x0, kConstants);
+                    x2 = FoldPolynomialPair(x2, x1, kConstants);
+                    x7 = FoldPolynomialPair(x7, x2, kConstants);
+                }
+                else
+                {
+                    // Short input: start from a single vector with the incoming CRC mixed in.
+                    Debug.Assert(length >= 16);
+
+                    x7 = LoadFromSourceByteSwapped(ref srcRef, 0);
+                    x7 ^= ShiftLowerToUpper(Vector128.CreateScalar((ulong)crc));
+
+                    srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count);
+                    length -= Vector128.Count;
+                }
+
+                // One-vector constants; this assignment is the only one on the short-input path.
+                kConstants = Vector128.Create(_k3, _k4);
+
+                // Fold any remaining whole vectors one at a time.
+                while (length >= Vector128.Count)
+                {
+                    x7 = FoldPolynomialPair(LoadFromSourceByteSwapped(ref srcRef, 0), x7, kConstants);
+
+                    srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count);
+                    length -= Vector128.Count;
+                }
+
+                // Compute CRC of a 128-bit value and fold to the upper 64-bits.
+                x7 = CarrylessMultiplyLeftUpperRightLower(x7, Vector128.CreateScalar(_k5)) ^
+                     ShiftLowerToUpper(x7);
+
+                // Barrett reduction.
+                kConstants = Vector128.Create(_mu, _poly);
+                Vector128 temp = x7;
+                x7 = CarrylessMultiplyLeftUpperRightLower(x7, kConstants) ^ (x7 & Vector128.Create(0UL, ~0UL));
+                x7 = CarrylessMultiplyUpper(x7, kConstants);
+                x7 ^= temp;
+
+                // The result is the low 32 bits of 64-bit element 0; fewer than one vector of input
+                // may remain, and the scalar path finishes it.
+                uint result = (uint)x7.GetElement(0);
+                return length > 0
+                    ? UpdateScalar(result, MemoryMarshal.CreateReadOnlySpan(ref srcRef, length))
+                    : result;
+            }
+ }
+ }
+}
+
+#endif
diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Table.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Table.cs
index 820fc8093fbdc9..d3025ae85808f6 100644
--- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Table.cs
+++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Table.cs
@@ -2,6 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics;
+#if NET
+using System.Runtime.Intrinsics;
+#endif
namespace System.IO.Hashing
{
@@ -59,18 +62,33 @@ private static ulong[] GenerateLookupTable(ulong polynomial, bool reflectInput)
return table;
}
- private sealed class ReflectedTableBasedCrc64 : Crc64ParameterSet
+ private sealed partial class ReflectedTableBasedCrc64 : Crc64ParameterSet
{
private readonly ulong[] _lookupTable;
+ // Optional hook. The implementing part is in Crc64ParameterSet.Vectorized.cs, which is wrapped in #if NET.
+ // When no implementing part is compiled, the C# compiler removes this declaration and every call to it.
+ partial void InitializeVectorized();
+
+ // Builds the reflected lookup table used by the scalar path, then invokes the optional
+ // InitializeVectorized hook.
internal ReflectedTableBasedCrc64(ulong polynomial, ulong initialValue, ulong finalXorValue)
: base(polynomial, initialValue, finalXorValue, reflectValues: true)
{
_lookupTable = GenerateLookupTable(polynomial, reflectInput: true);
+ InitializeVectorized();
}
+ // Dispatcher: uses UpdateVectorized when _canVectorize is set and the input holds at least one
+ // full vector; in every other case (including non-NET builds) it uses UpdateScalar.
internal override ulong Update(ulong value, ReadOnlySpan data)
{
+#if NET
+ if (_canVectorize && data.Length >= Vector128.Count)
+ {
+ return UpdateVectorized(value, data);
+ }
+#endif
+
+ return UpdateScalar(value, data);
+ }
+
+ private ulong UpdateScalar(ulong value, ReadOnlySpan data)
+ {
ulong[] lookupTable = _lookupTable;
ulong crc = value;
@@ -86,18 +104,33 @@ internal override ulong Update(ulong value, ReadOnlySpan data)
}
}
- private sealed class ForwardTableBasedCrc64 : Crc64ParameterSet
+ private sealed partial class ForwardTableBasedCrc64 : Crc64ParameterSet
{
private readonly ulong[] _lookupTable;
+ // Optional hook. The implementing part is in Crc64ParameterSet.Vectorized.cs, which is wrapped in #if NET.
+ // When no implementing part is compiled, the C# compiler removes this declaration and every call to it.
+ partial void InitializeVectorized();
+
+ // Builds the forward (non-reflected) lookup table used by the scalar path, then invokes the
+ // optional InitializeVectorized hook.
internal ForwardTableBasedCrc64(ulong polynomial, ulong initialValue, ulong finalXorValue)
: base(polynomial, initialValue, finalXorValue, reflectValues: false)
{
_lookupTable = GenerateLookupTable(polynomial, reflectInput: false);
+ InitializeVectorized();
}
+ // Dispatcher: uses UpdateVectorized when _canVectorize is set and the input holds at least one
+ // full vector; in every other case (including non-NET builds) it uses UpdateScalar.
internal override ulong Update(ulong value, ReadOnlySpan data)
{
+#if NET
+ if (_canVectorize && data.Length >= Vector128.Count)
+ {
+ return UpdateVectorized(value, data);
+ }
+#endif
+
+ return UpdateScalar(value, data);
+ }
+
+ private ulong UpdateScalar(ulong value, ReadOnlySpan data)
+ {
ulong[] lookupTable = _lookupTable;
ulong crc = value;
diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Vectorized.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Vectorized.cs
new file mode 100644
index 00000000000000..5b34cd59cb06e7
--- /dev/null
+++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Vectorized.cs
@@ -0,0 +1,293 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#if NET
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using static System.IO.Hashing.VectorHelper;
+
+namespace System.IO.Hashing
+{
+ public partial class Crc64ParameterSet
+ {
+ private partial class ReflectedTableBasedCrc64
+ {
+            // Precomputed constants for PCLMULQDQ-based folding.
+            private bool _canVectorize;
+            private ulong _k1, _k2; // 4-way fold constants
+            private ulong _k3, _k4; // 1-way fold constants (_k4 is also used by the 128-to-64 fold)
+            private ulong _pStar, _mu; // Barrett reduction constants
+
+            /// <summary>
+            /// Derives the reflected folding and Barrett constants from <c>Polynomial</c> and enables
+            /// the vectorized path. Leaves <c>_canVectorize</c> false on big-endian hosts or when
+            /// <c>VectorHelper.IsSupported</c> is false.
+            /// </summary>
+            /// <remarks>
+            /// No separate 128-to-64 fold constant is kept: the reflected 64-bit reduction in
+            /// UpdateVectorized uses only <c>_k4</c>, <c>_pStar</c> and <c>_mu</c>.
+            /// </remarks>
+            partial void InitializeVectorized()
+            {
+                if (!BitConverter.IsLittleEndian || !VectorHelper.IsSupported)
+                {
+                    return;
+                }
+
+                ulong polynomial = Polynomial;
+                int polyDeg = 64;
+
+                // Build 65-bit full polynomial: x^64 + polynomial
+                CrcPolynomialHelper.UInt640 fullPoly = new(polynomial);
+                {
+                    CrcPolynomialHelper.UInt640 highBit = new(1);
+                    highBit.ShiftLeftEquals(polyDeg);
+                    fullPoly.XorEquals(ref highBit);
+                }
+
+                // Reflected folding constants: reverse_bits(x^power mod fullPoly, polyDeg+1)
+                // NOTE(review): ReverseBits is called here with width 65 on a 64-bit value. C# masks
+                // ulong shift counts to their low 6 bits, so inside ReverseBits the i == 64 iteration
+                // aliases bit 0 - confirm the resulting constants are the intended ones.
+                _k1 = CrcPolynomialHelper.ReverseBits(
+                    CrcPolynomialHelper.ComputeFoldingConstant(fullPoly, 4 * 128 + polyDeg), polyDeg + 1);
+                _k2 = CrcPolynomialHelper.ReverseBits(
+                    CrcPolynomialHelper.ComputeFoldingConstant(fullPoly, 4 * 128 - polyDeg), polyDeg + 1);
+                _k3 = CrcPolynomialHelper.ReverseBits(
+                    CrcPolynomialHelper.ComputeFoldingConstant(fullPoly, 128 + polyDeg), polyDeg + 1);
+                _k4 = CrcPolynomialHelper.ReverseBits(
+                    CrcPolynomialHelper.ComputeFoldingConstant(fullPoly, 128 - polyDeg), polyDeg + 1);
+
+                // Barrett reduction constants
+                // P* = reverse_bits(fullPoly, polyDeg+1)
+                // For 64-bit CRC: fullPoly = x^64 + polynomial (65 bits), which does not fit in a
+                // ulong, so the value is assembled by hand: the 64-bit reversal of the low terms,
+                // with bit 0 set for the leading x^64 term.
+                _pStar = CrcPolynomialHelper.ReverseBits(polynomial, polyDeg) | 1;
+                _mu = CrcPolynomialHelper.ReverseBits(
+                    CrcPolynomialHelper.ComputeBarrettConstant(fullPoly, 2 * polyDeg), polyDeg + 1);
+
+                _canVectorize = true;
+            }
+
+ /// <summary>
+ /// Vectorized CRC update for reflected parameter sets: folds <paramref name="data"/> four vectors
+ /// (or one vector) at a time using the constants set up by InitializeVectorized, reduces the folded
+ /// state to 64 bits, and passes any tail shorter than one vector to UpdateScalar.
+ /// </summary>
+ /// <param name="crc">The running CRC value to continue from.</param>
+ /// <param name="data">The input; must hold at least one full vector (asserted below).</param>
+ /// <returns>The updated running CRC value.</returns>
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private ulong UpdateVectorized(ulong crc, ReadOnlySpan data)
+ {
+ Debug.Assert(_canVectorize);
+ Debug.Assert(data.Length >= Vector128.Count);
+
+ // srcRef and length are advanced together as input is consumed.
+ ref byte srcRef = ref MemoryMarshal.GetReference(data);
+ int length = data.Length;
+
+ Vector128 kConstants;
+ Vector128 x1;
+ Vector128 x2;
+
+ // Long input (at least eight vectors): run four accumulators in parallel.
+ if (length >= Vector128.Count * 8)
+ {
+ // Seed the accumulators from the first four vectors (byte offsets 0/16/32/48).
+ x1 = Vector128.LoadUnsafe(ref srcRef).AsUInt64();
+ x2 = Vector128.LoadUnsafe(ref srcRef, 16).AsUInt64();
+ Vector128 x3 = Vector128.LoadUnsafe(ref srcRef, 32).AsUInt64();
+ Vector128 x4 = Vector128.LoadUnsafe(ref srcRef, 48).AsUInt64();
+
+ srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4);
+ length -= Vector128.Count * 4;
+
+ // CreateScalar puts crc in element 0 and zeroes the rest, so this XORs the incoming
+ // CRC into the low 64-bit lane of the first accumulator.
+ x1 ^= Vector128.CreateScalar(crc);
+
+ kConstants = Vector128.Create(_k1, _k2);
+
+ // The branch condition guarantees at least four more vectors remain, so the body runs
+ // at least once. Each pass folds the next four vectors into the four accumulators.
+ do
+ {
+ Vector128 y5 = Vector128.LoadUnsafe(ref srcRef).AsUInt64();
+ Vector128 y6 = Vector128.LoadUnsafe(ref srcRef, 16).AsUInt64();
+ Vector128 y7 = Vector128.LoadUnsafe(ref srcRef, 32).AsUInt64();
+ Vector128 y8 = Vector128.LoadUnsafe(ref srcRef, 48).AsUInt64();
+
+ x1 = FoldPolynomialPair(y5, x1, kConstants);
+ x2 = FoldPolynomialPair(y6, x2, kConstants);
+ x3 = FoldPolynomialPair(y7, x3, kConstants);
+ x4 = FoldPolynomialPair(y8, x4, kConstants);
+
+ srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4);
+ length -= Vector128.Count * 4;
+ } while (length >= Vector128.Count * 4);
+
+ // Collapse the four accumulators into x1 by chaining one-vector folds:
+ // x1 into x2, that result into x3, then into x4.
+ kConstants = Vector128.Create(_k3, _k4);
+ x1 = FoldPolynomialPair(x2, x1, kConstants);
+ x1 = FoldPolynomialPair(x3, x1, kConstants);
+ x1 = FoldPolynomialPair(x4, x1, kConstants);
+ }
+ else
+ {
+ // Short input: start from a single vector with the incoming CRC mixed in.
+ Debug.Assert(length >= 16);
+
+ x1 = Vector128.LoadUnsafe(ref srcRef).AsUInt64();
+ x1 ^= Vector128.CreateScalar(crc);
+
+ srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count);
+ length -= Vector128.Count;
+ }
+
+ // One-vector constants; this assignment is the only one on the short-input path.
+ kConstants = Vector128.Create(_k3, _k4);
+
+ // Fold any remaining whole vectors one at a time.
+ while (length >= Vector128.Count)
+ {
+ x1 = FoldPolynomialPair(Vector128.LoadUnsafe(ref srcRef).AsUInt64(), x1, kConstants);
+
+ srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count);
+ length -= Vector128.Count;
+ }
+
+ // Fold 128 bits to 64 bits.
+ x1 = ShiftRightBytesInVector(x1, 8) ^
+ CarrylessMultiplyLower(x1, Vector128.CreateScalar(_k4));
+
+ // Barrett reduction.
+ // NOTE(review): the first multiply here uses the lower constant (_pStar) and the second the
+ // upper one (_mu), the reverse of the 32-bit reflected path - confirm the order is intended.
+ kConstants = Vector128.Create(_pStar, _mu);
+ x2 = CarrylessMultiplyLower(x1, kConstants);
+ x2 = CarrylessMultiplyLeftLowerRightUpper(x2, kConstants);
+ x1 ^= x2;
+
+ // The reduced CRC is read from 64-bit element 0; fewer than one vector of input may
+ // remain, and the scalar path finishes it.
+ ulong result = x1.GetElement(0);
+ return length > 0
+ ? UpdateScalar(result, MemoryMarshal.CreateReadOnlySpan(ref srcRef, length))
+ : result;
+ }
+ }
+
+ private partial class ForwardTableBasedCrc64
+ {
+            // Precomputed constants for PCLMULQDQ-based folding.
+            private bool _canVectorize;
+            private ulong _k1, _k2; // 4-way fold constants
+            private ulong _k3, _k4; // 1-way fold constants
+            private ulong _k5; // 128-to-64 fold constant
+            private ulong _poly, _mu; // Barrett reduction constants
+
+            /// <summary>
+            /// Derives the forward (non-reflected) folding and Barrett constants from <c>Polynomial</c>
+            /// and enables the vectorized path. Leaves <c>_canVectorize</c> false when
+            /// <c>VectorHelper.IsSupported</c> is false.
+            /// </summary>
+            partial void InitializeVectorized()
+            {
+                if (!VectorHelper.IsSupported)
+                {
+                    return;
+                }
+
+                ulong polynomial = Polynomial;
+                int polyDeg = 64;
+
+                // Build 65-bit full polynomial: x^64 + polynomial
+                CrcPolynomialHelper.UInt640 fullPoly = new(polynomial);
+                {
+                    CrcPolynomialHelper.UInt640 highBit = new(1);
+                    highBit.ShiftLeftEquals(polyDeg);
+                    fullPoly.XorEquals(ref highBit);
+                }
+
+                // Forward folding constants: x^power mod fullPoly
+                _k1 = CrcPolynomialHelper.ComputeFoldingConstant(fullPoly, 4 * 128);
+                _k2 = CrcPolynomialHelper.ComputeFoldingConstant(fullPoly, 4 * 128 + 64);
+                _k3 = CrcPolynomialHelper.ComputeFoldingConstant(fullPoly, 128);
+                _k4 = CrcPolynomialHelper.ComputeFoldingConstant(fullPoly, 128 + 64);
+                _k5 = CrcPolynomialHelper.ComputeFoldingConstant(fullPoly, 128);
+
+                // Barrett reduction constants
+                _poly = polynomial;
+
+                // ComputeBarrettConstant already returns only the low 64 bits of the 65-bit quotient
+                // x^128 / fullPoly, so no further masking is needed. Do not mask with
+                // ((1UL << polyDeg) - 1): C# reduces a ulong shift count to its low 6 bits, so
+                // 1UL << 64 is 1 and that mask would be 0, zeroing _mu.
+                _mu = CrcPolynomialHelper.ComputeBarrettConstant(fullPoly, 2 * polyDeg);
+
+                _canVectorize = true;
+            }
+
+ /// <summary>
+ /// Loads one vector from <paramref name="source"/> at <paramref name="elementOffset"/> and, on
+ /// little-endian hosts, reverses the order of its 16 bytes before reinterpreting it as 64-bit lanes.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128 LoadFromSourceByteSwapped(ref byte source, nuint elementOffset)
+ {
+ Vector128 vector = Vector128.LoadUnsafe(ref source, elementOffset);
+
+ if (BitConverter.IsLittleEndian)
+ {
+ // Shuffle indices 15 down to 0 select the bytes in reverse order.
+ vector = Vector128.Shuffle(vector,
+ Vector128.Create((byte)0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
+ 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00));
+ }
+
+ return vector.AsUInt64();
+ }
+
+            /// <summary>
+            /// Vectorized CRC update for forward (non-reflected) parameter sets: folds byte-reversed
+            /// vectors of <paramref name="data"/> four (or one) at a time using the constants set up
+            /// by InitializeVectorized, reduces the folded state to 64 bits, and passes any tail
+            /// shorter than one vector to UpdateScalar.
+            /// </summary>
+            /// <param name="crc">The running CRC value to continue from.</param>
+            /// <param name="data">The input; must hold at least one full vector (asserted below).</param>
+            /// <returns>The updated running CRC value.</returns>
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            private ulong UpdateVectorized(ulong crc, ReadOnlySpan data)
+            {
+                Debug.Assert(_canVectorize);
+                Debug.Assert(data.Length >= Vector128.Count);
+
+                // srcRef and length are advanced together as input is consumed.
+                ref byte srcRef = ref MemoryMarshal.GetReference(data);
+                int length = data.Length;
+
+                Vector128 x7;
+                Vector128 kConstants;
+
+                // Long input (at least eight vectors): run four accumulators in parallel.
+                if (length >= Vector128.Count * 8)
+                {
+                    // x0, x1, x2, x7 hold the first, second, third and fourth input vectors.
+                    Vector128 x0 = LoadFromSourceByteSwapped(ref srcRef, 0);
+                    Vector128 x1 = LoadFromSourceByteSwapped(ref srcRef, 16);
+                    Vector128 x2 = LoadFromSourceByteSwapped(ref srcRef, 32);
+                    x7 = LoadFromSourceByteSwapped(ref srcRef, 48);
+
+                    srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4);
+                    length -= Vector128.Count * 4;
+
+                    // Mix the incoming CRC into the first vector (ShiftLowerToUpper presumably moves
+                    // it from the low 64-bit lane to the high lane - verify against VectorHelper).
+                    x0 ^= ShiftLowerToUpper(Vector128.CreateScalar(crc));
+
+                    kConstants = Vector128.Create(_k1, _k2);
+
+                    // The branch condition guarantees at least four more vectors remain, so the body
+                    // runs at least once. Each pass folds the next four vectors into the accumulators.
+                    do
+                    {
+                        Vector128 y1 = LoadFromSourceByteSwapped(ref srcRef, 0);
+                        Vector128 y2 = LoadFromSourceByteSwapped(ref srcRef, 16);
+                        Vector128 y3 = LoadFromSourceByteSwapped(ref srcRef, 32);
+                        Vector128 y4 = LoadFromSourceByteSwapped(ref srcRef, 48);
+
+                        x0 = FoldPolynomialPair(y1, x0, kConstants);
+                        x1 = FoldPolynomialPair(y2, x1, kConstants);
+                        x2 = FoldPolynomialPair(y3, x2, kConstants);
+                        x7 = FoldPolynomialPair(y4, x7, kConstants);
+
+                        srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4);
+                        length -= Vector128.Count * 4;
+                    } while (length >= Vector128.Count * 4);
+
+                    // Collapse the four accumulators into x7. (_k3, _k4) advance a value by exactly
+                    // one vector, so the folds must be chained: x0 into x1, that result into x2, and
+                    // that result into x7. Folding x0, x1 and x2 straight into x7 with the same
+                    // constants would advance all three by one vector although they are three, two
+                    // and one vectors ahead of x7.
+                    kConstants = Vector128.Create(_k3, _k4);
+                    x1 = FoldPolynomialPair(x1, x0, kConstants);
+                    x2 = FoldPolynomialPair(x2, x1, kConstants);
+                    x7 = FoldPolynomialPair(x7, x2, kConstants);
+                }
+                else
+                {
+                    // Short input: start from a single vector with the incoming CRC mixed in.
+                    Debug.Assert(length >= 16);
+
+                    x7 = LoadFromSourceByteSwapped(ref srcRef, 0);
+                    x7 ^= ShiftLowerToUpper(Vector128.CreateScalar(crc));
+
+                    srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count);
+                    length -= Vector128.Count;
+                }
+
+                // One-vector constants; this assignment is the only one on the short-input path.
+                kConstants = Vector128.Create(_k3, _k4);
+
+                // Fold any remaining whole vectors one at a time.
+                while (length >= Vector128.Count)
+                {
+                    x7 = FoldPolynomialPair(LoadFromSourceByteSwapped(ref srcRef, 0), x7, kConstants);
+
+                    srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count);
+                    length -= Vector128.Count;
+                }
+
+                // Compute CRC of a 128-bit value and fold to the upper 64-bits.
+                x7 = CarrylessMultiplyLeftUpperRightLower(x7, Vector128.CreateScalar(_k5)) ^
+                     ShiftLowerToUpper(x7);
+
+                // Barrett reduction.
+                kConstants = Vector128.Create(_mu, _poly);
+                Vector128 temp = x7;
+                x7 = CarrylessMultiplyLeftUpperRightLower(x7, kConstants) ^ (x7 & Vector128.Create(0UL, ~0UL));
+                x7 = CarrylessMultiplyUpper(x7, kConstants);
+                x7 ^= temp;
+
+                // The result is read from 64-bit element 0; fewer than one vector of input may
+                // remain, and the scalar path finishes it.
+                ulong result = x7.GetElement(0);
+                return length > 0
+                    ? UpdateScalar(result, MemoryMarshal.CreateReadOnlySpan(ref srcRef, length))
+                    : result;
+            }
+ }
+ }
+}
+
+#endif
diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/CrcPolynomialHelper.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/CrcPolynomialHelper.cs
new file mode 100644
index 00000000000000..a88b901d75ce72
--- /dev/null
+++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/CrcPolynomialHelper.cs
@@ -0,0 +1,173 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#if NET
+
+using System.Buffers.Binary;
+using System.Runtime.CompilerServices;
+
+namespace System.IO.Hashing
+{
+ /// <summary>
+ /// Provides GF(2) polynomial arithmetic for computing CRC folding constants.
+ /// </summary>
+ internal static class CrcPolynomialHelper
+ {
+        /// <summary>
+        /// Computes x^<paramref name="power"/> mod <paramref name="poly"/> in GF(2) by long division.
+        /// </summary>
+        /// <param name="poly">The polynomial (with leading bit) to reduce by.</param>
+        /// <param name="power">The power of x.</param>
+        /// <returns>The low 64 bits of the remainder, whose degree is below that of <paramref name="poly"/>.</returns>
+        internal static ulong ComputeFoldingConstant(UInt640 poly, int power)
+        {
+            int divisorDegree = poly.Degree;
+
+            // Start from the single term x^power.
+            UInt640 remainder = new(1);
+            remainder.ShiftLeftEquals(power);
+
+            // Cancel the leading term with an aligned copy of the divisor until the remainder's
+            // degree drops below the divisor's.
+            for (int excess = remainder.Degree - divisorDegree;
+                 excess >= 0;
+                 excess = remainder.Degree - divisorDegree)
+            {
+                UInt640 aligned = poly;
+                aligned.ShiftLeftEquals(excess);
+                remainder.XorEquals(ref aligned);
+            }
+
+            return remainder.ToUInt64();
+        }
+
+        /// <summary>
+        /// Computes floor(x^<paramref name="power"/> / <paramref name="poly"/>) in GF(2) by long division.
+        /// </summary>
+        /// <param name="poly">The polynomial (with leading bit) to divide by.</param>
+        /// <param name="power">The power of x.</param>
+        /// <returns>The low 64 bits of the quotient; any higher quotient bits are discarded.</returns>
+        internal static ulong ComputeBarrettConstant(UInt640 poly, int power)
+        {
+            int divisorDegree = poly.Degree;
+
+            // Start from the single term x^power.
+            UInt640 remainder = new(1);
+            remainder.ShiftLeftEquals(power);
+
+            UInt640 quotient = default;
+
+            for (int excess = remainder.Degree - divisorDegree;
+                 excess >= 0;
+                 excess = remainder.Degree - divisorDegree)
+            {
+                // Cancel the remainder's leading term with an aligned copy of the divisor...
+                UInt640 aligned = poly;
+                aligned.ShiftLeftEquals(excess);
+                remainder.XorEquals(ref aligned);
+
+                // ...and record x^excess as a term of the quotient.
+                UInt640 term = new(1);
+                term.ShiftLeftEquals(excess);
+                quotient.XorEquals(ref term);
+            }
+
+            return quotient.ToUInt64();
+        }
+
+        /// <summary>
+        /// Reverses the lowest <paramref name="width"/> bits of <paramref name="value"/>:
+        /// bit i of the input becomes bit (width - 1 - i) of the result.
+        /// </summary>
+        /// <remarks>
+        /// C# reduces a <see cref="ulong"/> shift count to its low 6 bits, so when
+        /// <paramref name="width"/> exceeds 64 the positions at and above 64 alias the low positions.
+        /// NOTE(review): callers pass a width of 65 - confirm that aliasing is what they expect.
+        /// </remarks>
+        internal static ulong ReverseBits(ulong value, int width)
+        {
+            ulong reversed = 0;
+
+            // Walk the source position upward and the destination position downward in step.
+            for (int from = 0, to = width - 1; from < width; from++, to--)
+            {
+                ulong bit = (value >> from) & 1UL;
+                reversed |= bit << to;
+            }
+
+            return reversed;
+        }
+
+        /// <summary>
+        /// A 640-bit unsigned integer for GF(2) polynomial arithmetic, stored as ten 64-bit words
+        /// with word 0 holding the least significant bits.
+        /// </summary>
+        [InlineArray(Length)]
+        internal struct UInt640
+        {
+            private const int Length = 10;
+            private ulong _element;
+
+            /// <summary>Creates a value whose low 64 bits are <paramref name="value"/> and whose other bits are zero.</summary>
+            internal UInt640(ulong value)
+            {
+                this = default;
+                this[0] = value;
+            }
+
+            /// <summary>Gets the index of the highest set bit, or -1 when the value is zero.</summary>
+            internal readonly int Degree
+            {
+                get
+                {
+                    // Scan from the most significant word down to the first non-zero one.
+                    int word = Length;
+                    while (--word >= 0)
+                    {
+                        ulong bits = this[word];
+                        if (bits == 0)
+                        {
+                            continue;
+                        }
+
+                        int topBit = 63 - BitOperations.LeadingZeroCount(bits);
+                        return (word * 64) + topBit;
+                    }
+
+                    return -1;
+                }
+            }
+
+            /// <summary>Shifts this value left by <paramref name="count"/> bits in place.</summary>
+            internal void ShiftLeftEquals(int count)
+            {
+                int wholeWords = count >> 6; // count / 64
+                int leftoverBits = count & 63; // count % 64
+
+                if (wholeWords > 0)
+                {
+                    // Move words upward, highest destination first so no source is overwritten early.
+                    for (int dest = Length - 1; dest >= wholeWords; dest--)
+                    {
+                        this[dest] = this[dest - wholeWords];
+                    }
+
+                    // Clear the words vacated at the bottom.
+                    for (int vacated = wholeWords - 1; vacated >= 0; vacated--)
+                    {
+                        this[vacated] = 0;
+                    }
+                }
+
+                if (leftoverBits > 0)
+                {
+                    int carryShift = 64 - leftoverBits;
+
+                    // Each word takes its own shifted bits plus the bits carried out of the word below.
+                    for (int i = Length - 1; i >= 1; i--)
+                    {
+                        ulong carriedIn = this[i - 1] >> carryShift;
+                        this[i] = (this[i] << leftoverBits) | carriedIn;
+                    }
+
+                    this[0] <<= leftoverBits;
+                }
+            }
+
+            /// <summary>XORs <paramref name="other"/> into this value word by word.</summary>
+            internal void XorEquals(ref UInt640 other)
+            {
+                for (int word = 0; word != Length; word++)
+                {
+                    this[word] ^= other[word];
+                }
+            }
+
+            /// <summary>Returns the low 64 bits of this value.</summary>
+            internal readonly ulong ToUInt64()
+            {
+                return this[0];
+            }
+        }
+
+        /// <summary>
+        /// Forwarding wrapper so that code in this class can write <c>BitOperations.LeadingZeroCount</c>
+        /// and reach <see cref="System.Numerics.BitOperations"/>.
+        /// </summary>
+        private static class BitOperations
+        {
+            /// <summary>Returns the number of leading zero bits in <paramref name="value"/>.</summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            internal static int LeadingZeroCount(ulong value) =>
+                System.Numerics.BitOperations.LeadingZeroCount(value);
+        }
+ }
+}
+
+#endif