-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Closed
Labels
area-System.MemoryenhancementProduct code improvement that does NOT require public API changes/additionsProduct code improvement that does NOT require public API changes/additionstenet-performancePerformance related issuePerformance related issue
Milestone
Description
SequenceEqual performs much better than a naive linear search for any buffer with length >= 8.
However, the linear search does better for buffers of length 0-4 (and is on par for 5-7).
It would be great if it was always better, even for smaller buffers. Is that possible to achieve (of course, without regressing perf for larger buffers)?
Specifically Span<byte>.SequenceEqual.
runtime/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
Lines 421 to 437 in 83e30cc
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
| public static bool SequenceEqual<T>(this Span<T> span, ReadOnlySpan<T> other) where T : IEquatable<T> | |
| { | |
| int length = span.Length; | |
| if (RuntimeHelpers.IsBitwiseEquatable<T>()) | |
| { | |
| nuint size = (nuint)Unsafe.SizeOf<T>(); | |
| return length == other.Length && | |
| SpanHelpers.SequenceEqual( | |
| ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(span)), | |
| ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(other)), | |
| ((nuint)length) * size); // If this multiplication overflows, the Span we got overflows the entire address range. There's no happy outcome for this api in such a case so we choose not to take the overhead of checking. | |
| } | |
| return length == other.Length && SpanHelpers.SequenceEqual(ref MemoryMarshal.GetReference(span), ref MemoryMarshal.GetReference(other), length); | |
| } |
SequenceEqualThreshold benchmark
public class SequenceEqualThreshold
{
private byte[] _input;
private byte[] _expected;
[Params(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)]
public int Length;
[GlobalSetup]
public void Setup()
{
var builder = new StringBuilder();
for (int i = 0; i < Length; i++)
{
builder.Append("a");
}
string input = builder.ToString();
_input = Encoding.UTF8.GetBytes(input);
_expected = Encoding.UTF8.GetBytes(input);
}
[Benchmark(Baseline = true)]
public bool UseLinearSearch()
{
if (_input.Length != _expected.Length)
{
return false;
}
for (int i = 0; i < _input.Length; i++)
{
if (_input[i] != _expected[i])
{
return false;
}
}
return true;
}
[Benchmark]
public bool UseSequenceEqual()
{
return _input.AsSpan().SequenceEqual(_expected);
}
}Benchmark results
BenchmarkDotNet=v0.12.0, OS=Windows 10.0.19041
Intel Core i7-6700 CPU 3.40GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=5.0.100-alpha1-015914
[Host] : .NET Core 5.0.0 (CoreCLR 5.0.19.56303, CoreFX 5.0.19.56306), X64 RyuJIT
Job-YKNBMP : .NET Core 5.0.0 (CoreCLR 5.0.19.56303, CoreFX 5.0.19.56306), X64 RyuJIT
PowerPlanMode=00000000-0000-0000-0000-000000000000 MaxIterationCount=10 MinIterationCount=5
WarmupCount=3
| Method | Length | Mean | Error | StdDev | Median | Min | Max | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| UseLinearSearch | 1 | 1.351 ns | 0.1262 ns | 0.0751 ns | 1.324 ns | 1.264 ns | 1.511 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 1 | 4.491 ns | 0.7360 ns | 0.4868 ns | 4.418 ns | 3.986 ns | 5.459 ns | 3.38 | 0.44 | - | - | - | - |
| UseLinearSearch | 2 | 2.031 ns | 0.0687 ns | 0.0178 ns | 2.029 ns | 2.014 ns | 2.055 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 2 | 4.773 ns | 0.4279 ns | 0.2547 ns | 4.750 ns | 4.401 ns | 5.326 ns | 2.30 | 0.10 | - | - | - | - |
| UseLinearSearch | 3 | 3.799 ns | 0.2879 ns | 0.1904 ns | 3.792 ns | 3.506 ns | 4.057 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 3 | 5.020 ns | 0.2169 ns | 0.1291 ns | 4.994 ns | 4.854 ns | 5.307 ns | 1.31 | 0.05 | - | - | - | - |
| UseLinearSearch | 4 | 4.940 ns | 0.1176 ns | 0.0700 ns | 4.921 ns | 4.838 ns | 5.038 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 4 | 5.386 ns | 0.1460 ns | 0.0966 ns | 5.372 ns | 5.256 ns | 5.524 ns | 1.09 | 0.02 | - | - | - | - |
| UseLinearSearch | 5 | 5.632 ns | 0.1036 ns | 0.0269 ns | 5.630 ns | 5.605 ns | 5.674 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 5 | 6.005 ns | 0.1360 ns | 0.0711 ns | 6.023 ns | 5.842 ns | 6.064 ns | 1.06 | 0.02 | - | - | - | - |
| UseLinearSearch | 6 | 7.025 ns | 0.1526 ns | 0.0908 ns | 6.993 ns | 6.919 ns | 7.188 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 6 | 6.756 ns | 0.5756 ns | 0.3010 ns | 6.680 ns | 6.366 ns | 7.340 ns | 0.96 | 0.05 | - | - | - | - |
| UseLinearSearch | 7 | 7.992 ns | 0.6298 ns | 0.4166 ns | 7.901 ns | 7.456 ns | 8.866 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 7 | 6.867 ns | 0.4282 ns | 0.2833 ns | 6.834 ns | 6.516 ns | 7.451 ns | 0.86 | 0.07 | - | - | - | - |
| UseLinearSearch | 8 | 8.392 ns | 0.1844 ns | 0.0819 ns | 8.391 ns | 8.269 ns | 8.522 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 8 | 3.467 ns | 0.1260 ns | 0.0750 ns | 3.450 ns | 3.353 ns | 3.619 ns | 0.42 | 0.01 | - | - | - | - |
| UseLinearSearch | 9 | 8.667 ns | 0.6494 ns | 0.1686 ns | 8.599 ns | 8.554 ns | 8.965 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 9 | 4.055 ns | 0.1404 ns | 0.0734 ns | 4.057 ns | 3.933 ns | 4.144 ns | 0.46 | 0.01 | - | - | - | - |
| UseLinearSearch | 10 | 11.770 ns | 1.8212 ns | 1.2046 ns | 11.852 ns | 9.831 ns | 13.787 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 10 | 4.605 ns | 0.8492 ns | 0.5054 ns | 4.519 ns | 3.977 ns | 5.570 ns | 0.40 | 0.08 | - | - | - | - |
| UseLinearSearch | 11 | 12.292 ns | 1.7350 ns | 1.1476 ns | 12.342 ns | 9.277 ns | 13.356 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 11 | 4.291 ns | 0.6357 ns | 0.3325 ns | 4.323 ns | 3.743 ns | 4.661 ns | 0.34 | 0.03 | - | - | - | - |
| UseLinearSearch | 12 | 10.635 ns | 0.7017 ns | 0.4641 ns | 10.520 ns | 10.048 ns | 11.304 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 12 | 4.086 ns | 0.5284 ns | 0.2764 ns | 4.190 ns | 3.721 ns | 4.360 ns | 0.38 | 0.02 | - | - | - | - |
This shows up in implementations within the JSON context like comparing known ASCII property names (which tend to be small).
Processing JSON metadata properties benchmark
[BenchmarkCategory(Categories.CoreFX, Categories.JSON)]
public class SequenceEqualMetadataSampleTest
{
private byte[] _name;
[Params("", "$", "$id", "$ref", "$values",
"a", "foo", "food", "foods", "1234567",
"$if", "$res", "$valueZ",
"large string with a bunch of things in it",
"$large string with a bunch of things in it")]
public string Name;
[GlobalSetup]
public void Setup()
{
_name = Encoding.UTF8.GetBytes(Name);
}
[Benchmark(Baseline = true)]
public void UseLinearSearch()
{
ReturnLinear(_name);
}
[Benchmark]
public void UseSequenceEqual()
{
ReturnSequenceEqual(_name);
}
private static MetadataPropertyName ReturnLinear(ReadOnlySpan<byte> propertyName)
{
if (propertyName.Length > 0 && propertyName[0] == '$')
{
switch (propertyName.Length)
{
case 3:
if (propertyName[1] == 'i' &&
propertyName[2] == 'd')
{
return MetadataPropertyName.Id;
}
break;
case 4:
if (propertyName[1] == 'r' &&
propertyName[2] == 'e' &&
propertyName[3] == 'f')
{
return MetadataPropertyName.Ref;
}
break;
case 7:
if (propertyName[1] == 'v' &&
propertyName[2] == 'a' &&
propertyName[3] == 'l' &&
propertyName[4] == 'u' &&
propertyName[5] == 'e' &&
propertyName[6] == 's')
{
return MetadataPropertyName.Values;
}
break;
}
}
return MetadataPropertyName.NoMetadata;
}
private static readonly byte[] _id = Encoding.UTF8.GetBytes("$id");
private static readonly byte[] _ref = Encoding.UTF8.GetBytes("$ref");
private static readonly byte[] _values = Encoding.UTF8.GetBytes("$values");
private static MetadataPropertyName ReturnSequenceEqual(ReadOnlySpan<byte> propertyName)
{
switch (propertyName.Length)
{
case 3:
if (propertyName.SequenceEqual(_id))
{
return MetadataPropertyName.Id;
}
break;
case 4:
if (propertyName.SequenceEqual(_ref))
{
return MetadataPropertyName.Ref;
}
break;
case 7:
if (propertyName.SequenceEqual(_values))
{
return MetadataPropertyName.Values;
}
break;
}
return MetadataPropertyName.NoMetadata;
}
private enum MetadataPropertyName
{
NoMetadata,
Values,
Id,
Ref,
}
}Benchmark results
BenchmarkDotNet=v0.12.0, OS=Windows 10.0.19041
Intel Core i7-6700 CPU 3.40GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=5.0.100-alpha1-015914
[Host] : .NET Core 5.0.0 (CoreCLR 5.0.19.56303, CoreFX 5.0.19.56306), X64 RyuJIT
Job-WQXUIX : .NET Core 5.0.0 (CoreCLR 5.0.19.56303, CoreFX 5.0.19.56306), X64 RyuJIT
PowerPlanMode=00000000-0000-0000-0000-000000000000 MaxIterationCount=10 MinIterationCount=5
WarmupCount=3
| Method | Name | Mean | Error | StdDev | Median | Min | Max | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| UseLinearSearch | **** | 2.740 ns | 0.2049 ns | 0.1356 ns | 2.721 ns | 2.573 ns | 2.991 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 2.019 ns | 0.1023 ns | 0.0677 ns | 2.015 ns | 1.912 ns | 2.159 ns | 0.74 | 0.05 | - | - | - | - | |
| UseLinearSearch | $ | 2.896 ns | 0.1635 ns | 0.1082 ns | 2.889 ns | 2.745 ns | 3.093 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | $ | 1.969 ns | 0.0692 ns | 0.0247 ns | 1.961 ns | 1.943 ns | 2.008 ns | 0.68 | 0.03 | - | - | - | - |
| UseLinearSearch | $id | 3.809 ns | 0.6716 ns | 0.4443 ns | 3.793 ns | 3.237 ns | 4.692 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | $id | 6.681 ns | 0.4008 ns | 0.2651 ns | 6.675 ns | 6.310 ns | 7.112 ns | 1.77 | 0.21 | - | - | - | - |
| UseLinearSearch | $if | 4.001 ns | 0.6197 ns | 0.4099 ns | 3.955 ns | 3.486 ns | 4.679 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | $if | 7.995 ns | 0.3194 ns | 0.2112 ns | 7.994 ns | 7.723 ns | 8.447 ns | 2.02 | 0.22 | - | - | - | - |
| UseLinearSearch | $large string(...) things in it [42] | 2.895 ns | 0.1503 ns | 0.0994 ns | 2.917 ns | 2.762 ns | 3.026 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | $large string(...) things in it [42] | 2.026 ns | 0.0628 ns | 0.0224 ns | 2.024 ns | 1.999 ns | 2.052 ns | 0.70 | 0.03 | - | - | - | - |
| UseLinearSearch | $ref | 3.412 ns | 0.0897 ns | 0.0320 ns | 3.418 ns | 3.359 ns | 3.445 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | $ref | 7.791 ns | 0.1721 ns | 0.1138 ns | 7.816 ns | 7.567 ns | 7.957 ns | 2.28 | 0.04 | - | - | - | - |
| UseLinearSearch | $res | 3.413 ns | 0.1726 ns | 0.1142 ns | 3.395 ns | 3.257 ns | 3.633 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | $res | 8.513 ns | 0.2731 ns | 0.1807 ns | 8.509 ns | 8.238 ns | 8.775 ns | 2.50 | 0.09 | - | - | - | - |
| UseLinearSearch | $valueZ | 4.000 ns | 0.1121 ns | 0.0741 ns | 3.988 ns | 3.901 ns | 4.117 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | $valueZ | 8.213 ns | 0.3252 ns | 0.2151 ns | 8.156 ns | 8.000 ns | 8.695 ns | 2.05 | 0.07 | - | - | - | - |
| UseLinearSearch | $values | 3.596 ns | 0.2866 ns | 0.1896 ns | 3.521 ns | 3.350 ns | 3.952 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | $values | 8.112 ns | 0.1673 ns | 0.0597 ns | 8.105 ns | 8.036 ns | 8.206 ns | 2.20 | 0.11 | - | - | - | - |
| UseLinearSearch | 1234567 | 2.678 ns | 0.4838 ns | 0.2879 ns | 2.652 ns | 2.407 ns | 3.319 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | 1234567 | 5.977 ns | 0.4423 ns | 0.1149 ns | 5.931 ns | 5.902 ns | 6.180 ns | 2.10 | 0.14 | - | - | - | - |
| UseLinearSearch | a | 2.484 ns | 0.2543 ns | 0.1682 ns | 2.423 ns | 2.312 ns | 2.797 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | a | 2.211 ns | 0.2269 ns | 0.1501 ns | 2.221 ns | 1.966 ns | 2.481 ns | 0.89 | 0.08 | - | - | - | - |
| UseLinearSearch | foo | 2.286 ns | 0.1816 ns | 0.1201 ns | 2.298 ns | 2.138 ns | 2.493 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | foo | 5.850 ns | 0.3409 ns | 0.0885 ns | 5.827 ns | 5.790 ns | 6.005 ns | 2.46 | 0.10 | - | - | - | - |
| UseLinearSearch | food | 2.244 ns | 0.0763 ns | 0.0339 ns | 2.239 ns | 2.208 ns | 2.290 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | food | 6.162 ns | 0.5158 ns | 0.3412 ns | 6.088 ns | 5.714 ns | 6.683 ns | 2.81 | 0.13 | - | - | - | - |
| UseLinearSearch | foods | 2.315 ns | 0.2423 ns | 0.1603 ns | 2.252 ns | 2.134 ns | 2.556 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | foods | 2.366 ns | 0.1825 ns | 0.1207 ns | 2.367 ns | 2.198 ns | 2.605 ns | 1.03 | 0.10 | - | - | - | - |
| UseLinearSearch | large string (...) things in it [41] | 2.206 ns | 0.1903 ns | 0.1259 ns | 2.144 ns | 2.079 ns | 2.454 ns | 1.00 | 0.00 | - | - | - | - |
| UseSequenceEqual | large string (...) things in it [41] | 2.033 ns | 0.2422 ns | 0.1602 ns | 1.958 ns | 1.880 ns | 2.335 ns | 0.92 | 0.09 | - | - | - | - |
@benaadams, @GrabYourPitchforks - got any suggestions?
FYI @jozkee, @steveharter
ycrumeyrolle
Metadata
Metadata
Assignees
Labels
area-System.MemoryenhancementProduct code improvement that does NOT require public API changes/additionsProduct code improvement that does NOT require public API changes/additionstenet-performancePerformance related issuePerformance related issue