From c12bee7306c32057c3d555df5ed40041a07806aa Mon Sep 17 00:00:00 2001 From: dadhi Date: Thu, 29 May 2025 16:31:28 +0200 Subject: [PATCH 01/32] some funny initial results of the bm of by ref access of Switch vs AsSpan --- src/FastExpressionCompiler/ILReader.cs | 10 ++ src/FastExpressionCompiler/ImTools.cs | 89 ++++++++++ .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 158 ++++++++++++------ .../Program.cs | 3 +- 4 files changed, 208 insertions(+), 52 deletions(-) diff --git a/src/FastExpressionCompiler/ILReader.cs b/src/FastExpressionCompiler/ILReader.cs index c3be6db5..4d3fc781 100644 --- a/src/FastExpressionCompiler/ILReader.cs +++ b/src/FastExpressionCompiler/ILReader.cs @@ -13,8 +13,10 @@ using System.Linq; #if LIGHT_EXPRESSION +using FastExpressionCompiler.LightExpression.ImTools; namespace FastExpressionCompiler.LightExpression.ILDecoder; #else +using FastExpressionCompiler.ImTools; namespace FastExpressionCompiler.ILDecoder; #endif @@ -334,6 +336,14 @@ internal ILInstruction(int offset, OpCode opCode) } } +// todo: @wip +/// Data-oriented structure SOA of IL instructions. 
+public struct ILs +{ + public SmallList> Offset; + public SmallList> OpCodes; +} + public sealed class InlineNoneInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineNone; diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 8096c185..d5588265 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -235,6 +235,22 @@ public static ref T GetSurePresentItemRef(this ref SmallListReturns a surely present item ref by its index + [MethodImpl((MethodImplOptions)256)] + public static ref T GetSurePresentItemRef2(this ref SmallList list, int index) + where TStack : struct, IStack + { + Debug.Assert(list.Count != 0); + Debug.Assert(index < list.Count); + + var stackCap = list.StackCapacity; + if (index < stackCap) + return ref list._stack.AsSpan()[index]; + + Debug.Assert(list._rest != null); + return ref list._rest[index - stackCap]; + } + /// Returns last present item ref, assumes that the list is not empty! 
[MethodImpl((MethodImplOptions)256)] public static ref TItem GetLastSurePresentItem(this ref SmallList4 source) => @@ -686,6 +702,79 @@ public Span AsSpan() => #endif } +// todo: @wip +/// Implementation of `IStack` for 4 items on stack +[StructLayout(LayoutKind.Sequential, Pack = 1)] +public struct Stack16 : IStack> +{ + /// Count of items on stack + public const int StackCapacity = 16; + + internal T _it0, _it1, _it2, _it3, _it4, _it5, _it6, _it7; + internal T _it8, _it9, _it10, _it11, _it12, _it13, _it14, _it15; + + /// + public int Capacity => StackCapacity; + + /// + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T GetSurePresentRef(int index) + { + Debug.Assert(index < StackCapacity); + switch (index) + { + case 0: return ref _it0; + case 1: return ref _it1; + case 2: return ref _it2; + default: return ref _it3; + } + } + + /// + public T this[int index] + { + [MethodImpl((MethodImplOptions)256)] + get + { + Debug.Assert(index < StackCapacity); + return index switch + { + 0 => _it0, + 1 => _it1, + 2 => _it2, + _ => _it3, + }; + } + [MethodImpl((MethodImplOptions)256)] + set => Set(index, in value); + } + + /// Sets the value by the index + [MethodImpl((MethodImplOptions)256)] + public void Set(int index, in T value) + { + Debug.Assert(index < StackCapacity); + switch (index) + { + case 0: _it0 = value; break; + case 1: _it1 = value; break; + case 2: _it2 = value; break; + default: _it3 = value; break; + } + } + + /// + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public Span AsSpan() => +#if NETSTANDARD2_0_OR_GREATER || NET472 + StackTools>.LazyCompiledAsSpanDelegate.Value(ref this, StackCapacity); +#else + MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); +#endif +} + // todo: @wip /// Generic version of SmallList abstracted for how much items are on stack public struct SmallList diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs 
b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index 5599d223..8cb90bad 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -3,9 +3,10 @@ using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Diagnosers; +using FastExpressionCompiler.ImTools; + +namespace FastExpressionCompiler.Benchmarks; -namespace FastExpressionCompiler.Benchmarks -{ /* BenchmarkDotNet v0.13.7, Windows 11 (10.0.22621.1992/22H2/2022Update/SunValley2) 11th Gen Intel Core i7-1185G7 3.00GHz, 1 CPU, 8 logical and 4 physical cores @@ -22,68 +23,123 @@ .NET SDK 7.0.307 | ArrayResize | 4 | 24.71 ns | 2.497 ns | 7.165 ns | 22.67 ns | 1.59 | 0.39 | 0.0140 | 47 | 0 | 0 | 88 B | 1.00 | */ - [MemoryDiagnoser] - [HardwareCounters(HardwareCounter.CacheMisses, HardwareCounter.BranchMispredictions, HardwareCounter.BranchInstructions)] - public class ArrayCopy_vs_ArrayResize_vs_ForLoop +[MemoryDiagnoser] +[HardwareCounters(HardwareCounter.CacheMisses, HardwareCounter.BranchMispredictions, HardwareCounter.BranchInstructions)] +public class ArrayCopy_vs_ArrayResize_vs_ForLoop +{ + [Params(4)] + // [Params(4, 8)] + public int Count; + + public Type[] Items; + + [GlobalSetup] + public void Init() { - [Params(4)] - // [Params(4, 8)] - public int Count; + Items = new Type[Count]; + for (var i = 1; i < Count; i++) + Items[i] = GetType(); - public Type[] Items; + Items[0] = typeof(string); + } - [GlobalSetup] - public void Init() - { - Items = new Type[Count]; - for (var i = 1; i < Count; i++) - Items[i] = GetType(); + [Benchmark(Baseline = true)] + public Type[] ArrayCopy() + { + var source = Items; + var target = new Type[source.Length << 1]; + Array.Copy(source, 0, target, 0, source.Length); + return target; + } - Items[0] = typeof(string); - } + [Benchmark] + public Type[] ManualForLoop() + { + var source = Items; + var 
target = new Type[source.Length << 1]; + for (var i = 0; i < source.Length; i++) + target[i] = source[i]; + return target; + } - [Benchmark(Baseline = true)] - public Type[] ArrayCopy() + [Benchmark] + public Type[] MarshallingForLoop() + { + var count = Items.Length; + ref var source = ref MemoryMarshal.GetArrayDataReference(Items); + ref var sourceNoMore = ref Unsafe.Add(ref source, count); + var targetArr = new Type[count << 1]; + ref var target = ref MemoryMarshal.GetArrayDataReference(targetArr); + while (Unsafe.IsAddressLessThan(ref source, ref sourceNoMore)) { - var source = Items; - var target = new Type[source.Length << 1]; - Array.Copy(source, 0, target, 0, source.Length); - return target; + target = source; + target = ref Unsafe.Add(ref target, 1); + source = ref Unsafe.Add(ref source, 1); } + return targetArr; + } - [Benchmark] - public Type[] ManualForLoop() - { - var source = Items; - var target = new Type[source.Length << 1]; - for (var i = 0; i < source.Length; i++) - target[i] = source[i]; - return target; - } + [Benchmark] + public Type[] ArrayResize() + { + var target = Items; + Array.Resize(ref target, target.Length << 1); + return target; + } +} + +[MemoryDiagnoser, RankColumn, Orderer(BenchmarkDotNet.Order.SummaryOrderPolicy.FastestToSlowest)] +[HardwareCounters(HardwareCounter.CacheMisses, HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] +public class SmallList_Switch_vs_AsSpan_ByRef_Access +{ + /* + ## Baseline: hmm, why AsSpan is faster even if it is utilized only by half of the acces, the other part hits the heap? 
+ + BenchmarkDotNet v0.15.0, Windows 11 (10.0.26100.4061/24H2/2024Update/HudsonValley) + Intel Core i9-8950HK CPU 2.90GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores + .NET SDK 9.0.203 + [Host] : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 + DefaultJob : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 + + + | Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio | + |------------------------ |----------:|----------:|----------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:| + | Double_and_Sum_AsSpan | 9.959 ns | 0.2341 ns | 0.4567 ns | 0.64 | 0.04 | 1 | 29 | 0 | 0 | - | NA | + | Double_and_Sum_BySwitch | 15.605 ns | 0.3465 ns | 0.7532 ns | 1.00 | 0.07 | 2 | 35 | 0 | 0 | - | NA | */ - [Benchmark] - public Type[] MarshallingForLoop() + SmallList> _smallList; + + [GlobalSetup] + public void Init() + { + // 4 on stack and 4 on heap + for (var i = 0; i < 8; i++) + _smallList.Add(i); + } + + [Benchmark(Baseline = true)] + public int Double_and_Sum_BySwitch() + { + var sum = 0; + for (var i = 0; i < _smallList.Count; i++) { - var count = Items.Length; - ref var source = ref MemoryMarshal.GetArrayDataReference(Items); - ref var sourceNoMore = ref Unsafe.Add(ref source, count); - var targetArr = new Type[count << 1]; - ref var target = ref MemoryMarshal.GetArrayDataReference(targetArr); - while (Unsafe.IsAddressLessThan(ref source, ref sourceNoMore)) - { - target = source; - target = ref Unsafe.Add(ref target, 1); - source = ref Unsafe.Add(ref source, 1); - } - return targetArr; + ref var n = ref _smallList.GetSurePresentItemRef(i); + n += n; + sum += n; } + return sum; + } - [Benchmark] - public Type[] ArrayResize() + [Benchmark] + public int Double_and_Sum_AsSpan() + { + var sum = 0; + for (var i = 0; i < _smallList.Count; i++) { - var target = Items; - Array.Resize(ref target, target.Length << 1); - 
return target; + ref var n = ref _smallList.GetSurePresentItemRef2(i); + n += n; + sum += n; } + return sum; } } diff --git a/test/FastExpressionCompiler.Benchmarks/Program.cs b/test/FastExpressionCompiler.Benchmarks/Program.cs index 5f268002..7b067ad2 100644 --- a/test/FastExpressionCompiler.Benchmarks/Program.cs +++ b/test/FastExpressionCompiler.Benchmarks/Program.cs @@ -21,7 +21,7 @@ public static void Main() // BenchmarkRunner.Run(); // not included in README.md, may be it needs to // BenchmarkRunner.Run(); - BenchmarkRunner.Run(); + // BenchmarkRunner.Run(); //-------------------------------------------- @@ -50,6 +50,7 @@ public static void Main() //BenchmarkRunner.Run(); //BenchmarkRunner.Run(); + BenchmarkRunner.Run(); // BenchmarkRunner.Run(); //var a = new NestedLambdasVsVars(); From 2bcfb4abb356d8569f4fa926f4c7cb5f4c490843 Mon Sep 17 00:00:00 2001 From: dadhi Date: Thu, 29 May 2025 16:39:26 +0200 Subject: [PATCH 02/32] improv AsSpanCompiled for net472 --- src/FastExpressionCompiler/ImTools.cs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index d5588265..84f4a7bc 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -538,12 +538,14 @@ internal static class StackTools internal delegate Span AsSpanDelegate(ref TStack stack, int capacity); - internal static AsSpanDelegate CompileAsSpanDelegate() + internal static readonly AsSpanDelegate AsSpanCompiled; + + static StackTools() { var dynamicMethod = new DynamicMethod( - "", + string.Empty, typeof(Span), - new[] { typeof(TStack).MakeByRefType(), typeof(int) }, // todo: @perf pool this thing + [typeof(TStack).MakeByRefType(), typeof(int)], // todo: @perf pool this thing typeof(TStack), true ); @@ -558,11 +560,8 @@ internal static AsSpanDelegate CompileAsSpanDelegate() il.Emit(OpCodes.Newobj, SpanConstructor); il.Emit(OpCodes.Ret); - return 
(AsSpanDelegate)dynamicMethod.CreateDelegate(typeof(AsSpanDelegate)); + AsSpanCompiled = (AsSpanDelegate)dynamicMethod.CreateDelegate(typeof(AsSpanDelegate)); } - - // todo: @perf do we even need a lazy here? - internal static readonly Lazy LazyCompiledAsSpanDelegate = new(CompileAsSpanDelegate); #endif } @@ -625,7 +624,7 @@ public void Set(int index, in T value) [MethodImpl((MethodImplOptions)256)] public Span AsSpan() => #if NETSTANDARD2_0_OR_GREATER || NET472 - StackTools>.LazyCompiledAsSpanDelegate.Value(ref this, StackCapacity); + StackTools>.AsSpanCompiled(ref this, StackCapacity); #else MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); #endif @@ -696,7 +695,7 @@ public void Set(int index, in T value) [MethodImpl((MethodImplOptions)256)] public Span AsSpan() => #if NETSTANDARD2_0_OR_GREATER || NET472 - StackTools>.LazyCompiledAsSpanDelegate.Value(ref this, StackCapacity); + StackTools>.AsSpanCompiled(ref this, StackCapacity); #else MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); #endif @@ -769,7 +768,7 @@ public void Set(int index, in T value) [MethodImpl((MethodImplOptions)256)] public Span AsSpan() => #if NETSTANDARD2_0_OR_GREATER || NET472 - StackTools>.LazyCompiledAsSpanDelegate.Value(ref this, StackCapacity); + StackTools>.AsSpanCompiled(ref this, StackCapacity); #else MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); #endif From ef6241bd7560374409d9cbd74b4726522a474127 Mon Sep 17 00:00:00 2001 From: dadhi Date: Fri, 30 May 2025 10:27:48 +0200 Subject: [PATCH 03/32] testing Add because the bm throws the exception --- src/FastExpressionCompiler/ImTools.cs | 60 +++++++++++-------- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 23 +++++++ .../Program.cs | 3 +- ..._repeated_calls_to_ConcurrentDictionary.cs | 21 ++++++- .../Program.cs | 2 +- 5 files changed, 82 insertions(+), 27 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 
84f4a7bc..0cc46545 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -211,10 +211,10 @@ public static ref TItem GetSurePresentItemRef(this ref Stack4 sour Debug.Assert(index < source.Capacity); switch (index) { - case 0: return ref source._it0; - case 1: return ref source._it1; - case 2: return ref source._it2; - case 3: return ref source._it3; + case 0: return ref source._i0; + case 1: return ref source._i1; + case 2: return ref source._i2; + case 3: return ref source._i3; default: return ref RefTools.GetNullRef(); } } @@ -572,7 +572,7 @@ public struct Stack2 : IStack> /// Count of items on stack public const int StackCapacity = 2; - internal T _it0, _it1; + internal T _i0, _i1; /// public int Capacity => StackCapacity; @@ -585,8 +585,8 @@ public ref T GetSurePresentRef(int index) Debug.Assert(index < StackCapacity); switch (index) { - case 0: return ref _it0; - default: return ref _it1; + case 0: return ref _i0; + default: return ref _i1; } } @@ -599,8 +599,8 @@ public T this[int index] Debug.Assert(index < StackCapacity); return index switch { - 0 => _it0, - _ => _it1, + 0 => _i0, + _ => _i1, }; } [MethodImpl((MethodImplOptions)256)] @@ -614,8 +614,8 @@ public void Set(int index, in T value) Debug.Assert(index < StackCapacity); switch (index) { - case 0: _it0 = value; break; - default: _it1 = value; break; + case 0: _i0 = value; break; + default: _i1 = value; break; } } @@ -637,7 +637,7 @@ public struct Stack4 : IStack> /// Count of items on stack public const int StackCapacity = 4; - internal T _it0, _it1, _it2, _it3; + internal T _i0, _i1, _i2, _i3; /// public int Capacity => StackCapacity; @@ -650,10 +650,10 @@ public ref T GetSurePresentRef(int index) Debug.Assert(index < StackCapacity); switch (index) { - case 0: return ref _it0; - case 1: return ref _it1; - case 2: return ref _it2; - default: return ref _it3; + case 0: return ref _i0; + case 1: return ref _i1; + case 2: return ref _i2; + default: return ref 
_i3; } } @@ -666,10 +666,10 @@ public T this[int index] Debug.Assert(index < StackCapacity); return index switch { - 0 => _it0, - 1 => _it1, - 2 => _it2, - _ => _it3, + 0 => _i0, + 1 => _i1, + 2 => _i2, + _ => _i3, }; } [MethodImpl((MethodImplOptions)256)] @@ -683,10 +683,10 @@ public void Set(int index, in T value) Debug.Assert(index < StackCapacity); switch (index) { - case 0: _it0 = value; break; - case 1: _it1 = value; break; - case 2: _it2 = value; break; - default: _it3 = value; break; + case 0: _i0 = value; break; + case 1: _i1 = value; break; + case 2: _i2 = value; break; + default: _i3 = value; break; } } @@ -829,6 +829,18 @@ public void Add(in TItem item) else SmallList.AddDefaultAndGetRef(ref _rest, index - stackCap) = item; } + + /// Adds the item to the end of the list aka the Stack.Push + [MethodImpl((MethodImplOptions)256)] + public void Add2(in TItem item) + { + var index = _count++; + var stackCap = _stack.Capacity; + if (index < stackCap) + _stack.AsSpan()[index] = item; + else + SmallList.AddDefaultAndGetRef(ref _rest, index - stackCap) = item; + } } /// List with the number of first items (4) stored inside its struct and the rest in the growable array. 
diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index 8cb90bad..c36b7336 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -143,3 +143,26 @@ public int Double_and_Sum_AsSpan() return sum; } } + +[MemoryDiagnoser, RankColumn, Orderer(BenchmarkDotNet.Order.SummaryOrderPolicy.FastestToSlowest)] +[HardwareCounters(HardwareCounter.CacheMisses, HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] +public class SmallList_Switch_vs_AsSpan_ByRef_Add +{ + SmallList> _smallList; + + [Benchmark(Baseline = true)] + public SmallList> Add_BySwitch() + { + for (var i = 0; i < 8; ++i) + _smallList.Add(i); + return _smallList; + } + + [Benchmark] + public SmallList> Add_AsSpan() + { + for (var i = 0; i < 8; ++i) + _smallList.Add2(i); + return _smallList; + } +} diff --git a/test/FastExpressionCompiler.Benchmarks/Program.cs b/test/FastExpressionCompiler.Benchmarks/Program.cs index 7b067ad2..2e51ca76 100644 --- a/test/FastExpressionCompiler.Benchmarks/Program.cs +++ b/test/FastExpressionCompiler.Benchmarks/Program.cs @@ -50,7 +50,8 @@ public static void Main() //BenchmarkRunner.Run(); //BenchmarkRunner.Run(); - BenchmarkRunner.Run(); + BenchmarkRunner.Run(); + // BenchmarkRunner.Run(); // BenchmarkRunner.Run(); //var a = new NestedLambdasVsVars(); diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index dc2d5646..b87c231e 100644 --- a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ 
b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -1,11 +1,13 @@ using System; -using System.Collections.Concurrent; + #if LIGHT_EXPRESSION +using FastExpressionCompiler.LightExpression.ImTools; using static FastExpressionCompiler.LightExpression.Expression; namespace FastExpressionCompiler.LightExpression.IssueTests; #else using System.Linq.Expressions; +using FastExpressionCompiler.ImTools; using static System.Linq.Expressions.Expression; namespace FastExpressionCompiler.IssueTests; #endif @@ -14,6 +16,7 @@ public struct Issue476_System_ExecutionEngineException_with_nullables_on_repeate { public void Run(TestRun t) { + TestSmallList(t); Original_case(t); } @@ -45,4 +48,20 @@ public void Original_case(TestContext t) t.IsTrue(ff(notNull)); t.IsFalse(ff(aNull)); } + + SmallList> _smallList; + + public void TestSmallList(TestContext t) + { + for (var i = 0; i < 8; ++i) + _smallList.Add2(i); + + var sum = 0; + for (var i = 0; i < _smallList.Count; i++) + { + ref var n = ref _smallList.GetSurePresentItemRef2(i); + n += n; + sum += n; + } + } } \ No newline at end of file diff --git a/test/FastExpressionCompiler.TestsRunner/Program.cs b/test/FastExpressionCompiler.TestsRunner/Program.cs index ec6020aa..45082583 100644 --- a/test/FastExpressionCompiler.TestsRunner/Program.cs +++ b/test/FastExpressionCompiler.TestsRunner/Program.cs @@ -14,7 +14,7 @@ public static void Main() // ILGeneratorTools.DisableILGeneratorPooling = true; // LightExpression.ILGeneratorTools.DisableILGeneratorPooling = true; - new LightExpression.IssueTests.Issue347_InvalidProgramException_on_compiling_an_expression_that_returns_a_record_which_implements_IList().Run(); + // new LightExpression.IssueTests.Issue347_InvalidProgramException_on_compiling_an_expression_that_returns_a_record_which_implements_IList().Run(); // new LightExpression.UnitTests.NestedLambdasSharedToExpressionCodeStringTest().Run(); // 
new LightExpression.IssueTests.Issue274_Failing_Expressions_in_Linq2DB().Run(); From 1bc3da6ecbdec260055c19d6fe50464f03b8e6f7 Mon Sep 17 00:00:00 2001 From: dadhi Date: Fri, 30 May 2025 16:20:08 +0200 Subject: [PATCH 04/32] fine --- src/FastExpressionCompiler/ImTools.cs | 55 +++++++++++++++++++ .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 44 ++++++++++++--- ..._repeated_calls_to_ConcurrentDictionary.cs | 12 ++-- 3 files changed, 95 insertions(+), 16 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 0cc46545..33cff4d0 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -411,6 +411,61 @@ public bool MoveNext() public void Dispose() { } } + /// Enumerates all the items + [MethodImpl((MethodImplOptions)256)] + public static SmallListEnumerable Enumerate(this ref SmallList list) + where TStack : struct, IStack + => new SmallListEnumerable(list); + + /// Enumerable on stack, without allocations + public struct SmallListEnumerable : IEnumerable + where TStack : struct, IStack + { + private readonly SmallList _list; + /// Constructor + public SmallListEnumerable(SmallList list) => _list = list; + /// + [MethodImpl((MethodImplOptions)256)] + public SmallListEnumerator GetEnumerator() => new SmallListEnumerator(_list); + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + } + + /// Enumerator on stack, without allocations + public struct SmallListEnumerator : IEnumerator, IEnumerator + where TStack : struct, IStack + { + private readonly SmallList _list; + private int _index; + internal SmallListEnumerator(SmallList list) + { + _list = list; + _index = -1; + } + private T _current; + /// + public T Current => _current; + object IEnumerator.Current => _current; + /// + [MethodImpl((MethodImplOptions)256)] + public bool MoveNext() + { + var index = ++_index; + if (index < _list.Count) + { + _current = 
index < _list.StackCapacity + ? _list._stack.GetSurePresentRef(index) + : _list._rest[index - _list.StackCapacity]; + return true; + } + return false; + } + /// + public void Reset() => _index = -1; + /// + public void Dispose() { } + } + /// Returns surely present item ref by its index [MethodImpl((MethodImplOptions)256)] public static ref TItem GetSurePresentItemRef(this ref SmallList2 source, int index) diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index c36b7336..b11f0405 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -148,21 +148,47 @@ public int Double_and_Sum_AsSpan() [HardwareCounters(HardwareCounter.CacheMisses, HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] public class SmallList_Switch_vs_AsSpan_ByRef_Add { - SmallList> _smallList; + /* + ## Strange baseline + + BenchmarkDotNet v0.15.0, Windows 11 (10.0.26100.4202/24H2/2024Update/HudsonValley) + Intel Core i9-8950HK CPU 2.90GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores + .NET SDK 9.0.203 + [Host] : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 + DefaultJob : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 + + | Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | CacheMisses/Op | BranchMispredictions/Op | Gen0 | Allocated | Alloc Ratio | + |------------- |---------:|---------:|---------:|------:|--------:|-----:|----------------------:|---------------:|------------------------:|-------:|----------:|------------:| + | Add_AsSpan | 38.59 ns | 0.833 ns | 2.417 ns | 0.92 | 0.08 | 1 | 78 | 1 | 0 | 0.0063 | 40 B | 1.00 | + | Add_BySwitch | 41.96 ns | 0.876 ns | 2.458 ns | 1.00 | 0.08 | 2 | 80 | 1 | 0 | 0.0063 | 40 B | 1.00 | + */ [Benchmark(Baseline = true)] - public SmallList> Add_BySwitch() + 
public int Add_BySwitch() { - for (var i = 0; i < 8; ++i) - _smallList.Add(i); - return _smallList; + SmallList> list = default; + + for (var n = 8; n > 0; --n) + list.Add(n + 3); + + var sum = 0; + foreach (var n in list.Enumerate()) + sum += n; + return sum; } [Benchmark] - public SmallList> Add_AsSpan() + public int Add_AsSpan() { - for (var i = 0; i < 8; ++i) - _smallList.Add2(i); - return _smallList; + SmallList> list = default; + + for (var n = 8; n > 0; --n) + list.Add2(n + 3); + + + var sum = 0; + foreach (var n in list.Enumerate()) + sum += n; + return sum; } } diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index b87c231e..fb0313e6 100644 --- a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -56,12 +56,10 @@ public void TestSmallList(TestContext t) for (var i = 0; i < 8; ++i) _smallList.Add2(i); - var sum = 0; - for (var i = 0; i < _smallList.Count; i++) - { - ref var n = ref _smallList.GetSurePresentItemRef2(i); - n += n; - sum += n; - } + var doubleSum = 0; + foreach (var n in _smallList.Enumerate()) + doubleSum += n + n; + + t.AreEqual(112, doubleSum); } } \ No newline at end of file From 9becea9e88262c36c4635c66242d4265679ed853 Mon Sep 17 00:00:00 2001 From: dadhi Date: Sat, 31 May 2025 13:30:21 +0200 Subject: [PATCH 05/32] replaced SmallList4 with SmallList>; using ref Indexer instead of Span for things in FEC; benchmarks --- .../Expression.cs | 8 +- ...tExpressionCompiler.LightExpression.csproj | 4 - .../FastExpressionCompiler.cs | 40 +- .../FastExpressionCompiler.csproj 
| 5 - src/FastExpressionCompiler/ImTools.cs | 948 +++++------------- src/FastExpressionCompiler/TestTools.cs | 8 +- .../AccessByRef_vs_ByIGetRefStructImpl.cs | 84 +- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 45 +- .../Program.cs | 4 +- ..._repeated_calls_to_ConcurrentDictionary.cs | 4 +- 10 files changed, 338 insertions(+), 812 deletions(-) diff --git a/src/FastExpressionCompiler.LightExpression/Expression.cs b/src/FastExpressionCompiler.LightExpression/Expression.cs index 4d834f09..9411ea70 100644 --- a/src/FastExpressionCompiler.LightExpression/Expression.cs +++ b/src/FastExpressionCompiler.LightExpression/Expression.cs @@ -81,7 +81,8 @@ public virtual bool TryEmit(CompilerFlags flags, ref ClosureInfo closure, IParam public virtual bool IsCustomToCSharpString => false; [RequiresUnreferencedCode(Trimming.Message)] - public virtual StringBuilder CustomToCSharpString(StringBuilder sb, EnclosedIn enclosedIn, ref SmallList4 named, + public virtual StringBuilder CustomToCSharpString(StringBuilder sb, EnclosedIn enclosedIn, + ref SmallList> named, int lineIndent = 0, bool stripNamespace = false, Func printType = null, int indentSpaces = 4, ObjectToCode notRecognizedToCode = null) => sb; @@ -3352,7 +3353,8 @@ public override bool TryEmit(CompilerFlags flags, ref ClosureInfo closure, IPara [RequiresUnreferencedCode(Trimming.Message)] public override StringBuilder CustomToCSharpString(StringBuilder sb, - EnclosedIn enclosedIn, ref SmallList4 named, + EnclosedIn enclosedIn, + ref SmallList> named, int lineIndent = 0, bool stripNamespace = false, Func printType = null, int indentSpaces = 4, ObjectToCode notRecognizedToCode = null) { @@ -5283,7 +5285,7 @@ internal System.Linq.Expressions.LabelTarget ToSystemLabelTarget(ref SmallList named = default; + SmallList> named = default; return new StringBuilder().AppendLabelName(this, ref named).ToString(); } } diff --git a/src/FastExpressionCompiler.LightExpression/FastExpressionCompiler.LightExpression.csproj 
b/src/FastExpressionCompiler.LightExpression/FastExpressionCompiler.LightExpression.csproj index e7b6035e..9e2ca958 100644 --- a/src/FastExpressionCompiler.LightExpression/FastExpressionCompiler.LightExpression.csproj +++ b/src/FastExpressionCompiler.LightExpression/FastExpressionCompiler.LightExpression.csproj @@ -94,9 +94,5 @@ - - - - diff --git a/src/FastExpressionCompiler/FastExpressionCompiler.cs b/src/FastExpressionCompiler/FastExpressionCompiler.cs index 9883b537..5df4398f 100644 --- a/src/FastExpressionCompiler/FastExpressionCompiler.cs +++ b/src/FastExpressionCompiler/FastExpressionCompiler.cs @@ -737,8 +737,8 @@ public struct ClosureInfo /// Tracks the use of the variables in the blocks stack per variable, /// (uint) contains (ushort) BlockIndex in the upper bits and (ushort) VarIndex in the lower bits. /// to determine if variable is the local variable and in what block it's defined - private SmallMap4, - RefEq, SmallMap4.SingleArrayEntries, RefEq> + private SmallMap4>, + RefEq, SmallMap4.SingleArrayEntries>, RefEq> > _varInBlockMap; /// The map of inlined invocations collected in TryCollect and then used in TryEmit @@ -754,7 +754,7 @@ public struct ClosureInfo internal bool HasComplexExpression; /// The stack for the lambda invocation and the labels bound to them - internal SmallList4 LambdaInvokeStackLabels; + internal SmallList> LambdaInvokeStackLabels; /// Tracks of how many gotos, labels referencing the specific target, they may be the same gotos expression, /// because the gotos may be reused multiple times in the big expression @@ -952,12 +952,13 @@ public int GetDefinedLocalVarOrDefault(ParameterExpression varParamExpr) } } - internal static ref LabelInfo GetLabelOrInvokeIndexByTarget(ref this SmallList4 labels, object labelTarget, out bool found) + internal static ref LabelInfo GetLabelOrInvokeIndexByTarget( + ref this SmallList> labels, object labelTarget, out bool found) { var count = labels.Count; for (var i = 0; i < count; ++i) // todo: 
@perf make this loop into the SmallList method to avoid index check on each item { - ref var label = ref labels.GetSurePresentItemRef(i); + ref var label = ref labels[i]; if (label.Target == labelTarget) { found = true; @@ -2467,7 +2468,7 @@ private static bool TryEmitNew(Expression expr, IReadOnlyList paramExprs, IL } else { - SmallList4 argVars = default; + SmallList> argVars = default; for (var i = 0; i < argCount; ++i) { var argExpr = argExprs.GetArgument(i); @@ -4977,7 +4978,7 @@ private static bool TryEmitMethodCall(Expression expr, // don't forget to store the object into the variable first, before emitting the arguments var objVar = objExpr == null ? -1 : EmitStoreLocalVariable(il, objExpr.Type); - SmallList4 argVars = default; + SmallList> argVars = default; for (var i = 0; i < methodParams.Length; i++) { var argExpr = callArgs.GetArgument(i); @@ -9128,7 +9129,7 @@ internal static StringBuilder ToExpressionString(this ParameterExpression pe, St { if (paramsExprs.TryGetIndex(out var i, pe, paramsExprs.Count, default(RefEq))) { - SmallList4 named = default; + SmallList> named = default; return sb .Append("p[").Append(i) .Append(" // (") @@ -9149,7 +9150,7 @@ internal static StringBuilder ToExpressionString(this LabelTarget lt, StringBuil { if (labelTargets.TryGetIndex(out var i, lt, labelTargets.Count, default(RefEq))) { - SmallList4 named = default; + SmallList> named = default; return sb.Append("l[").Append(i) .Append(" // (").AppendName(lt, lt.Name, lt.Type.ToCode(stripNamespace, printType), ref named, lt.GetHashCode()).Append(')') .NewLineIndent(lineIndent).Append(']'); @@ -9613,7 +9614,7 @@ public static string ToCSharpString(this Expression expr, ObjectToCode notRecogn public static StringBuilder ToCSharpString(this Expression e, StringBuilder sb, int lineIndent = 0, bool stripNamespace = false, Func printType = null, int indentSpaces = 4, ObjectToCode notRecognizedToCode = null) { - var named = new SmallList4(); + SmallList> named = default; return 
e.ToCSharpString(sb, EnclosedIn.ParensByDefault, ref named, lineIndent, stripNamespace, printType, indentSpaces, notRecognizedToCode); } @@ -9653,7 +9654,7 @@ private static StringBuilder InsertTopFFuncDefinitionOnce(StringBuilder sb) => : sb; internal static StringBuilder ToCSharpString(this Expression e, - StringBuilder sb, EnclosedIn enclosedIn, ref SmallList4 named, + StringBuilder sb, EnclosedIn enclosedIn, ref SmallList> named, int lineIndent = 0, bool stripNamespace = false, Func printType = null, int indentSpaces = 4, ObjectToCode notRecognizedToCode = null, bool isReturnByRef = false) { @@ -10070,7 +10071,7 @@ internal static StringBuilder ToCSharpString(this Expression e, lineIndent = sb.GetRealLineIndent(lineIndent); var returnsValue = e.Type != typeof(void); - void PrintPart(Expression part, ref SmallList4 named) + void PrintPart(Expression part, ref SmallList> named) { var incIndent = lineIndent + indentSpaces; if (part is BlockExpression pb) @@ -10474,7 +10475,7 @@ private static bool AvoidParens(EnclosedIn enclosedIn) => enclosedIn == EnclosedIn.Block | // statement in a block don't need the parens as well enclosedIn == EnclosedIn.Return; - private static StringBuilder ToCSharpBlock(this Expression expr, StringBuilder sb, ref SmallList4 named, + private static StringBuilder ToCSharpBlock(this Expression expr, StringBuilder sb, ref SmallList> named, int lineIndent, bool stripNamespace, Func printType, int indentSpaces, ObjectToCode notRecognizedToCode) { sb.NewLineIndent(lineIndent).Append('{'); @@ -10492,7 +10493,7 @@ private static StringBuilder ToCSharpBlock(this Expression expr, StringBuilder s } private static StringBuilder ToCSharpExpression(this Expression expr, - StringBuilder sb, EnclosedIn enclosedIn, ref SmallList4 named, bool newLineExpr, + StringBuilder sb, EnclosedIn enclosedIn, ref SmallList> named, bool newLineExpr, int lineIndent, bool stripNamespace, Func printType, int indentSpaces, ObjectToCode notRecognizedToCode) { if 
(!expr.NodeType.IsBlockLike()) @@ -10563,7 +10564,7 @@ internal static int GetRealLineIndent(this StringBuilder sb, int defaultIndent) private const string NotSupportedExpression = "// NOT_SUPPORTED_EXPRESSION: "; private static StringBuilder ToCSharpString(this IReadOnlyList bindings, - StringBuilder sb, EnclosedIn enclosedIn, ref SmallList4 named, + StringBuilder sb, EnclosedIn enclosedIn, ref SmallList> named, int lineIndent = 0, bool stripNamespace = false, Func printType = null, int indentSpaces = 4, ObjectToCode notRecognizedToCode = null) { var count = bindings.Count; @@ -10619,7 +10620,8 @@ private static StringBuilder ToCSharpString(this IReadOnlyList bi return sb; } - private static StringBuilder BlockToCSharpString(this BlockExpression b, StringBuilder sb, ref SmallList4 named, + private static StringBuilder BlockToCSharpString(this BlockExpression b, StringBuilder sb, + ref SmallList> named, int lineIndent = 0, bool stripNamespace = false, Func printType = null, int indentSpaces = 4, ObjectToCode notRecognizedToCode = null, bool inTheLastBlock = false, BinaryExpression blockResultAssignment = null, bool containerIgnoresResult = false // in case of the container is lambda which is the Action/void delegate and ignores result, we don't need the `return` - it will be invalid c# @@ -10919,7 +10921,8 @@ public struct NamedWithIndex public int Index; } - internal static StringBuilder AppendName(this StringBuilder sb, object parOrTarget, string name, string typeCode, ref SmallList4 named, + internal static StringBuilder AppendName(this StringBuilder sb, object parOrTarget, string name, string typeCode, + ref SmallList> named, int noNameIndex = 0) { var nameIndex = 0; @@ -10959,7 +10962,8 @@ internal static StringBuilder AppendName(this StringBuilder sb, object parOrTarg return sb.Append(validTypeIdent).Append('_').Append(noNameIndex); } - internal static StringBuilder AppendLabelName(this StringBuilder sb, LabelTarget target, ref SmallList4 named) => + internal 
static StringBuilder AppendLabelName(this StringBuilder sb, LabelTarget target, + ref SmallList> named) => sb.AppendName(target, target.Name, target.Type.ToCode(stripNamespace: true), ref named); /// Returns the standard name (alias) for the well-known primitive type, e.g. Int16 -> short diff --git a/src/FastExpressionCompiler/FastExpressionCompiler.csproj b/src/FastExpressionCompiler/FastExpressionCompiler.csproj index 4dd40ec6..eb44d973 100644 --- a/src/FastExpressionCompiler/FastExpressionCompiler.csproj +++ b/src/FastExpressionCompiler/FastExpressionCompiler.csproj @@ -84,9 +84,4 @@ - - - - - diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 33cff4d0..977580a6 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -26,6 +26,11 @@ THE SOFTWARE. // ReSharper disable once InconsistentNaming #nullable disable +#if !NETSTANDARD2_0_OR_GREATER && !NET472 +#define SUPPORTS_UNSAFE +#define SUPPORTS_CREATE_SPAN +#endif + #if LIGHT_EXPRESSION namespace FastExpressionCompiler.LightExpression.ImTools; #else @@ -41,11 +46,6 @@ namespace FastExpressionCompiler.ImTools; using System.Runtime.InteropServices; using System.Diagnostics.CodeAnalysis; -#if NETSTANDARD2_0_OR_GREATER || NET472 -using System.Reflection.Emit; -using System.Reflection; -#endif - using static SmallMap4; /// Helpers and polyfills for the missing things in the old .NET versions @@ -56,7 +56,7 @@ public static class RefTools [MethodImpl((MethodImplOptions)256)] public static ref T GetNullRef() => #if NET6_0_OR_GREATER - ref Unsafe.NullRef(); + ref Unsafe.NullRef(); #else ref _missing; internal static T _missing = default; @@ -186,229 +186,142 @@ public static int GetIndexOrAdd(this ref SmallList source, in return count; } - /// Returns surely present item ref by its index + /// Good stuff [MethodImpl((MethodImplOptions)256)] - public static ref TItem GetSurePresentItemRef(this ref SmallList4 source, int index) + public 
static void InitCount(this ref SmallList source, int count) + where TStack : struct, IStack { - Debug.Assert(source.Count != 0); - Debug.Assert(index < source.Count); - switch (index) - { - case 0: return ref source._it0; - case 1: return ref source._it1; - case 2: return ref source._it2; - case 3: return ref source._it3; - default: - Debug.Assert(source._rest != null, $"Expecting deeper items are already existing on stack at index: {index}"); - return ref source._rest[index - SmallList4.StackCapacity]; - } - } + Debug.Assert(count > 0, "Count should be more than 0"); + Debug.Assert(source._count == 0, "Initially the count should be 0"); - /// Returns a surely present item ref by its index - [MethodImpl((MethodImplOptions)256)] - public static ref TItem GetSurePresentItemRef(this ref Stack4 source, int index) - { - Debug.Assert(index < source.Capacity); - switch (index) - { - case 0: return ref source._i0; - case 1: return ref source._i1; - case 2: return ref source._i2; - case 3: return ref source._i3; - default: return ref RefTools.GetNullRef(); - } + // Add the StackCapacity empty space at the end, we may use it later for BuildToArray. + // The actual source Capacity will be StackCapacity + count. 
+ if (count > source.StackCapacity) + source._rest = new T[count]; + source._count = count; } /// Returns a surely present item ref by its index [MethodImpl((MethodImplOptions)256)] - public static ref T GetSurePresentItemRef(this ref SmallList list, int index) + public static ref T GetSurePresentItemRef(this ref SmallList source, int index) where TStack : struct, IStack { - Debug.Assert(list.Count != 0); - Debug.Assert(index < list.Count); + Debug.Assert(source.Count != 0); + Debug.Assert(index < source.Count); - var stackCap = list.StackCapacity; + var stackCap = source.StackCapacity; if (index < stackCap) - return ref list._stack.GetSurePresentRef(index); + return ref source._stack[index]; - Debug.Assert(list._rest != null); - return ref list._rest[index - stackCap]; + Debug.Assert(source._rest != null); + return ref source._rest[index - stackCap]; } +#if SUPPORTS_CREATE_SPAN /// Returns a surely present item ref by its index [MethodImpl((MethodImplOptions)256)] - public static ref T GetSurePresentItemRef2(this ref SmallList list, int index) + public static ref T GetSurePresentItemRef2(this ref SmallList source, int index) where TStack : struct, IStack { - Debug.Assert(list.Count != 0); - Debug.Assert(index < list.Count); + Debug.Assert(source.Count != 0); + Debug.Assert(index < source.Count); - var stackCap = list.StackCapacity; + var stackCap = source.StackCapacity; if (index < stackCap) - return ref list._stack.AsSpan()[index]; + return ref source._stack.AsSpan()[index]; - Debug.Assert(list._rest != null); - return ref list._rest[index - stackCap]; + Debug.Assert(source._rest != null); + return ref source._rest[index - stackCap]; } +#endif /// Returns last present item ref, assumes that the list is not empty! 
[MethodImpl((MethodImplOptions)256)] - public static ref TItem GetLastSurePresentItem(this ref SmallList4 source) => + public static ref T GetLastSurePresentItem(this ref SmallList source) + where TStack : struct, IStack => ref source.GetSurePresentItemRef(source._count - 1); - /// Appends the default item to the end of the list and returns the reference to it. + /// Removes the last item from the list aka the Stack Pop. Assumes that the list is not empty! [MethodImpl((MethodImplOptions)256)] - public static ref TItem AddDefaultAndGetRef(this ref SmallList4 source) + public static void RemoveLastSurePresentItem(this ref SmallList source) + where TStack : struct, IStack { - var index = source._count++; - switch (index) - { - case 0: return ref source._it0; - case 1: return ref source._it1; - case 2: return ref source._it2; - case 3: return ref source._it3; - default: - return ref AddDefaultAndGetRef(ref source._rest, index - SmallList4.StackCapacity); - } + Debug.Assert(source._count != 0); + var index = --source._count; + source[index] = default; } - /// Looks for the item in the list and return its index if found or -1 for the absent item + ///Clears the list, but keeps the already allocated array on heap to reuse in the future [MethodImpl((MethodImplOptions)256)] - public static int TryGetIndex(this ref SmallList4 source, TItem it, TEq eq = default) - where TEq : struct, IEq + public static void Clear(this ref SmallList source) + where TStack : struct, IStack { - switch (source._count) + source._stack = default; // clear the stack items + var restCount = source._count - source.StackCapacity; + if (restCount > 0) { - case 1: - if (eq.Equals(it, source._it0)) return 0; - break; - - case 2: - if (eq.Equals(it, source._it0)) return 0; - if (eq.Equals(it, source._it1)) return 1; - break; - - case 3: - if (eq.Equals(it, source._it0)) return 0; - if (eq.Equals(it, source._it1)) return 1; - if (eq.Equals(it, source._it2)) return 2; - break; - - case var n: - if 
(eq.Equals(it, source._it0)) return 0; - if (eq.Equals(it, source._it1)) return 1; - if (eq.Equals(it, source._it2)) return 2; - if (eq.Equals(it, source._it3)) return 3; - const int StackCapacity = SmallList4.StackCapacity; - if (n == StackCapacity) - break; - - return source._rest.TryGetIndex(in it, 0, source._count - StackCapacity, eq, -1 - StackCapacity) + StackCapacity; + Debug.Assert(source._rest != null, "Expecting deeper items are already existing on stack"); + Array.Clear(source._rest, 0, restCount); } - return -1; + source._count = 0; } - /// Returns the index of the found item or appends the item to the end of the list, and returns its index + /// Appends the default item to the end of the list and returns the reference to it. [MethodImpl((MethodImplOptions)256)] - public static int GetIndexOrAdd(this ref SmallList4 source, in TItem item, TEq eq = default) - where TEq : struct, IEq + public static ref T AddDefaultAndGetRef(this ref SmallList source) + where TStack : struct, IStack { - switch (source._count) - { - case 0: - source._count = 1; - source._it0 = item; - return 0; - - case 1: - if (eq.Equals(item, source._it0)) return 0; - source._count = 2; - source._it1 = item; - return 1; - - case 2: - if (eq.Equals(item, source._it0)) return 0; - if (eq.Equals(item, source._it1)) return 1; - source._count = 3; - source._it2 = item; - return 2; - - case 3: - if (eq.Equals(item, source._it0)) return 0; - if (eq.Equals(item, source._it1)) return 1; - if (eq.Equals(item, source._it2)) return 2; - source._count = 4; - source._it3 = item; - return 3; - - default: - if (eq.Equals(item, source._it0)) return 0; - if (eq.Equals(item, source._it1)) return 1; - if (eq.Equals(item, source._it2)) return 2; - if (eq.Equals(item, source._it3)) return 3; - var restCount = source._count - SmallList4.StackCapacity; - if (restCount != 0) - { - var i = source._rest.TryGetIndex(item, 0, restCount, eq); - if (i != -1) - return i + SmallList4.StackCapacity; - } - 
AddDefaultAndGetRef(ref source._rest, restCount) = item; - return source._count++; - } + var index = source._count++; + var stackCap = source.StackCapacity; + if (index < stackCap) + return ref source._stack[index]; + Debug.Assert(source._rest != null, "Expecting deeper items are already existing on stack"); + return ref SmallList.AddDefaultAndGetRef(ref source._rest, index - stackCap); } - /// Enumerates all the items - public static SmallList4Enumerable Enumerate(this ref SmallList4 list) => new SmallList4Enumerable(list); - - /// Enumerable on stack, without allocations - public struct SmallList4Enumerable : IEnumerable, IEnumerable + /// Adds the item to the end of the list aka the Stack.Push. + /// Returns the index. + [MethodImpl((MethodImplOptions)256)] + public static int Add(this ref SmallList source, in T item) + where TStack : struct, IStack { - private readonly SmallList4 _list; - /// Constructor - public SmallList4Enumerable(SmallList4 list) => _list = list; - /// - public SmallList4Enumerator GetEnumerator() => new SmallList4Enumerator(_list); - IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); - IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + var index = source._count++; + var stackCap = source.StackCapacity; + if (index < stackCap) + source._stack[index] = item; + else + SmallList.AddDefaultAndGetRef(ref source._rest, index - stackCap) = item; + Debug.Assert(source._rest != null, "Expecting deeper items are already existing on stack"); + return index; } - /// Enumerator on stack, without allocations - public struct SmallList4Enumerator : IEnumerator, IEnumerator + /// Looks for the item in the list and return its index if found or -1 for the absent item + [MethodImpl((MethodImplOptions)256)] + public static int TryGetIndex(this ref SmallList source, in T item, TEq eq = default) + where TStack : struct, IStack + where TEq : struct, IEq { - private readonly SmallList4 _list; - private int _index; - internal 
SmallList4Enumerator(SmallList4 list) - { - _list = list; - _index = -1; - } - private TItem _current; - /// - public TItem Current => _current; - object IEnumerator.Current => _current; - /// - public bool MoveNext() + if (source._count != 0) { - var index = ++_index; - var list = _list; - if (index < _list.Count) - switch (index) - { - case 0: _current = list._it0; return true; - case 1: _current = list._it1; return true; - case 2: _current = list._it2; return true; - case 3: _current = list._it3; return true; - default: - _current = list._rest[index - SmallList4.StackCapacity]; - return true; - } - return false; + var index = 0; + foreach (var it in source.Enumerate()) + { + if (eq.Equals(item, it)) + return index; + ++index; + } } - /// - public void Reset() => _index = -1; - /// - public void Dispose() { } + return -1; + } + + /// Returns the index of the found item or appends the item to the end of the list, and returns its index + [MethodImpl((MethodImplOptions)256)] + public static int GetIndexOrAdd(this ref SmallList source, in T item, TEq eq = default) + where TStack : struct, IStack + where TEq : struct, IEq + { + var i = source.TryGetIndex(in item, eq); + return i != -1 ? i : source.Add(in item); } /// Enumerates all the items @@ -454,7 +367,7 @@ public bool MoveNext() if (index < _list.Count) { _current = index < _list.StackCapacity - ? _list._stack.GetSurePresentRef(index) + ? 
_list._stack[index] : _list._rest[index - _list.StackCapacity]; return true; } @@ -560,63 +473,40 @@ public static int GetIndexOrAdd(this ref SmallList2 source, T #pragma warning disable CS9101 // UnscopedRef goes wrong on Ubuntu +internal static class Stack +{ + [MethodImpl(MethodImplOptions.NoInlining)] + internal static ref T ThrowIndexOutOfBounds(int index, int capacity) + { + throw new IndexOutOfRangeException($"Index {index} is out of range for Stack{capacity}<{typeof(T)},..>."); + } +} + // todo: @wip generalized Stack is the WIP and may be moved to ImTools repo /// Abstracts over collection of the items on stack of the fixed Capacity, /// to be used as a part of the hybrid data structures which grow from stack to heap public interface IStack where TStack : struct, IStack { - /// Count of items holding + /// Possible count of items holding on stack int Capacity { get; } - /// Indexer - T this[int index] { get; set; } + /// Gets first item by ref + [UnscopedRef] + ref T First { get; } - /// Set indexed item via value passed by-ref - void Set(int index, in T item); + /// Gets last item by ref + [UnscopedRef] + ref T Last { get; } - /// Gets the ref to the struct T field/item by index. Does not not check the index boundaries - do it externally! 
+ /// Indexer returning the item by ref to read and write the item value [UnscopedRef] - ref T GetSurePresentRef(int index); + ref T this[int index] { get; } +#if SUPPORTS_CREATE_SPAN /// Creates a span from the struct items [UnscopedRef] Span AsSpan(); -} - -internal static class StackTools - where TStack : struct, IStack -{ -#if NETSTANDARD2_0_OR_GREATER || NET472 - internal static readonly ConstructorInfo SpanConstructor = - typeof(Span).GetConstructor(new[] { typeof(void*), typeof(int) }); - - internal delegate Span AsSpanDelegate(ref TStack stack, int capacity); - - internal static readonly AsSpanDelegate AsSpanCompiled; - - static StackTools() - { - var dynamicMethod = new DynamicMethod( - string.Empty, - typeof(Span), - [typeof(TStack).MakeByRefType(), typeof(int)], // todo: @perf pool this thing - typeof(TStack), - true - ); - - // Set capacity to the estimated size to avoid realloc, 1 + 1 + 1 + 5 + 1 = 9 bytes + a small buffer - var il = dynamicMethod.GetILGenerator(16); - - // IL to replicate: return new Span(Unsafe.AsPointer(ref this), StackCapacity); - il.Emit(OpCodes.Ldarg_0); // Load 'ref this' - il.Emit(OpCodes.Conv_U); // Convert managed reference to native unsigned int (void*) - il.Emit(OpCodes.Ldarg_1); // Load length (StackCapacity) argument - il.Emit(OpCodes.Newobj, SpanConstructor); - il.Emit(OpCodes.Ret); - - AsSpanCompiled = (AsSpanDelegate)dynamicMethod.CreateDelegate(typeof(AsSpanDelegate)); - } #endif } @@ -627,60 +517,45 @@ public struct Stack2 : IStack> /// Count of items on stack public const int StackCapacity = 2; - internal T _i0, _i1; + internal T _it0, _it1; /// public int Capacity => StackCapacity; /// [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] - public ref T GetSurePresentRef(int index) - { - Debug.Assert(index < StackCapacity); - switch (index) - { - case 0: return ref _i0; - default: return ref _i1; - } - } + public ref T First { get => ref _it0; } + + /// + [UnscopedRef] + public ref T Last { get => ref _it1; } /// - 
public T this[int index] + [UnscopedRef] + public ref T this[int index] { [MethodImpl((MethodImplOptions)256)] get { - Debug.Assert(index < StackCapacity); - return index switch + if (index < 0 | index >= StackCapacity) + return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); +#if SUPPORTS_UNSAFE + return ref Unsafe.Add(ref _it0, index); +#else + switch (index) { - 0 => _i0, - _ => _i1, - }; - } - [MethodImpl((MethodImplOptions)256)] - set => Set(index, in value); - } - - /// - [MethodImpl((MethodImplOptions)256)] - public void Set(int index, in T value) - { - Debug.Assert(index < StackCapacity); - switch (index) - { - case 0: _i0 = value; break; - default: _i1 = value; break; + case 0: return ref _it0; + default: return ref _it1; + } +#endif } } +#if SUPPORTS_CREATE_SPAN /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] public Span AsSpan() => -#if NETSTANDARD2_0_OR_GREATER || NET472 - StackTools>.AsSpanCompiled(ref this, StackCapacity); -#else MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); #endif } @@ -692,72 +567,106 @@ public struct Stack4 : IStack> /// Count of items on stack public const int StackCapacity = 4; - internal T _i0, _i1, _i2, _i3; + internal T _it0, _it1, _it2, _it3; /// public int Capacity => StackCapacity; /// [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] - public ref T GetSurePresentRef(int index) - { - Debug.Assert(index < StackCapacity); - switch (index) - { - case 0: return ref _i0; - case 1: return ref _i1; - case 2: return ref _i2; - default: return ref _i3; - } - } + public ref T First { get => ref _it0; } + + /// + [UnscopedRef] + public ref T Last { get => ref _it3; } /// - public T this[int index] + [UnscopedRef] + public ref T this[int index] { [MethodImpl((MethodImplOptions)256)] get { - Debug.Assert(index < StackCapacity); - return index switch + if (index < 0 | index >= StackCapacity) + return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); +#if SUPPORTS_UNSAFE + return ref 
Unsafe.Add(ref _it0, index); +#else + switch (index) { - 0 => _i0, - 1 => _i1, - 2 => _i2, - _ => _i3, - }; + case 0: return ref _it0; + case 1: return ref _it1; + case 2: return ref _it2; + default: return ref _it3; + } +#endif } - [MethodImpl((MethodImplOptions)256)] - set => Set(index, in value); } - /// Sets the value by the index +#if SUPPORTS_CREATE_SPAN + /// + [UnscopedRef] [MethodImpl((MethodImplOptions)256)] - public void Set(int index, in T value) + public Span AsSpan() => + MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); +#endif +} + +/// Implementation of `IStack` for 8 items on stack +[StructLayout(LayoutKind.Sequential, Pack = 1)] +public struct Stack8 : IStack> +{ + /// Count of items on stack + public const int StackCapacity = 8; + internal T _it0, _it1, _it2, _it3, _it4, _it5, _it6, _it7; + + /// + public int Capacity => StackCapacity; + + /// + [UnscopedRef] + public ref T First { get => ref _it0; } + + /// + [UnscopedRef] + public ref T Last { get => ref _it7; } + + /// + [UnscopedRef] + public ref T this[int index] { - Debug.Assert(index < StackCapacity); - switch (index) + get { - case 0: _i0 = value; break; - case 1: _i1 = value; break; - case 2: _i2 = value; break; - default: _i3 = value; break; + if (index < 0 | index >= StackCapacity) + return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); +#if SUPPORTS_UNSAFE + return ref Unsafe.Add(ref _it0, index); +#else + switch (index) + { + case 0: return ref _it0; + case 1: return ref _it1; + case 2: return ref _it2; + case 3: return ref _it3; + case 4: return ref _it4; + case 5: return ref _it5; + case 6: return ref _it6; + default: return ref _it7; + } +#endif } } +#if SUPPORTS_CREATE_SPAN /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] public Span AsSpan() => -#if NETSTANDARD2_0_OR_GREATER || NET472 - StackTools>.AsSpanCompiled(ref this, StackCapacity); -#else - MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); + 
MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); #endif } -// todo: @wip -/// Implementation of `IStack` for 4 items on stack +/// Implementation of `IStack` for 16 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] public struct Stack16 : IStack> { @@ -772,59 +681,52 @@ public struct Stack16 : IStack> /// [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] - public ref T GetSurePresentRef(int index) - { - Debug.Assert(index < StackCapacity); - switch (index) - { - case 0: return ref _it0; - case 1: return ref _it1; - case 2: return ref _it2; - default: return ref _it3; - } - } + public ref T First { get => ref _it0; } /// - public T this[int index] + [UnscopedRef] + public ref T Last { get => ref _it15; } + + /// + [UnscopedRef] + public ref T this[int index] { [MethodImpl((MethodImplOptions)256)] get { - Debug.Assert(index < StackCapacity); - return index switch + if (index < 0 | index >= StackCapacity) + return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); +#if SUPPORTS_UNSAFE + return ref Unsafe.Add(ref _it0, index); +#else + switch (index) { - 0 => _it0, - 1 => _it1, - 2 => _it2, - _ => _it3, - }; - } - [MethodImpl((MethodImplOptions)256)] - set => Set(index, in value); - } - - /// Sets the value by the index - [MethodImpl((MethodImplOptions)256)] - public void Set(int index, in T value) - { - Debug.Assert(index < StackCapacity); - switch (index) - { - case 0: _it0 = value; break; - case 1: _it1 = value; break; - case 2: _it2 = value; break; - default: _it3 = value; break; + case 0: return ref _it0; + case 1: return ref _it1; + case 2: return ref _it2; + case 3: return ref _it3; + case 4: return ref _it4; + case 5: return ref _it5; + case 6: return ref _it6; + case 7: return ref _it7; + case 8: return ref _it8; + case 9: return ref _it9; + case 10: return ref _it10; + case 11: return ref _it11; + case 12: return ref _it12; + case 13: return ref _it13; + case 14: return ref _it14; + default: return ref _it15; + } 
+#endif } } +#if SUPPORTS_CREATE_SPAN /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] public Span AsSpan() => -#if NETSTANDARD2_0_OR_GREATER || NET472 - StackTools>.AsSpanCompiled(ref this, StackCapacity); -#else MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); #endif } @@ -856,8 +758,9 @@ public int Count get => _count; } - /// Returns surely present item by its index - public TItem this[int index] + /// Returns surely present item by ref + [UnscopedRef] + public ref TItem this[int index] { [MethodImpl((MethodImplOptions)256)] get @@ -866,376 +769,9 @@ public TItem this[int index] Debug.Assert(index < _count); var stackCap = _stack.Capacity; if (index < stackCap) - return _stack[index]; - + return ref _stack[index]; Debug.Assert(_rest != null); - return _rest[index - stackCap]; - } - } - - /// Adds the item to the end of the list aka the Stack.Push - [MethodImpl((MethodImplOptions)256)] - public void Add(in TItem item) - { - var index = _count++; - var stackCap = _stack.Capacity; - if (index < stackCap) - _stack.Set(index, in item); - else - SmallList.AddDefaultAndGetRef(ref _rest, index - stackCap) = item; - } - - /// Adds the item to the end of the list aka the Stack.Push - [MethodImpl((MethodImplOptions)256)] - public void Add2(in TItem item) - { - var index = _count++; - var stackCap = _stack.Capacity; - if (index < stackCap) - _stack.AsSpan()[index] = item; - else - SmallList.AddDefaultAndGetRef(ref _rest, index - stackCap) = item; - } -} - -/// List with the number of first items (4) stored inside its struct and the rest in the growable array. 
-/// Supports addition and removal (remove is without resize) only at the end of the list, aka Stack behavior -[DebuggerDisplay("{Count} of {_it0?.ToString()}, {_it1?.ToString()}, {_it2?.ToString()}, {_it3?.ToString()}, ...")] -public struct SmallList4 -{ - /// The number of entries stored inside the map itself without moving them to array on heap - public const int StackCapacity = 4; - - internal int _count; - internal TItem _it0, _it1, _it2, _it3; - internal TItem[] _rest; - - /// Gets the number of items in the list - public int Count - { - [MethodImpl((MethodImplOptions)256)] - get => _count; - } - - /// Populate with one item - [MethodImpl((MethodImplOptions)256)] - public void Init1(TItem it0) - { - _count = 1; - _it0 = it0; - } - - /// Populate with two items - [MethodImpl((MethodImplOptions)256)] - public void Init2(TItem it0, TItem it1) - { - _count = 2; - _it0 = it0; - _it1 = it1; - } - - /// Populate with 3 items - [MethodImpl((MethodImplOptions)256)] - public void Init3(TItem it0, TItem it1, TItem it2) - { - _count = 3; - _it0 = it0; - _it1 = it1; - _it2 = it2; - } - - /// Populate with 4 items - [MethodImpl((MethodImplOptions)256)] - public void Init4(TItem it0, TItem it1, TItem it2, TItem it3) - { - _count = StackCapacity; - _it0 = it0; - _it1 = it1; - _it2 = it2; - _it3 = it3; - } - - /// Populate with `count` items - [MethodImpl((MethodImplOptions)256)] - public void Init(int count, in TItem it0, in TItem it1, in TItem it2, in TItem it3) - { - Debug.Assert(count >= 0 & count <= 4); - _count = count; - _it0 = it0; - _it1 = it1; - _it2 = it2; - _it3 = it3; - } - - /// Populates the list stack items and owns/uses the provided rest array and its count - [MethodImpl((MethodImplOptions)256)] - public void Embed(TItem it0, TItem it1, TItem it2, TItem it3, TItem[] rest, int restCount) - { - _it0 = it0; - _it1 = it1; - _it2 = it2; - _it3 = it3; - _rest = rest; - _count = StackCapacity + restCount; - } - - /// Populate with arbitrary items - public void 
InitFromList(TList items) where TList : IReadOnlyList - { - switch (items.Count) - { - case 0: - break; - case 1: - Init1(items[0]); - break; - case 2: - Init2(items[0], items[1]); - break; - case 3: - Init3(items[0], items[1], items[2]); - break; - case 4: - Init4(items[0], items[1], items[2], items[3]); - break; - default: - Init4(items[0], items[1], items[2], items[3]); - - // keep the capacity at count + StackCapacity - _count = items.Count; - var rest = new TItem[_count]; // todo: @perf take from the ArrayPool.Shared - for (var i = StackCapacity; i < _count; ++i) - rest[i - StackCapacity] = items[i]; // todo: @perf does List have a Copy? - _rest = rest; - break; - } - } - - /// Returns surely present item by its index - public TItem this[int index] - { - [MethodImpl((MethodImplOptions)256)] - get - { - Debug.Assert(_count != 0); - Debug.Assert(index < _count); - switch (index) - { - case 0: return _it0; - case 1: return _it1; - case 2: return _it2; - case 3: return _it3; - default: - Debug.Assert(_rest != null, $"Expecting deeper items are already existing on stack at index: {index}"); - return _rest[index - StackCapacity]; - } - } - } - - /// Adds the item to the end of the list aka the Stack.Push - [MethodImpl((MethodImplOptions)256)] - public void Add(in TItem item) - { - var index = _count++; - switch (index) - { - case 0: _it0 = item; break; - case 1: _it1 = item; break; - case 2: _it2 = item; break; - case 3: _it3 = item; break; - default: - SmallList.AddDefaultAndGetRef(ref _rest, index - StackCapacity) = item; - break; - } - } - - /// Adds the default item to the end of the list aka the Stack.Push default - [MethodImpl((MethodImplOptions)256)] - public void AddDefault() - { - if (++_count >= StackCapacity) - SmallList.AddDefaultAndGetRef(ref _rest, _count - StackCapacity); - } - - /// Removes the last item from the list aka the Stack Pop. Assumes that the list is not empty! 
- [MethodImpl((MethodImplOptions)256)] - public void RemoveLastSurePresentItem() - { - Debug.Assert(_count != 0); - var index = --_count; - switch (index) - { - case 0: _it0 = default; break; - case 1: _it1 = default; break; - case 2: _it2 = default; break; - case 3: _it3 = default; break; - default: - Debug.Assert(_rest != null, $"Expecting a deeper parent stack created before accessing it here at level {index}"); - _rest[index - StackCapacity] = default; - break; - } - } - - /// Adds another list to the current list - public void AddList(in SmallList4 added) - { - if (_count == 0) - { - Init4(added._it0, added._it1, added._it2, added._it3); - var addedRestCount = added.Count - StackCapacity; - if (addedRestCount > 0) - { - _rest = new TItem[addedRestCount + StackCapacity]; // add a bit of the empty room of `StackCapacity` at the end, so you may add the new items without immediate resize - Array.Copy(added._rest, 0, _rest, 0, addedRestCount); - } - // Setting the _count here because Init4 above sets the count to 4, but in reality the added list may have less items than 4 - _count = added.Count; - return; - } - switch (added.Count) - { - case 0: break; - case 1: Add(added._it0); break; - case 2: Add(added._it0); Add(added._it1); break; - case 3: Add(added._it0); Add(added._it1); Add(added._it2); break; - case 4: Add(added._it0); Add(added._it1); Add(added._it2); Add(added._it3); break; - case var addedCount: - Add(added._it0); Add(added._it1); Add(added._it2); Add(added._it3); - - // Here the _count reflects the 4 added items above - var addedRestCount = addedCount - StackCapacity; - var currRestCount = _count - StackCapacity; - - // Expand the rest so it can hold the current items and added items - if (_rest.Length < currRestCount + addedRestCount) - { - var newRest = new TItem[currRestCount + addedRestCount + StackCapacity]; // add a bit of the empty room of `StackCapacity` at the end - Array.Copy(_rest, 0, newRest, 0, currRestCount); - _rest = newRest; - } - - 
// Copy the added items to the rest - Array.Copy(added._rest, 0, _rest, currRestCount, addedRestCount); - _count += addedRestCount; - break; - } - } - - /// Drops the first item out of the list, and shifts the remaining items indices by -1, so the second item become the first and so on. - /// If the list is empty the method does nothing. - /// The method returns number of the dropped items, e.g. 0 or 1. - /// The method is similar to JS Array.shift - [MethodImpl((MethodImplOptions)256)] - public int DropFirst() - { - switch (_count) - { - case 0: return 0; - case 1: _it0 = default; break; - case 2: _it0 = _it1; _it1 = default; break; - case 3: _it0 = _it1; _it1 = _it2; _it2 = default; break; - case 4: _it0 = _it1; _it1 = _it2; _it2 = _it3; _it3 = default; break; - default: - _it0 = _it1; _it1 = _it2; _it2 = _it3; _it3 = _rest[0]; - Array.Copy(_rest, 1, _rest, 0, (_count - StackCapacity) - 1); - _rest[(_count - StackCapacity) - 1] = default; - break; - } - --_count; - return 1; - } - - ///Clears the list, but keeps the already allocated array on heap to reuse in the future - public void Clear() - { - _it0 = default; - _it1 = default; - _it2 = default; - _it3 = default; - if (_count > StackCapacity) - { - Debug.Assert(_rest != null); - Array.Clear(_rest, 0, _rest.Length); - } - _count = 0; - } - - /// Drops the first `n` items out of the list, and shifts the remaining items indices by -1, so the second item become the first and so on. - /// If the list is empty the method does nothing. - /// The method returns number of the dropped items, e.g. 0 or 1. 
- /// The method is similar to JS Array.shift - public int DropFirstN(int n) - { - if (n <= 0) - return 0; - - if (n >= _count) - { - Clear(); - return _count; - } - - if (_count <= StackCapacity) - { - switch (n) - { - case 1: _it0 = _it1; _it1 = _it2; _it2 = _it3; _it3 = default; break; - case 2: _it0 = _it2; _it1 = _it3; _it2 = default; _it3 = default; break; - // no need to check for n == 4, as the n is strictly less than _count in the check above - default: _it0 = _it3; _it1 = default; _it2 = default; _it3 = default; break; - } - } - else - { - Debug.Assert(_rest != null); - var last = (_count - StackCapacity) - n; - switch (n) - { - case 1: - _it0 = _it1; _it1 = _it2; _it2 = _it3; _it3 = _rest[0]; - Array.Copy(_rest, 1, _rest, 0, last); // don't worry if the `last` is 0 (for the 5 item list), Array.Copy will handle 0 just fine. - _rest[last] = default; - break; - case 2: - _it0 = _it2; _it1 = _it3; _it2 = _rest[0]; _it3 = _rest[1]; - Array.Copy(_rest, 2, _rest, 0, last); - _rest[last] = default; _rest[last + 1] = default; - break; - case 3: - _it0 = _it3; _it1 = _rest[0]; _it2 = _rest[1]; _it3 = _rest[2]; - Array.Copy(_rest, 3, _rest, 0, last); - _rest[last] = default; _rest[last + 1] = default; _rest[last + 2] = default; - break; - default: - _it0 = _rest[0]; _it1 = _rest[1]; _it2 = _rest[2]; _it3 = _rest[3]; - Array.Copy(_rest, n, _rest, 0, last); - Array.Clear(_rest, last, n); - break; - } - } - - _count -= n; - return n; - } - - /// Copy items to new the array - public TItem[] ToArray() - { - switch (_count) - { - case 0: return Tools.Empty(); - case 1: return new[] { _it0 }; - case 2: return new[] { _it0, _it1 }; - case 3: return new[] { _it0, _it1, _it2 }; - case 4: return new[] { _it0, _it1, _it2, _it3 }; - default: - var items = new TItem[_count]; - items[0] = _it0; - items[1] = _it1; - items[2] = _it2; - items[3] = _it3; - Array.Copy(_rest, 0, items, 4, _count - StackCapacity); - return items; + return ref _rest[index - stackCap]; } } } diff --git 
a/src/FastExpressionCompiler/TestTools.cs b/src/FastExpressionCompiler/TestTools.cs index b59dd047..57064f66 100644 --- a/src/FastExpressionCompiler/TestTools.cs +++ b/src/FastExpressionCompiler/TestTools.cs @@ -303,7 +303,7 @@ public static bool AreEqual(IEnumerable expected, IEnumerable actual, // Using those 4 slots directly to represent recent 4 equal items, before the non-equal item. // The slots will be rotated by overriding the `a` again, when the `d` is reached, then the `b`, etc. ItemsCompared a = default, b = default, c = default, d = default; - SmallList4> collectedItems = default; + SmallList, Stack4>> collectedItems = default; var nonEqualItemCount = 0; var collectedMaxNonEqualItems = false; @@ -686,7 +686,7 @@ public bool AreEqual(T expected, T actual, [CallerArgumentExpression(nameof(actual))] string actualName = "", [CallerMemberName] string testName = "", [CallerLineNumber] int sourceLineNumber = -1) => Equals(expected, actual) || Fail(testName, sourceLineNumber, AssertKind.AreEqual, - $"Expected `AreEqual(expected: {expectedName}, actual: {actualName})`,{NewLine} but found expected: `{expected.ToCode()}` and actual: `{actual.ToCode()}`"); + $"Expected `AreEqual(expected: {expectedName}, actual: {actualName})`,{NewLine} but found expected `{expected.ToCode()}` and actual `{actual.ToCode()}`"); [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool AreSame(T expected, T actual, @@ -694,7 +694,7 @@ public bool AreSame(T expected, T actual, [CallerArgumentExpression(nameof(actual))] string actualName = "", [CallerMemberName] string testName = "", [CallerLineNumber] int sourceLineNumber = -1) where T : class => ReferenceEquals(expected, actual) || Fail(testName, sourceLineNumber, AssertKind.AreSame, - $"Expected `AreSame({expectedName}, {actualName})`, but found `{expected.ToCode()}` is Not the same `{actual.ToCode()}`"); + $"Expected `AreSame({expectedName}, {actualName})`, but found expected `{expected.ToCode()}` is Not the same as actual 
`{actual.ToCode()}`"); [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool AreNotSame(T expected, T actual, @@ -756,7 +756,7 @@ public bool AreEqual(IEnumerable expected, IEnumerable actual, // Using those 4 slots directly to represent recent 4 equal items, before the non-equal item. // The slots will be rotated by overriding the `a` again, when the `d` is reached, then the `b`, etc. ItemsCompared a = default, b = default, c = default, d = default; - SmallList4> collectedItems = default; + SmallList, Stack4>> collectedItems = default; var nonEqualItemCount = 0; var collectedMaxNonEqualItems = false; diff --git a/test/FastExpressionCompiler.Benchmarks/AccessByRef_vs_ByIGetRefStructImpl.cs b/test/FastExpressionCompiler.Benchmarks/AccessByRef_vs_ByIGetRefStructImpl.cs index 8fc8bbb4..6cfea6cc 100644 --- a/test/FastExpressionCompiler.Benchmarks/AccessByRef_vs_ByIGetRefStructImpl.cs +++ b/test/FastExpressionCompiler.Benchmarks/AccessByRef_vs_ByIGetRefStructImpl.cs @@ -31,52 +31,52 @@ .NET SDK 7.0.306 [MemoryDiagnoser] public class AccessByRef_vs_ByIGetRefStructImpl { - private readonly SmallList4 Labels; + // private readonly SmallList4 Labels; - public AccessByRef_vs_ByIGetRefStructImpl() - { - for (var i = 0; i < 8; ++i) - Labels.AddDefault(); - } + // public AccessByRef_vs_ByIGetRefStructImpl() + // { + // for (var i = 0; i < 8; ++i) + // Labels.AddDefault(); + // } - // [Benchmark(Baseline = true)] - // public void AccessByRef() - // { - // for (short i = 3; i < 8; ++i) - // { - // ref var l = ref Labels.DebugDeepItems[i]; - // l.InlinedLambdaInvokeIndex = i; - // } - // } + // [Benchmark(Baseline = true)] + // public void AccessByRef() + // { + // for (short i = 3; i < 8; ++i) + // { + // ref var l = ref Labels.DebugDeepItems[i]; + // l.InlinedLambdaInvokeIndex = i; + // } + // } - // [Benchmark] - // public void ByIGetRefStructImpl() - // { - // for (short i = 3; i < 8; ++i) - // Labels.GetSurePresentItem(i, i); - // } + // [Benchmark] + // public void 
ByIGetRefStructImpl() + // { + // for (short i = 3; i < 8; ++i) + // Labels.GetSurePresentItem(i, i); + // } - // [Benchmark] - // public void StaticMethodAsDelegate() - // { - // for (short i = 3; i < 8; ++i) - // Labels.GetSurePresentItem(i, i, Handle); + // [Benchmark] + // public void StaticMethodAsDelegate() + // { + // for (short i = 3; i < 8; ++i) + // Labels.GetSurePresentItem(i, i, Handle); - // [MethodImpl((MethodImplOptions)256)] - // static xo Handle(ref LabelInfo it, in short n) - // { - // it.InlinedLambdaInvokeIndex = n; - // return default; - // } - // } + // [MethodImpl((MethodImplOptions)256)] + // static xo Handle(ref LabelInfo it, in short n) + // { + // it.InlinedLambdaInvokeIndex = n; + // return default; + // } + // } - // public struct SetInlinedLambdaInvokeIndex : IHandleRef - // { - // [MethodImpl((MethodImplOptions)256)] - // public xo Handle(ref LabelInfo it, in short n) - // { - // it.InlinedLambdaInvokeIndex = n; - // return default; - // } - // } + // public struct SetInlinedLambdaInvokeIndex : IHandleRef + // { + // [MethodImpl((MethodImplOptions)256)] + // public xo Handle(ref LabelInfo it, in short n) + // { + // it.InlinedLambdaInvokeIndex = n; + // return default; + // } + // } } diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index b11f0405..a66df33b 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -105,25 +105,33 @@ .NET SDK 9.0.203 | Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio | |------------------------ |----------:|----------:|----------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:| | 
Double_and_Sum_AsSpan | 9.959 ns | 0.2341 ns | 0.4567 ns | 0.64 | 0.04 | 1 | 29 | 0 | 0 | - | NA | - | Double_and_Sum_BySwitch | 15.605 ns | 0.3465 ns | 0.7532 ns | 1.00 | 0.07 | 2 | 35 | 0 | 0 | - | NA | */ + | Double_and_Sum_BySwitch | 15.605 ns | 0.3465 ns | 0.7532 ns | 1.00 | 0.07 | 2 | 35 | 0 | 0 | - | NA | - SmallList> _smallList; + ## Indexer using Unsafe.Add vs AsSpan()[index] + + | Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio | + |----------------------- |---------:|---------:|---------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:| + | Double_and_Sum_Indexer | 17.29 ns | 0.380 ns | 0.355 ns | 1.00 | 0.03 | 1 | 57 | 0 | 0 | - | NA | + | Double_and_Sum_AsSpan | 22.10 ns | 0.311 ns | 0.275 ns | 1.28 | 0.03 | 2 | 57 | 0 | 0 | - | NA | + */ + + SmallList> _list; [GlobalSetup] public void Init() { - // 4 on stack and 4 on heap - for (var i = 0; i < 8; i++) - _smallList.Add(i); + // half on stack and half on heap + for (var i = 0; i < 16; i++) + _list.Add(i); } [Benchmark(Baseline = true)] - public int Double_and_Sum_BySwitch() + public int Double_and_Sum_Indexer() { var sum = 0; - for (var i = 0; i < _smallList.Count; i++) + for (var i = 0; i < _list.Count; i++) { - ref var n = ref _smallList.GetSurePresentItemRef(i); + ref var n = ref _list.GetSurePresentItemRef(i); n += n; sum += n; } @@ -134,9 +142,9 @@ public int Double_and_Sum_BySwitch() public int Double_and_Sum_AsSpan() { var sum = 0; - for (var i = 0; i < _smallList.Count; i++) + for (var i = 0; i < _list.Count; i++) { - ref var n = ref _smallList.GetSurePresentItemRef2(i); + ref var n = ref _list.GetSurePresentItemRef2(i); n += n; sum += n; } @@ -164,7 +172,7 @@ .NET SDK 9.0.203 */ [Benchmark(Baseline = true)] - public int Add_BySwitch() + public int Add_BySpan() { SmallList> list = default; @@ -176,19 +184,4 @@ public int 
Add_BySwitch() sum += n; return sum; } - - [Benchmark] - public int Add_AsSpan() - { - SmallList> list = default; - - for (var n = 8; n > 0; --n) - list.Add2(n + 3); - - - var sum = 0; - foreach (var n in list.Enumerate()) - sum += n; - return sum; - } } diff --git a/test/FastExpressionCompiler.Benchmarks/Program.cs b/test/FastExpressionCompiler.Benchmarks/Program.cs index 2e51ca76..25eac5fc 100644 --- a/test/FastExpressionCompiler.Benchmarks/Program.cs +++ b/test/FastExpressionCompiler.Benchmarks/Program.cs @@ -50,8 +50,8 @@ public static void Main() //BenchmarkRunner.Run(); //BenchmarkRunner.Run(); - BenchmarkRunner.Run(); - // BenchmarkRunner.Run(); + // BenchmarkRunner.Run(); + BenchmarkRunner.Run(); // BenchmarkRunner.Run(); //var a = new NestedLambdasVsVars(); diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index fb0313e6..7210ad17 100644 --- a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -54,12 +54,12 @@ public void Original_case(TestContext t) public void TestSmallList(TestContext t) { for (var i = 0; i < 8; ++i) - _smallList.Add2(i); + _smallList.Add(i); var doubleSum = 0; foreach (var n in _smallList.Enumerate()) doubleSum += n + n; - t.AreEqual(112, doubleSum); + t.AreEqual(56, doubleSum); } } \ No newline at end of file From f341d8a0defe4d9b82e9cb1aa375c178056c8168 Mon Sep 17 00:00:00 2001 From: dadhi Date: Sat, 31 May 2025 16:48:27 +0200 Subject: [PATCH 06/32] move the stuff to the SmallList struct and optimize along the way --- .../FastExpressionCompiler.cs | 2 +- 
src/FastExpressionCompiler/ImTools.cs | 550 +++++++++--------- src/FastExpressionCompiler/TestTools.cs | 4 +- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 2 +- ..._repeated_calls_to_ConcurrentDictionary.cs | 2 +- 5 files changed, 293 insertions(+), 267 deletions(-) diff --git a/src/FastExpressionCompiler/FastExpressionCompiler.cs b/src/FastExpressionCompiler/FastExpressionCompiler.cs index 5df4398f..2653eb4f 100644 --- a/src/FastExpressionCompiler/FastExpressionCompiler.cs +++ b/src/FastExpressionCompiler/FastExpressionCompiler.cs @@ -10929,7 +10929,7 @@ internal static StringBuilder AppendName(this StringBuilder sb, object parOrTarg if (noNameIndex == 0) { var found = false; - foreach (var n in named.Enumerate()) + foreach (var n in named) { if (found = ReferenceEquals(n.Named, parOrTarget)) { diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 977580a6..16dc72f2 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -55,7 +55,7 @@ public static class RefTools /// Note that the result is the `null` even for the struct `T`, so avoid the accessing its members without the check [MethodImpl((MethodImplOptions)256)] public static ref T GetNullRef() => -#if NET6_0_OR_GREATER +#if SUPPORTS_UNSAFE ref Unsafe.NullRef(); #else ref _missing; @@ -91,6 +91,12 @@ public static class SmallList internal const int ForLoopCopyCount = 4; internal const int DefaultInitialCapacity = 4; + [MethodImpl(MethodImplOptions.NoInlining)] + internal static ref T ThrowIndexOutOfBounds(int index, int count) + { + throw new IndexOutOfRangeException($"Index {index} is out of range of count {count} for SmallList<{typeof(T)},..>."); + } + [MethodImpl((MethodImplOptions)256)] internal static void Expand(ref TItem[] items) { @@ -186,199 +192,6 @@ public static int GetIndexOrAdd(this ref SmallList source, in return count; } - /// Good stuff - [MethodImpl((MethodImplOptions)256)] - public static void InitCount(this 
ref SmallList source, int count) - where TStack : struct, IStack - { - Debug.Assert(count > 0, "Count should be more than 0"); - Debug.Assert(source._count == 0, "Initially the count should be 0"); - - // Add the StackCapacity empty space at the end, we may use it later for BuildToArray. - // The actual source Capacity will be StackCapacity + count. - if (count > source.StackCapacity) - source._rest = new T[count]; - source._count = count; - } - - /// Returns a surely present item ref by its index - [MethodImpl((MethodImplOptions)256)] - public static ref T GetSurePresentItemRef(this ref SmallList source, int index) - where TStack : struct, IStack - { - Debug.Assert(source.Count != 0); - Debug.Assert(index < source.Count); - - var stackCap = source.StackCapacity; - if (index < stackCap) - return ref source._stack[index]; - - Debug.Assert(source._rest != null); - return ref source._rest[index - stackCap]; - } - -#if SUPPORTS_CREATE_SPAN - /// Returns a surely present item ref by its index - [MethodImpl((MethodImplOptions)256)] - public static ref T GetSurePresentItemRef2(this ref SmallList source, int index) - where TStack : struct, IStack - { - Debug.Assert(source.Count != 0); - Debug.Assert(index < source.Count); - - var stackCap = source.StackCapacity; - if (index < stackCap) - return ref source._stack.AsSpan()[index]; - - Debug.Assert(source._rest != null); - return ref source._rest[index - stackCap]; - } -#endif - - /// Returns last present item ref, assumes that the list is not empty! - [MethodImpl((MethodImplOptions)256)] - public static ref T GetLastSurePresentItem(this ref SmallList source) - where TStack : struct, IStack => - ref source.GetSurePresentItemRef(source._count - 1); - - /// Removes the last item from the list aka the Stack Pop. Assumes that the list is not empty! 
- [MethodImpl((MethodImplOptions)256)] - public static void RemoveLastSurePresentItem(this ref SmallList source) - where TStack : struct, IStack - { - Debug.Assert(source._count != 0); - var index = --source._count; - source[index] = default; - } - - ///Clears the list, but keeps the already allocated array on heap to reuse in the future - [MethodImpl((MethodImplOptions)256)] - public static void Clear(this ref SmallList source) - where TStack : struct, IStack - { - source._stack = default; // clear the stack items - var restCount = source._count - source.StackCapacity; - if (restCount > 0) - { - Debug.Assert(source._rest != null, "Expecting deeper items are already existing on stack"); - Array.Clear(source._rest, 0, restCount); - } - source._count = 0; - } - - /// Appends the default item to the end of the list and returns the reference to it. - [MethodImpl((MethodImplOptions)256)] - public static ref T AddDefaultAndGetRef(this ref SmallList source) - where TStack : struct, IStack - { - var index = source._count++; - var stackCap = source.StackCapacity; - if (index < stackCap) - return ref source._stack[index]; - Debug.Assert(source._rest != null, "Expecting deeper items are already existing on stack"); - return ref SmallList.AddDefaultAndGetRef(ref source._rest, index - stackCap); - } - - /// Adds the item to the end of the list aka the Stack.Push. - /// Returns the index. 
- [MethodImpl((MethodImplOptions)256)] - public static int Add(this ref SmallList source, in T item) - where TStack : struct, IStack - { - var index = source._count++; - var stackCap = source.StackCapacity; - if (index < stackCap) - source._stack[index] = item; - else - SmallList.AddDefaultAndGetRef(ref source._rest, index - stackCap) = item; - Debug.Assert(source._rest != null, "Expecting deeper items are already existing on stack"); - return index; - } - - /// Looks for the item in the list and return its index if found or -1 for the absent item - [MethodImpl((MethodImplOptions)256)] - public static int TryGetIndex(this ref SmallList source, in T item, TEq eq = default) - where TStack : struct, IStack - where TEq : struct, IEq - { - if (source._count != 0) - { - var index = 0; - foreach (var it in source.Enumerate()) - { - if (eq.Equals(item, it)) - return index; - ++index; - } - } - return -1; - } - - /// Returns the index of the found item or appends the item to the end of the list, and returns its index - [MethodImpl((MethodImplOptions)256)] - public static int GetIndexOrAdd(this ref SmallList source, in T item, TEq eq = default) - where TStack : struct, IStack - where TEq : struct, IEq - { - var i = source.TryGetIndex(in item, eq); - return i != -1 ? 
i : source.Add(in item); - } - - /// Enumerates all the items - [MethodImpl((MethodImplOptions)256)] - public static SmallListEnumerable Enumerate(this ref SmallList list) - where TStack : struct, IStack - => new SmallListEnumerable(list); - - /// Enumerable on stack, without allocations - public struct SmallListEnumerable : IEnumerable - where TStack : struct, IStack - { - private readonly SmallList _list; - /// Constructor - public SmallListEnumerable(SmallList list) => _list = list; - /// - [MethodImpl((MethodImplOptions)256)] - public SmallListEnumerator GetEnumerator() => new SmallListEnumerator(_list); - IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); - IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); - } - - /// Enumerator on stack, without allocations - public struct SmallListEnumerator : IEnumerator, IEnumerator - where TStack : struct, IStack - { - private readonly SmallList _list; - private int _index; - internal SmallListEnumerator(SmallList list) - { - _list = list; - _index = -1; - } - private T _current; - /// - public T Current => _current; - object IEnumerator.Current => _current; - /// - [MethodImpl((MethodImplOptions)256)] - public bool MoveNext() - { - var index = ++_index; - if (index < _list.Count) - { - _current = index < _list.StackCapacity - ? 
_list._stack[index] - : _list._rest[index - _list.StackCapacity]; - return true; - } - return false; - } - /// - public void Reset() => _index = -1; - /// - public void Dispose() { } - } - /// Returns surely present item ref by its index [MethodImpl((MethodImplOptions)256)] public static ref TItem GetSurePresentItemRef(this ref SmallList2 source, int index) @@ -390,7 +203,7 @@ public static ref TItem GetSurePresentItemRef(this ref SmallList2 case 0: return ref source._it0; case 1: return ref source._it1; default: - Debug.Assert(source._rest != null, $"Expecting deeper items are already existing on stack at index: {index}"); + Debug.Assert(source._rest != null, $"Expecting deeper items are already existing on heap at index: {index}"); return ref source._rest[index - SmallList2.StackCapacity]; } } @@ -499,6 +312,11 @@ public interface IStack [UnscopedRef] ref T Last { get; } + /// Returns the item by ref to read and write the item value, + /// but does not check the index bounds comparing to the `this[index]` + [UnscopedRef] + ref T GetSurePresentItemRef(int index); + /// Indexer returning the item by ref to read and write the item value [UnscopedRef] ref T this[int index] { get; } @@ -530,6 +348,22 @@ public struct Stack2 : IStack> [UnscopedRef] public ref T Last { get => ref _it1; } + /// + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T GetSurePresentItemRef(int index) + { +#if SUPPORTS_UNSAFE + return ref Unsafe.Add(ref _it0, index); +#else + switch (index) + { + case 0: return ref _it0; + default: return ref _it1; + } +#endif + } + /// [UnscopedRef] public ref T this[int index] @@ -539,15 +373,7 @@ public ref T this[int index] { if (index < 0 | index >= StackCapacity) return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); -#if SUPPORTS_UNSAFE - return ref Unsafe.Add(ref _it0, index); -#else - switch (index) - { - case 0: return ref _it0; - default: return ref _it1; - } -#endif + return ref GetSurePresentItemRef(index); } } @@ -580,6 
+406,24 @@ public struct Stack4 : IStack> [UnscopedRef] public ref T Last { get => ref _it3; } + /// + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T GetSurePresentItemRef(int index) + { +#if SUPPORTS_UNSAFE + return ref Unsafe.Add(ref _it0, index); +#else + switch (index) + { + case 0: return ref _it0; + case 1: return ref _it1; + case 2: return ref _it2; + default: return ref _it3; + } +#endif + } + /// [UnscopedRef] public ref T this[int index] @@ -589,17 +433,7 @@ public ref T this[int index] { if (index < 0 | index >= StackCapacity) return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); -#if SUPPORTS_UNSAFE - return ref Unsafe.Add(ref _it0, index); -#else - switch (index) - { - case 0: return ref _it0; - case 1: return ref _it1; - case 2: return ref _it2; - default: return ref _it3; - } -#endif + return ref GetSurePresentItemRef(index); } } @@ -631,29 +465,38 @@ public struct Stack8 : IStack> [UnscopedRef] public ref T Last { get => ref _it7; } + /// + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T GetSurePresentItemRef(int index) + { +#if SUPPORTS_UNSAFE + return ref Unsafe.Add(ref _it0, index); +#else + switch (index) + { + case 0: return ref _it0; + case 1: return ref _it1; + case 2: return ref _it2; + case 3: return ref _it3; + case 4: return ref _it4; + case 5: return ref _it5; + case 6: return ref _it6; + default: return ref _it7; + } +#endif + } + /// [UnscopedRef] public ref T this[int index] { + [MethodImpl((MethodImplOptions)256)] get { if (index < 0 | index >= StackCapacity) return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); -#if SUPPORTS_UNSAFE - return ref Unsafe.Add(ref _it0, index); -#else - switch (index) - { - case 0: return ref _it0; - case 1: return ref _it1; - case 2: return ref _it2; - case 3: return ref _it3; - case 4: return ref _it4; - case 5: return ref _it5; - case 6: return ref _it6; - default: return ref _it7; - } -#endif + return ref GetSurePresentItemRef(index); } } @@ 
-687,6 +530,36 @@ public struct Stack16 : IStack> [UnscopedRef] public ref T Last { get => ref _it15; } + /// + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T GetSurePresentItemRef(int index) + { +#if SUPPORTS_UNSAFE + return ref Unsafe.Add(ref _it0, index); +#else + switch (index) + { + case 0: return ref _it0; + case 1: return ref _it1; + case 2: return ref _it2; + case 3: return ref _it3; + case 4: return ref _it4; + case 5: return ref _it5; + case 6: return ref _it6; + case 7: return ref _it7; + case 8: return ref _it8; + case 9: return ref _it9; + case 10: return ref _it10; + case 11: return ref _it11; + case 12: return ref _it12; + case 13: return ref _it13; + case 14: return ref _it14; + default: return ref _it15; + } +#endif + } + /// [UnscopedRef] public ref T this[int index] @@ -696,29 +569,7 @@ public ref T this[int index] { if (index < 0 | index >= StackCapacity) return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); -#if SUPPORTS_UNSAFE - return ref Unsafe.Add(ref _it0, index); -#else - switch (index) - { - case 0: return ref _it0; - case 1: return ref _it1; - case 2: return ref _it2; - case 3: return ref _it3; - case 4: return ref _it4; - case 5: return ref _it5; - case 6: return ref _it6; - case 7: return ref _it7; - case 8: return ref _it8; - case 9: return ref _it9; - case 10: return ref _it10; - case 11: return ref _it11; - case 12: return ref _it12; - case 13: return ref _it13; - case 14: return ref _it14; - default: return ref _it15; - } -#endif + return ref GetSurePresentItemRef(index); } } @@ -733,8 +584,8 @@ public Span AsSpan() => // todo: @wip /// Generic version of SmallList abstracted for how much items are on stack -public struct SmallList - where TStack : struct, IStack +public struct SmallList : IEnumerable + where TStack : struct, IStack { internal int _count; // For this warning it is fine `_stack` is never assigned to, and will always have its default value @@ -742,7 +593,7 @@ public struct SmallList 
internal TStack _stack; #pragma warning restore CS0649 - internal TItem[] _rest; + internal T[] _rest; /// public int StackCapacity @@ -758,24 +609,199 @@ public int Count get => _count; } + /// Ensures that the list has allocated space to hold `count` of items + [MethodImpl((MethodImplOptions)256)] + public void InitCount(int count) + { + Debug.Assert(count > 0, "Count should be more than 0"); + Debug.Assert(_count == 0, "Initial the count should be 0"); + + // Add the StackCapacity empty space at the end, we may use it later for BuildToArray. + // The actual source Capacity will be StackCapacity + count. + if (count > StackCapacity) + _rest = new T[count]; + _count = count; + } + /// Returns surely present item by ref [UnscopedRef] - public ref TItem this[int index] + public ref T this[int index] { [MethodImpl((MethodImplOptions)256)] get { - Debug.Assert(_count != 0); - Debug.Assert(index < _count); + if (index < 0 | index >= _count) + return ref SmallList.ThrowIndexOutOfBounds(index, _count); + var stackCap = _stack.Capacity; if (index < stackCap) - return ref _stack[index]; - Debug.Assert(_rest != null); + return ref _stack.GetSurePresentItemRef(index); + + Debug.Assert(_rest != null, "Expecting deeper items are already existing on heap"); return ref _rest[index - stackCap]; } } + + /// Returns a surely present item ref by its index + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T GetSurePresentItemRef(int index) + { + Debug.Assert(Count != 0); + Debug.Assert(index < Count); + + var stackCap = _stack.Capacity; + if (index < stackCap) + return ref _stack.GetSurePresentItemRef(index); + + Debug.Assert(_rest != null); + return ref _rest[index - stackCap]; // todo: @wip use GetSurePresentItemRef for the array? + } + + /// Appends the default item to the end of the list and returns the reference to it. 
+ [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T AddDefaultAndGetRef() + { + var index = _count++; + var stackCap = StackCapacity; + if (index < stackCap) + return ref _stack.GetSurePresentItemRef(index); + return ref SmallList.AddDefaultAndGetRef(ref _rest, index - stackCap); + } + + /// Adds the item to the end of the list aka the Stack.Push. Returns the index of the added item. + [MethodImpl((MethodImplOptions)256)] + public int Add(in T item) + { + var index = _count++; + var stackCap = StackCapacity; + if (index < stackCap) + _stack.GetSurePresentItemRef(index) = item; + else + SmallList.AddDefaultAndGetRef(ref _rest, index - stackCap) = item; + return index; + } + + /// Looks for the item in the list and return its index if found or -1 for the absent item + [MethodImpl((MethodImplOptions)256)] + public int TryGetIndex(in T item, TEq eq = default) where TEq : struct, IEq + { + if (_count != 0) + { + var index = 0; + foreach (var it in this) + { + if (eq.Equals(item, it)) + return index; + ++index; + } + } + return -1; + } + + /// Returns the index of the found item or appends the item to the end of the list, and returns its index + [MethodImpl((MethodImplOptions)256)] + public int GetIndexOrAdd(in T item, TEq eq = default) where TEq : struct, IEq + { + var i = TryGetIndex(in item, eq); + return i != -1 ? i : Add(in item); + } + + ///Clears the list, but keeps the already allocated array on heap to reuse in the future + [MethodImpl((MethodImplOptions)256)] + public void Clear() + { + _stack = default; // todo: @perf is there way to faster clear items on stack? 
+ var restCount = _count - StackCapacity; + if (restCount > 0) + { + Debug.Assert(_rest != null, "Expecting deeper items are already existing on heap"); + Array.Clear(_rest, 0, restCount); + } + _count = 0; + } + +#if SUPPORTS_CREATE_SPAN + // todo: @wip @remove for benchmarking only + /// Returns a surely present item ref by its index + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T GetSurePresentItemRef2(int index) + { + Debug.Assert(Count != 0); + Debug.Assert(index < Count); + + var stackCap = StackCapacity; + if (index < stackCap) + return ref _stack.AsSpan()[index]; + + Debug.Assert(_rest != null); + return ref _rest[index - stackCap]; + } +#endif + + /// Returns last present item ref, assumes that the list is not empty! + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T GetLastSurePresentItem() + { + Debug.Assert(_count != 0, "Expecting that the list is not empty"); + return ref GetSurePresentItemRef(_count - 1); + } + + /// Removes the last item from the list aka the Stack Pop. Assumes that the list is not empty! 
+ [MethodImpl((MethodImplOptions)256)] + public void RemoveLastSurePresentItem() + { + Debug.Assert(_count != 0, "Expecting that the list is not empty"); + var index = --_count; + GetSurePresentItemRef(index) = default; + } + + /// Returns an enumerator struct + [MethodImpl((MethodImplOptions)256)] + public SmallListEnumerator GetEnumerator() => new SmallListEnumerator(this); + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); +} + +/// Enumerator on stack, without allocations +public struct SmallListEnumerator : IEnumerator, IEnumerator + where TStack : struct, IStack +{ + private readonly SmallList _list; + private int _index; + internal SmallListEnumerator(SmallList list) + { + _list = list; + _index = -1; + } + private T _current; + /// + public T Current => _current; + object IEnumerator.Current => _current; + /// + [MethodImpl((MethodImplOptions)256)] + public bool MoveNext() + { + var index = ++_index; + if (index < _list.Count) + { + _current = index < _list.StackCapacity + ? _list._stack.GetSurePresentItemRef(index) + : _list._rest[index - _list.StackCapacity]; + return true; + } + return false; + } + /// + public void Reset() => _index = -1; + /// + public void Dispose() { } } + /// List with the number of first items (2) stored inside its struct and the rest in the growable array. 
/// Supports addition and removal (remove is without resize) only at the end of the list, aka Stack behavior [DebuggerDisplay("{Count} of {_it0?.ToString()}, {_it1?.ToString()}, ...")] @@ -870,7 +896,7 @@ public TItem this[int index] case 0: return _it0; case 1: return _it1; default: - Debug.Assert(_rest != null, $"Expecting deeper items are already existing on stack at index: {index}"); + Debug.Assert(_rest != null, $"Expecting deeper items are already existing on heap at index: {index}"); return _rest[index - StackCapacity]; } } diff --git a/src/FastExpressionCompiler/TestTools.cs b/src/FastExpressionCompiler/TestTools.cs index 57064f66..f90de3fd 100644 --- a/src/FastExpressionCompiler/TestTools.cs +++ b/src/FastExpressionCompiler/TestTools.cs @@ -401,7 +401,7 @@ public static bool AreEqual(IEnumerable expected, IEnumerable actual, else sb.AppendLine($"first {MaxNonEqualItemCount} non equal items (and stopped searching):"); - foreach (var (index, isEqual, expectedItem, actualItem) in collectedItems.Enumerate()) + foreach (var (index, isEqual, expectedItem, actualItem) in collectedItems) sb.AppendLine($"{index,4}{(isEqual ? " " : " -> ")}{expectedItem.ToCode(),16},{actualItem.ToCode(),16}"); } @@ -854,7 +854,7 @@ public bool AreEqual(IEnumerable expected, IEnumerable actual, else sb.AppendLine($"first {MaxNonEqualItemCount} non equal items (and stopped searching):"); - foreach (var (index, isEqual, expectedItem, actualItem) in collectedItems.Enumerate()) + foreach (var (index, isEqual, expectedItem, actualItem) in collectedItems) sb.AppendLine($"{index,4}{(isEqual ? 
" " : " -> ")}{expectedItem.ToCode(),16},{actualItem.ToCode(),16}"); } diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index a66df33b..dacf4241 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -180,7 +180,7 @@ public int Add_BySpan() list.Add(n + 3); var sum = 0; - foreach (var n in list.Enumerate()) + foreach (var n in list) sum += n; return sum; } diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index 7210ad17..0092c36f 100644 --- a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -57,7 +57,7 @@ public void TestSmallList(TestContext t) _smallList.Add(i); var doubleSum = 0; - foreach (var n in _smallList.Enumerate()) + foreach (var n in _smallList) doubleSum += n + n; t.AreEqual(56, doubleSum); From bccd35a349f368be8eba1b3b0c4a33978929ee6c Mon Sep 17 00:00:00 2001 From: dadhi Date: Sun, 1 Jun 2025 22:57:02 +0200 Subject: [PATCH 07/32] remove SmallList2 and 300 loc --- .../Expression.cs | 61 ++- .../ExpressionVisitor.cs | 2 +- .../FastExpressionCompiler.cs | 10 +- src/FastExpressionCompiler/ImTools.cs | 434 +++--------------- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 24 +- 5 files changed, 120 insertions(+), 411 deletions(-) diff --git a/src/FastExpressionCompiler.LightExpression/Expression.cs 
b/src/FastExpressionCompiler.LightExpression/Expression.cs index 9411ea70..c34c0188 100644 --- a/src/FastExpressionCompiler.LightExpression/Expression.cs +++ b/src/FastExpressionCompiler.LightExpression/Expression.cs @@ -1598,7 +1598,7 @@ public static BinaryExpression LessThanOrEqual(Expression left, Expression right public static BlockExpression Block(IEnumerable expressions) => new BlockExpression(expressions.AsReadOnlyList()); // todo: @perf optimize the double copying - public static BlockExpression Block(in SmallList2 expressions) => new BlockExpression(in expressions); + public static BlockExpression Block(in SmallList> expressions) => new BlockExpression(in expressions); public static BlockExpression Block(IEnumerable variables, Expression expr0) => new ManyVariablesBlockExpression(variables.AsReadOnlyList(), expr0); // todo: @perf @mem add the check for empty variables @@ -1620,7 +1620,7 @@ public static BlockExpression Block(IEnumerable variables, public static BlockExpression Block(IEnumerable variables, IEnumerable expressions) => Block(variables, expressions.AsReadOnlyList()); - public static BlockExpression Block(IEnumerable variables, in SmallList2 expressions) + public static BlockExpression Block(IEnumerable variables, in SmallList> expressions) { var vars = variables.AsReadOnlyList(); return vars.Count == 0 @@ -1643,7 +1643,7 @@ public static BlockExpression Block(Type type, IReadOnlyList express public static BlockExpression Block(Type type, IEnumerable expressions) => new TypedBlockExpression(type, expressions.AsReadOnlyList()); // todo: @perf @mem - public static BlockExpression Block(Type type, in SmallList2 expressions) => + public static BlockExpression Block(Type type, in SmallList> expressions) => new TypedBlockExpression(type, in expressions); public static BlockExpression Block(Type type, IEnumerable variables, Expression expr0) @@ -1670,7 +1670,7 @@ public static BlockExpression Block(Type type, IEnumerable public static BlockExpression 
Block(Type type, IEnumerable variables, IEnumerable expressions) => new TypedManyVariablesBlockExpression(type, variables.AsReadOnlyList(), expressions.AsReadOnlyList()); // todo: @perf @mem - public static BlockExpression Block(Type type, IEnumerable variables, in SmallList2 expressions) => + public static BlockExpression Block(Type type, IEnumerable variables, in SmallList> expressions) => new TypedManyVariablesBlockExpression(type, variables.AsReadOnlyList(), in expressions); public static BlockExpression MakeBlock(Type type, IEnumerable variables, IEnumerable expressions) @@ -5062,20 +5062,47 @@ public class BlockExpression : Expression, IArgumentProvider public override ExpressionType NodeType => ExpressionType.Block; public override Type Type => Result.Type; public virtual IReadOnlyList Variables => Tools.Empty(); - public SmallList2 Expressions; + public SmallList> Expressions; public Expression Result => Expressions.GetLastSurePresentItem(); // todo: @check what if no expressions? 
public virtual int ArgumentCount => 0; public virtual Expression GetArgument(int index) => throw new NotImplementedException(); - internal BlockExpression(in SmallList2 expressions) => + internal BlockExpression(in SmallList> expressions) => Expressions = expressions; - internal BlockExpression(Expression e0) => - Expressions.Populate1(e0); - internal BlockExpression(Expression e0, Expression e1) => - Expressions.Populate2(e0, e1); - internal BlockExpression(Expression e0, Expression e1, params Expression[] rest) => - Expressions.Populate(e0, e1, rest); - internal BlockExpression(IReadOnlyList expressions) => - Expressions.Populate(expressions); + internal BlockExpression(Expression e0) + { + Expressions.Stack._it0 = e0; + Expressions.Count = 1; + } + internal BlockExpression(Expression e0, Expression e1) + { + Expressions.Stack._it0 = e0; + Expressions.Stack._it1 = e1; + Expressions.Count = 2; + } + internal BlockExpression(Expression e0, Expression e1, params Expression[] rest) + { + Expressions.Stack._it0 = e0; + Expressions.Stack._it1 = e1; + Expressions.Rest = rest; + Expressions.Count = rest.Length + 2; + } + internal BlockExpression(IReadOnlyList expressions) + { + Debug.Assert(expressions != null, "Expressions cannot be null"); + var count = expressions.Count; + if (count >= 1) + Expressions.Stack._it0 = expressions[0]; + if (count >= 2) + Expressions.Stack._it1 = expressions[1]; + if (count >= 3) + { + var rest = new Expression[count - 2]; + for (var i = 2; i < count; ++i) + rest[i - 2] = expressions[i]; + Expressions.Rest = rest; + } + Expressions.Count = count; + } #if SUPPORTS_VISITOR [RequiresUnreferencedCode(Trimming.Message)] protected internal override Expression Accept(ExpressionVisitor visitor) => visitor.VisitBlock(this); @@ -5090,7 +5117,7 @@ internal override SysExpr CreateSysExpression(ref SmallList exp public sealed class TypedBlockExpression : BlockExpression { public override Type Type { get; } - internal TypedBlockExpression(Type type, in 
SmallList2 expressions) : base(in expressions) => Type = type; + internal TypedBlockExpression(Type type, in SmallList> expressions) : base(in expressions) => Type = type; internal TypedBlockExpression(Type type, Expression e0) : base(e0) => Type = type; internal TypedBlockExpression(Type type, Expression e0, Expression e1) : base(e0, e1) => Type = type; internal TypedBlockExpression(Type type, Expression e0, Expression e1, params Expression[] rest) : base(e0, e1, rest) => Type = type; @@ -5102,7 +5129,7 @@ public class ManyVariablesBlockExpression : BlockExpression public sealed override IReadOnlyList Variables { get; } public sealed override int ArgumentCount => Expressions.Count; public sealed override Expression GetArgument(int index) => Expressions.GetSurePresentItemRef(index); - internal ManyVariablesBlockExpression(IReadOnlyList variables, in SmallList2 expressions) : base(in expressions) => + internal ManyVariablesBlockExpression(IReadOnlyList variables, in SmallList> expressions) : base(in expressions) => Variables = variables; internal ManyVariablesBlockExpression(IReadOnlyList variables, Expression e0) : base(e0) => Variables = variables; @@ -5117,7 +5144,7 @@ internal ManyVariablesBlockExpression(IReadOnlyList variabl public sealed class TypedManyVariablesBlockExpression : ManyVariablesBlockExpression { public override Type Type { get; } - internal TypedManyVariablesBlockExpression(Type type, IReadOnlyList variables, in SmallList2 expressions) + internal TypedManyVariablesBlockExpression(Type type, IReadOnlyList variables, in SmallList> expressions) : base(variables, in expressions) => Type = type; internal TypedManyVariablesBlockExpression(Type type, IReadOnlyList variables, Expression e0) : base(variables, e0) => Type = type; diff --git a/src/FastExpressionCompiler.LightExpression/ExpressionVisitor.cs b/src/FastExpressionCompiler.LightExpression/ExpressionVisitor.cs index 3f914c68..62923417 100644 --- 
a/src/FastExpressionCompiler.LightExpression/ExpressionVisitor.cs +++ b/src/FastExpressionCompiler.LightExpression/ExpressionVisitor.cs @@ -65,7 +65,7 @@ public IReadOnlyList Visit(IReadOnlyList nodes) return newNodes ?? nodes; } - public IReadOnlyList Visit(SmallList2 nodes) + public IReadOnlyList Visit(SmallList> nodes) { var newNodes = new Expression[nodes.Count]; for (var i = 0; i < nodes.Count; ++i) diff --git a/src/FastExpressionCompiler/FastExpressionCompiler.cs b/src/FastExpressionCompiler/FastExpressionCompiler.cs index 2653eb4f..625502ad 100644 --- a/src/FastExpressionCompiler/FastExpressionCompiler.cs +++ b/src/FastExpressionCompiler/FastExpressionCompiler.cs @@ -773,7 +773,7 @@ public struct ClosureInfo /// Constant usage count and variable index. /// It is a separate collection from the Constants because we directly convert later into the closure array - public SmallList2 ConstantUsageThenVarIndex; + public SmallList> ConstantUsageThenVarIndex; /// Parameters not passed through lambda parameter list But used inside lambda body. /// The top expression should Not contain not passed parameters. 
@@ -1686,9 +1686,9 @@ private static Expression CreateInlinedLambdaInvocationExpression( #else var lambdaPars = lambdaExpr.Parameters; #endif - SmallList2 inlinedBlockExprs = default; - SmallList2 savedVars = default; - SmallList2 savedVarsBlockExprs = default; + SmallList> inlinedBlockExprs = default; + SmallList> savedVars = default; + SmallList> savedVarsBlockExprs = default; for (var i = 0; i < invokeArgCount; i++) { @@ -11308,7 +11308,7 @@ internal static StringBuilder NewLineIndentArgumentExprs(this StringBuilder s } // todo: @improve figure how to avoid the duplication with the method above IReadOnlyList exprs - internal static StringBuilder NewLineIndentArgumentExprs(this StringBuilder sb, SmallList2 exprs, + internal static StringBuilder NewLineIndentArgumentExprs(this StringBuilder sb, SmallList> exprs, List paramsExprs, List uniqueExprs, List lts, int lineIndent, bool stripNamespace, Func printType, int indentSpaces, ObjectToCode notRecognizedToCode) where T : Expression diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 16dc72f2..5842b960 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -98,10 +98,10 @@ internal static ref T ThrowIndexOutOfBounds(int index, int count) } [MethodImpl((MethodImplOptions)256)] - internal static void Expand(ref TItem[] items) + internal static void Expand(ref T[] items) { // `| 1` is for the case when the length is 0 - var newItems = new TItem[(items.Length << 1) | 1]; // have fun to guess the new length, ha-ha ;-P + var newItems = new T[(items.Length << 1) | 1]; // have fun to guess the new length, ha-ha ;-P if (items.Length > ForLoopCopyCount) Array.Copy(items, newItems, items.Length); else @@ -113,7 +113,7 @@ internal static void Expand(ref TItem[] items) /// Appends the new default item at the end of the items. Assumes that `index lte items.Length`! 
/// `items` should be not null [MethodImpl((MethodImplOptions)256)] - public static ref TItem AddDefaultToNotNullItemsAndGetRef(ref TItem[] items, int index) + public static ref T AddDefaultToNotNullItemsAndGetRef(ref T[] items, int index) { Debug.Assert(index <= items.Length); if (index == items.Length) @@ -123,12 +123,12 @@ public static ref TItem AddDefaultToNotNullItemsAndGetRef(ref TItem[] ite /// Appends the new default item at the end of the items. Assumes that `index lte items.Length`, `items` may be null [MethodImpl((MethodImplOptions)256)] - public static ref TItem AddDefaultAndGetRef(ref TItem[] items, int index, int initialCapacity = DefaultInitialCapacity) + public static ref T AddDefaultAndGetRef(ref T[] items, int index, int initialCapacity = DefaultInitialCapacity) { if (items == null) { Debug.Assert(index == 0); - items = new TItem[initialCapacity]; + items = new T[initialCapacity]; return ref items[index]; } @@ -140,25 +140,26 @@ public static ref TItem AddDefaultAndGetRef(ref TItem[] items, int index, /// Returns surely present item ref by its index [MethodImpl((MethodImplOptions)256)] - public static ref TItem GetSurePresentItemRef(this ref SmallList source, int index) => + public static ref T GetSurePresentItemRef(this ref SmallList source, int index) => ref source.Items[index]; // todo: @perf add the not null variant /// Appends the new default item to the list and returns ref to it for write or read [MethodImpl((MethodImplOptions)256)] - public static ref TItem Add(this ref SmallList source, int initialCapacity = DefaultInitialCapacity) => + public static ref T Add(this ref SmallList source, int initialCapacity = DefaultInitialCapacity) => ref AddDefaultAndGetRef(ref source.Items, source.Count++, initialCapacity); /// Appends the new item to the list // todo: @perf add the not null variant [MethodImpl((MethodImplOptions)256)] - public static void Add(this ref SmallList source, in TItem item, int initialCapacity = DefaultInitialCapacity) => + 
public static void Add(this ref SmallList source, in T item, int initialCapacity = DefaultInitialCapacity) => AddDefaultAndGetRef(ref source.Items, source.Count++, initialCapacity) = item; /// Looks for the item in the list and return its index if found or -1 for the absent item [MethodImpl((MethodImplOptions)256)] - public static int TryGetIndex(this TItem[] items, in TItem it, int startIndex, int count, TEq eq = default, int notFoundResult = -1) - where TEq : struct, IEq + public static int TryGetIndex(this T[] items, in T it, int startIndex, int count, TEq eq = default, + int notFoundResult = -1) + where TEq : struct, IEq { Debug.Assert(items != null); for (var i = startIndex; i < count; ++i) @@ -172,14 +173,14 @@ public static int TryGetIndex(this TItem[] items, in TItem it, int s /// Looks for the item in the list and return its index if found or -1 for the absent item [MethodImpl((MethodImplOptions)256)] - public static int TryGetIndex(this ref SmallList source, TItem it, TEq eq = default) - where TEq : struct, IEq + public static int TryGetIndex(this ref SmallList source, T it, TEq eq = default) + where TEq : struct, IEq => source.Items.TryGetIndex(it, 0, source.Count, eq); /// Returns the index of the found item or appends the item to the end of the list, and returns its index [MethodImpl((MethodImplOptions)256)] - public static int GetIndexOrAdd(this ref SmallList source, in TItem item, TEq eq = default) - where TEq : struct, IEq + public static int GetIndexOrAdd(this ref SmallList source, in T item, TEq eq = default) + where TEq : struct, IEq { var count = source.Count; if (count != 0) @@ -191,97 +192,6 @@ public static int GetIndexOrAdd(this ref SmallList source, in source.Add() = item; return count; } - - /// Returns surely present item ref by its index - [MethodImpl((MethodImplOptions)256)] - public static ref TItem GetSurePresentItemRef(this ref SmallList2 source, int index) - { - Debug.Assert(source.Count != 0); - Debug.Assert(index < source.Count); 
- switch (index) - { - case 0: return ref source._it0; - case 1: return ref source._it1; - default: - Debug.Assert(source._rest != null, $"Expecting deeper items are already existing on heap at index: {index}"); - return ref source._rest[index - SmallList2.StackCapacity]; - } - } - - /// Returns last present item ref, assumes that the list is not empty! - [MethodImpl((MethodImplOptions)256)] - public static ref TItem GetLastSurePresentItem(this ref SmallList2 source) => - ref source.GetSurePresentItemRef(source._count - 1); - - /// Appends the default item to the end of the list and returns the reference to it. - [MethodImpl((MethodImplOptions)256)] - public static ref TItem AddDefaultAndGetRef(this ref SmallList2 source) - { - var index = source._count++; - switch (index) - { - case 0: return ref source._it0; - case 1: return ref source._it1; - default: - return ref AddDefaultAndGetRef(ref source._rest, index - SmallList2.StackCapacity); - } - } - - /// Looks for the item in the list and return its index if found or -1 for the absent item - [MethodImpl((MethodImplOptions)256)] - public static int TryGetIndex(this ref SmallList2 source, TItem it, TEq eq = default) - where TEq : struct, IEq - { - switch (source._count) - { - case 1: - if (eq.Equals(it, source._it0)) return 0; - break; - - case var n: - if (eq.Equals(it, source._it0)) return 0; - if (eq.Equals(it, source._it1)) return 1; - const int StackCapacity = SmallList2.StackCapacity; - if (n == StackCapacity) - break; - return source._rest.TryGetIndex(in it, 0, source._count - StackCapacity, eq, -1 - StackCapacity) + StackCapacity; - } - return -1; - } - - /// Returns the index of the found item or appends the item to the end of the list, and returns its index - [MethodImpl((MethodImplOptions)256)] - public static int GetIndexOrAdd(this ref SmallList2 source, TItem item, TEq eq = default) - where TEq : struct, IEq - { - switch (source._count) - { - case 0: - source._count = 1; - source._it0 = item; - return 0; 
- - case 1: - if (eq.Equals(item, source._it0)) return 0; - source._count = 2; - source._it1 = item; - return 1; - - default: - if (eq.Equals(item, source._it0)) return 0; - if (eq.Equals(item, source._it1)) return 1; - - var restCount = source._count - SmallList2.StackCapacity; - if (restCount != 0) - { - var i = source._rest.TryGetIndex(in item, 0, restCount, eq); - if (i != -1) - return i + SmallList2.StackCapacity; - } - AddDefaultAndGetRef(ref source._rest, restCount) = item; - return source._count++; - } - } } #pragma warning disable CS9101 // UnscopedRef goes wrong on Ubuntu @@ -304,14 +214,6 @@ public interface IStack /// Possible count of items holding on stack int Capacity { get; } - /// Gets first item by ref - [UnscopedRef] - ref T First { get; } - - /// Gets last item by ref - [UnscopedRef] - ref T Last { get; } - /// Returns the item by ref to read and write the item value, /// but does not check the index bounds comparing to the `this[index]` [UnscopedRef] @@ -340,14 +242,6 @@ public struct Stack2 : IStack> /// public int Capacity => StackCapacity; - /// - [UnscopedRef] - public ref T First { get => ref _it0; } - - /// - [UnscopedRef] - public ref T Last { get => ref _it1; } - /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] @@ -398,14 +292,6 @@ public struct Stack4 : IStack> /// public int Capacity => StackCapacity; - /// - [UnscopedRef] - public ref T First { get => ref _it0; } - - /// - [UnscopedRef] - public ref T Last { get => ref _it3; } - /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] @@ -457,14 +343,6 @@ public struct Stack8 : IStack> /// public int Capacity => StackCapacity; - /// - [UnscopedRef] - public ref T First { get => ref _it0; } - - /// - [UnscopedRef] - public ref T Last { get => ref _it7; } - /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] @@ -522,14 +400,6 @@ public struct Stack16 : IStack> /// public int Capacity => StackCapacity; - /// - [UnscopedRef] - public ref T First { get => ref _it0; } - - /// - 
[UnscopedRef] - public ref T Last { get => ref _it15; } - /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] @@ -587,40 +457,31 @@ public Span AsSpan() => public struct SmallList : IEnumerable where TStack : struct, IStack { - internal int _count; + /// Let's enable access to the Count, so you can Pop the item by --list.Count. Just don't forget to nullify the popped item if needed + public int Count; + // For this warning it is fine `_stack` is never assigned to, and will always have its default value #pragma warning disable CS0649 - internal TStack _stack; -#pragma warning restore CS0649 - - internal T[] _rest; - /// - public int StackCapacity - { - [MethodImpl((MethodImplOptions)256)] - get => _stack.Capacity; - } + /// Let's enable access to the stack, just know what's you doing + public TStack Stack; +#pragma warning restore CS0649 - /// Gets the number of items in the list - public int Count - { - [MethodImpl((MethodImplOptions)256)] - get => _count; - } + /// Exposes the rest on heap + public T[] Rest; /// Ensures that the list has allocated space to hold `count` of items [MethodImpl((MethodImplOptions)256)] public void InitCount(int count) { Debug.Assert(count > 0, "Count should be more than 0"); - Debug.Assert(_count == 0, "Initial the count should be 0"); + Debug.Assert(Count == 0, "Initial the count should be 0"); // Add the StackCapacity empty space at the end, we may use it later for BuildToArray. // The actual source Capacity will be StackCapacity + count. 
- if (count > StackCapacity) - _rest = new T[count]; - _count = count; + if (count > Stack.Capacity) + Rest = new T[count]; + Count = count; } /// Returns surely present item by ref @@ -630,15 +491,15 @@ public ref T this[int index] [MethodImpl((MethodImplOptions)256)] get { - if (index < 0 | index >= _count) - return ref SmallList.ThrowIndexOutOfBounds(index, _count); + if (index < 0 | index >= Count) + return ref SmallList.ThrowIndexOutOfBounds(index, Count); - var stackCap = _stack.Capacity; + var stackCap = Stack.Capacity; if (index < stackCap) - return ref _stack.GetSurePresentItemRef(index); + return ref Stack.GetSurePresentItemRef(index); - Debug.Assert(_rest != null, "Expecting deeper items are already existing on heap"); - return ref _rest[index - stackCap]; + Debug.Assert(Rest != null, "Expecting deeper items are already existing on heap"); + return ref Rest[index - stackCap]; } } @@ -650,12 +511,12 @@ public ref T GetSurePresentItemRef(int index) Debug.Assert(Count != 0); Debug.Assert(index < Count); - var stackCap = _stack.Capacity; + var stackCap = Stack.Capacity; if (index < stackCap) - return ref _stack.GetSurePresentItemRef(index); + return ref Stack.GetSurePresentItemRef(index); - Debug.Assert(_rest != null); - return ref _rest[index - stackCap]; // todo: @wip use GetSurePresentItemRef for the array? + Debug.Assert(Rest != null); + return ref Rest[index - stackCap]; } /// Appends the default item to the end of the list and returns the reference to it. 
@@ -663,23 +524,23 @@ public ref T GetSurePresentItemRef(int index) [MethodImpl((MethodImplOptions)256)] public ref T AddDefaultAndGetRef() { - var index = _count++; - var stackCap = StackCapacity; + var index = Count++; + var stackCap = Stack.Capacity; if (index < stackCap) - return ref _stack.GetSurePresentItemRef(index); - return ref SmallList.AddDefaultAndGetRef(ref _rest, index - stackCap); + return ref Stack.GetSurePresentItemRef(index); + return ref SmallList.AddDefaultAndGetRef(ref Rest, index - stackCap); } /// Adds the item to the end of the list aka the Stack.Push. Returns the index of the added item. [MethodImpl((MethodImplOptions)256)] public int Add(in T item) { - var index = _count++; - var stackCap = StackCapacity; + var index = Count++; + var stackCap = Stack.Capacity; if (index < stackCap) - _stack.GetSurePresentItemRef(index) = item; + Stack.GetSurePresentItemRef(index) = item; else - SmallList.AddDefaultAndGetRef(ref _rest, index - stackCap) = item; + SmallList.AddDefaultAndGetRef(ref Rest, index - stackCap) = item; return index; } @@ -687,7 +548,7 @@ public int Add(in T item) [MethodImpl((MethodImplOptions)256)] public int TryGetIndex(in T item, TEq eq = default) where TEq : struct, IEq { - if (_count != 0) + if (Count != 0) { var index = 0; foreach (var it in this) @@ -712,50 +573,31 @@ public int GetIndexOrAdd(in T item, TEq eq = default) where TEq : struct, I [MethodImpl((MethodImplOptions)256)] public void Clear() { - _stack = default; // todo: @perf is there way to faster clear items on stack? - var restCount = _count - StackCapacity; + Stack = default; // todo: @perf is there way to faster clear items on stack? 
+ var restCount = Count - Stack.Capacity; if (restCount > 0) { - Debug.Assert(_rest != null, "Expecting deeper items are already existing on heap"); - Array.Clear(_rest, 0, restCount); + Debug.Assert(Rest != null, "Expecting deeper items are already existing on heap"); + Array.Clear(Rest, 0, restCount); } - _count = 0; - } - -#if SUPPORTS_CREATE_SPAN - // todo: @wip @remove for benchmarking only - /// Returns a surely present item ref by its index - [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] - public ref T GetSurePresentItemRef2(int index) - { - Debug.Assert(Count != 0); - Debug.Assert(index < Count); - - var stackCap = StackCapacity; - if (index < stackCap) - return ref _stack.AsSpan()[index]; - - Debug.Assert(_rest != null); - return ref _rest[index - stackCap]; + Count = 0; } -#endif /// Returns last present item ref, assumes that the list is not empty! [UnscopedRef] [MethodImpl((MethodImplOptions)256)] public ref T GetLastSurePresentItem() { - Debug.Assert(_count != 0, "Expecting that the list is not empty"); - return ref GetSurePresentItemRef(_count - 1); + Debug.Assert(Count != 0, "Expecting that the list is not empty"); + return ref GetSurePresentItemRef(Count - 1); } /// Removes the last item from the list aka the Stack Pop. Assumes that the list is not empty! [MethodImpl((MethodImplOptions)256)] public void RemoveLastSurePresentItem() { - Debug.Assert(_count != 0, "Expecting that the list is not empty"); - var index = --_count; + Debug.Assert(Count != 0, "Expecting that the list is not empty"); + var index = --Count; GetSurePresentItemRef(index) = default; } @@ -788,9 +630,9 @@ public bool MoveNext() var index = ++_index; if (index < _list.Count) { - _current = index < _list.StackCapacity - ? _list._stack.GetSurePresentItemRef(index) - : _list._rest[index - _list.StackCapacity]; + _current = index < _list.Stack.Capacity + ? 
_list.Stack.GetSurePresentItemRef(index) + : _list.Rest[index - _list.Stack.Capacity]; return true; } return false; @@ -801,166 +643,6 @@ public bool MoveNext() public void Dispose() { } } - -/// List with the number of first items (2) stored inside its struct and the rest in the growable array. -/// Supports addition and removal (remove is without resize) only at the end of the list, aka Stack behavior -[DebuggerDisplay("{Count} of {_it0?.ToString()}, {_it1?.ToString()}, ...")] -public struct SmallList2 -{ - /// The number of entries stored inside the map itself without moving them to array on heap - public const int StackCapacity = 2; - - internal int _count; - internal TItem _it0, _it1; - internal TItem[] _rest; - - /// Good stuff - [MethodImpl((MethodImplOptions)256)] - public void InitCount(int count) - { - _count = count; - if (count > StackCapacity) - _rest = new TItem[count]; // add the StackCapacity empty space at the end, we may use it later for BuildToArray - } - - /// Populate with one item - [MethodImpl((MethodImplOptions)256)] - public void Populate1(TItem it0) - { - _count = 1; - _it0 = it0; - } - - /// Populate with two items - [MethodImpl((MethodImplOptions)256)] - public void Populate2(TItem it0, TItem it1) - { - _count = StackCapacity; - _it0 = it0; - _it1 = it1; - } - - /// Populate with more than two items - [MethodImpl((MethodImplOptions)256)] - public void Populate(TItem it0, TItem it1, params TItem[] rest) - { - _count = StackCapacity + rest.Length; - _it0 = it0; - _it1 = it1; - _rest = rest; - } - - /// Populate with arbitrary items - public void Populate(TList items) where TList : IReadOnlyList - { - switch (items.Count) - { - case 0: - break; - case 1: - Populate1(items[0]); - break; - case 2: - Populate2(items[0], items[1]); - break; - default: - Populate2(items[0], items[1]); - - // keep the capacity at count + StackCapacity - _count = items.Count; - var rest = new TItem[_count]; // todo: @perf take from the ArrayPool.Shared - for (var 
i = StackCapacity; i < _count; ++i) - rest[i - StackCapacity] = items[i]; // todo: @perf does List have a Copy? - _rest = rest; - break; - } - } - - /// Gets the number of items in the list - public int Count - { - [MethodImpl((MethodImplOptions)256)] - get => _count; - } - - /// Returns surely present item by its index - public TItem this[int index] - { - [MethodImpl((MethodImplOptions)256)] - get - { - Debug.Assert(_count != 0); - Debug.Assert(index < _count); - switch (index) - { - case 0: return _it0; - case 1: return _it1; - default: - Debug.Assert(_rest != null, $"Expecting deeper items are already existing on heap at index: {index}"); - return _rest[index - StackCapacity]; - } - } - } - - /// Adds the item to the end of the list aka the Stack.Push - [MethodImpl((MethodImplOptions)256)] - public void Add(in TItem item) - { - var index = _count++; - switch (index) - { - case 0: _it0 = item; break; - case 1: _it1 = item; break; - default: - SmallList.AddDefaultAndGetRef(ref _rest, index - StackCapacity) = item; - break; - } - } - - /// Adds the default item to the end of the list aka the Stack.Push default - [MethodImpl((MethodImplOptions)256)] - public void AddDefault() - { - if (++_count >= StackCapacity) - SmallList.AddDefaultAndGetRef(ref _rest, _count - StackCapacity); - } - - /// Removes the last item from the list aka the Stack Pop. Assumes that the list is not empty! 
- [MethodImpl((MethodImplOptions)256)] - public void RemoveLastSurePresentItem() - { - Debug.Assert(_count != 0); - var index = --_count; - switch (index) - { - case 0: _it0 = default; break; - case 1: _it1 = default; break; - default: - Debug.Assert(_rest != null, $"Expecting a deeper parent stack created before accessing it here at level {index}"); - _rest[index - StackCapacity] = default; - break; - } - } - - /// Copy items to new the array - [MethodImpl((MethodImplOptions)256)] - public TItem[] ToArray() - { - switch (_count) - { - case 0: return Tools.Empty(); - case 1: return new[] { _it0 }; - case 2: return new[] { _it0, _it1 }; - default: - var items = new TItem[_count]; - items[0] = _it0; - items[1] = _it1; - Array.Copy(_rest, 0, items, 2, _count - StackCapacity); - return items; - } - } -} - /// Printable thing via provided printer public interface IPrintable { diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index dacf4241..b353c593 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -138,18 +138,18 @@ public int Double_and_Sum_Indexer() return sum; } - [Benchmark] - public int Double_and_Sum_AsSpan() - { - var sum = 0; - for (var i = 0; i < _list.Count; i++) - { - ref var n = ref _list.GetSurePresentItemRef2(i); - n += n; - sum += n; - } - return sum; - } + // [Benchmark] + // public int Double_and_Sum_AsSpan() + // { + // var sum = 0; + // for (var i = 0; i < _list.Count; i++) + // { + // ref var n = ref _list.GetSurePresentItemRef2(i); + // n += n; + // sum += n; + // } + // return sum; + // } } [MemoryDiagnoser, RankColumn, Orderer(BenchmarkDotNet.Order.SummaryOrderPolicy.FastestToSlowest)] From 6cf2d999ca483a8f7cbe97a5daf558967287fd90 Mon Sep 17 00:00:00 2001 From: dadhi Date: Mon, 2 Jun 2025 22:56:42 
+0200 Subject: [PATCH 08/32] generic SmallMap with configured count of items on stack --- .../FastExpressionCompiler.cs | 54 +- src/FastExpressionCompiler/ImTools.cs | 766 ++++++++---------- src/FastExpressionCompiler/TestTools.cs | 11 +- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 1 + 4 files changed, 365 insertions(+), 467 deletions(-) diff --git a/src/FastExpressionCompiler/FastExpressionCompiler.cs b/src/FastExpressionCompiler/FastExpressionCompiler.cs index 625502ad..e783f424 100644 --- a/src/FastExpressionCompiler/FastExpressionCompiler.cs +++ b/src/FastExpressionCompiler/FastExpressionCompiler.cs @@ -43,7 +43,7 @@ namespace FastExpressionCompiler.LightExpression using PE = FastExpressionCompiler.LightExpression.ParameterExpression; using FastExpressionCompiler.LightExpression.ImTools; using FastExpressionCompiler.LightExpression.ILDecoder; - using static FastExpressionCompiler.LightExpression.ImTools.SmallMap4; + using static FastExpressionCompiler.LightExpression.ImTools.SmallMap; #else namespace FastExpressionCompiler { @@ -51,7 +51,7 @@ namespace FastExpressionCompiler using PE = System.Linq.Expressions.ParameterExpression; using FastExpressionCompiler.ImTools; using FastExpressionCompiler.ILDecoder; - using static FastExpressionCompiler.ImTools.SmallMap4; + using static FastExpressionCompiler.ImTools.SmallMap; #endif using System; using System.Collections; @@ -737,19 +737,13 @@ public struct ClosureInfo /// Tracks the use of the variables in the blocks stack per variable, /// (uint) contains (ushort) BlockIndex in the upper bits and (ushort) VarIndex in the lower bits. 
/// to determine if variable is the local variable and in what block it's defined - private SmallMap4>, - RefEq, SmallMap4.SingleArrayEntries>, RefEq> - > _varInBlockMap; + private SmallMap4>, RefEq> _varInBlock; /// The map of inlined invocations collected in TryCollect and then used in TryEmit - internal SmallMap4, - SmallMap4.SingleArrayEntries> - > InlinedLambdaInvocationMap; + internal SmallMap4> InlinedLambdaInvocation; /// New or Call expressions containing the complex expression, e.g. inlined Lambda Invoke or Try with Finally - internal SmallMap4, - SmallMap4.SingleArrayEntries> - > ArgsContainingComplexExpression; + internal SmallMap4> ArgsContainingComplexExpression; internal bool HasComplexExpression; @@ -758,9 +752,7 @@ public struct ClosureInfo /// Tracks of how many gotos, labels referencing the specific target, they may be the same gotos expression, /// because the gotos may be reused multiple times in the big expression - internal SmallMap4, - SmallMap4.SingleArrayEntries> - > TargetToGotosAndLabels; + internal SmallMap4> TargetToGotosAndLabels; /// This is required because we have the return from the nested lambda expression, /// and when inlined in the parent lambda it is no longer the return but just a jump to the label. 
@@ -917,7 +909,7 @@ public void PushBlockAndConstructLocalVars(IReadOnlyList blockVarExprs, ILGe [MethodImpl((MethodImplOptions)256)] private void PushVarInBlockMap(ParameterExpression pe, ushort blockIndex, ushort varIndex) { - ref var blocks = ref _varInBlockMap.AddOrGetValueRef(pe, out _); + ref var blocks = ref _varInBlock.Map.AddOrGetValueRef(pe, out _); if (blocks.Count == 0 || (blocks.GetLastSurePresentItem() >>> 16) != blockIndex) blocks.Add((uint)(blockIndex << 16) | varIndex); } @@ -925,10 +917,10 @@ private void PushVarInBlockMap(ParameterExpression pe, ushort blockIndex, ushort public void PopBlock() { Debug.Assert(_blockCount > 0); - var varCount = _varInBlockMap.Count; + var varCount = _varInBlock.Map.Count; for (var i = 0; i < varCount; ++i) { - ref var varBlocks = ref _varInBlockMap.GetSurePresentEntryRef(i); + ref var varBlocks = ref _varInBlock.Map.GetSurePresentEntryRef(i); if (varBlocks.Value.Count == _blockCount) varBlocks.Value.RemoveLastSurePresentItem(); } @@ -938,14 +930,14 @@ public void PopBlock() [MethodImpl((MethodImplOptions)256)] public bool IsLocalVar(ParameterExpression varParamExpr) { - ref var blocks = ref _varInBlockMap.TryGetValueRefUnsafe(varParamExpr, out var found); + ref var blocks = ref _varInBlock.Map.TryGetValueRef(varParamExpr, out var found); return found && blocks.Count != 0; } [MethodImpl((MethodImplOptions)256)] public int GetDefinedLocalVarOrDefault(ParameterExpression varParamExpr) { - ref var blocks = ref _varInBlockMap.TryGetValueRefUnsafe(varParamExpr, out var found); + ref var blocks = ref _varInBlock.Map.TryGetValueRef(varParamExpr, out var found); return found && blocks.Count != 0 // rare case with the block count 0 may occur when we collected the block and vars, but not yet defined the variable for it ? 
(int)(blocks.GetLastSurePresentItem() & ushort.MaxValue) : -1; @@ -1339,7 +1331,7 @@ public static Result TryCollectInfo(ref ClosureInfo closure, Expression expr, if (hasComplexExpression) { closure.HasComplexExpression = true; - closure.ArgsContainingComplexExpression.AddOrGetValueRef(callExpr, out _); + closure.ArgsContainingComplexExpression.Map.AddOrGetValueRef(callExpr, out _); } return r; } @@ -1376,7 +1368,7 @@ public static Result TryCollectInfo(ref ClosureInfo closure, Expression expr, if (hasComplexExpression) { closure.HasComplexExpression = true; - closure.ArgsContainingComplexExpression.AddOrGetValueRef(newExpr, out _); + closure.ArgsContainingComplexExpression.Map.AddOrGetValueRef(newExpr, out _); } return r; @@ -1475,7 +1467,7 @@ public static Result TryCollectInfo(ref ClosureInfo closure, Expression expr, closure.CurrentInlinedLambdaInvokeIndex = closure.AddInlinedLambdaInvoke(invokeExpr); closure.HasComplexExpression = false; // switch off because we have entered the inlined lambda - ref var inlinedExpr = ref closure.InlinedLambdaInvocationMap.AddOrGetValueRef(invokeExpr, out var found); + ref var inlinedExpr = ref closure.InlinedLambdaInvocation.Map.AddOrGetValueRef(invokeExpr, out var found); if (!found) inlinedExpr = CreateInlinedLambdaInvocationExpression(invokeArgs, invokeArgCount, lambdaExpr); @@ -1585,7 +1577,7 @@ public static Result TryCollectInfo(ref ClosureInfo closure, Expression expr, var labelExpr = (LabelExpression)expr; closure.AddLabel(labelExpr.Target, closure.CurrentInlinedLambdaInvokeIndex); if (labelExpr.Target != null) - closure.TargetToGotosAndLabels.AddOrGetValueRef(labelExpr.Target, out _).Item2++; + closure.TargetToGotosAndLabels.Map.AddOrGetValueRef(labelExpr.Target, out _).Item2++; if (labelExpr.DefaultValue == null) return r; expr = labelExpr.DefaultValue; @@ -1594,7 +1586,7 @@ public static Result TryCollectInfo(ref ClosureInfo closure, Expression expr, case ExpressionType.Goto: var gotoExpr = (GotoExpression)expr; if 
(gotoExpr.Target != null) - closure.TargetToGotosAndLabels.AddOrGetValueRef(gotoExpr.Target, out _).Item1++; + closure.TargetToGotosAndLabels.Map.AddOrGetValueRef(gotoExpr.Target, out _).Item1++; if (gotoExpr.Value == null) return r; expr = gotoExpr.Value; @@ -2327,7 +2319,7 @@ public static bool TryEmit(Expression expr, statementExprs[i + 1] is LabelExpression label && label.Target == gt.Target) { // But we cannot use the return pattern and eliminate the target label if we have more gotos referencing it, see #430 - var (gotos, labels) = closure.TargetToGotosAndLabels.TryGetValueRefUnsafe(label.Target, out var found); + var (gotos, labels) = closure.TargetToGotosAndLabels.Map.TryGetValueRef(label.Target, out var found); if (found && gotos <= labels) { if ((parent & ParentFlags.TryCatch) != 0) @@ -2460,7 +2452,7 @@ private static bool TryEmitNew(Expression expr, IReadOnlyList paramExprs, IL } else { - if (!closure.ArgsContainingComplexExpression.ContainsKey(newExpr)) + if (!closure.ArgsContainingComplexExpression.Map.ContainsKey(newExpr)) { for (var i = 0; i < argCount; ++i) if (!TryEmit(argExprs.GetArgument(i), paramExprs, il, ref closure, setup, parent, pars[i].ParameterType.IsByRef ? 
i : -1)) @@ -4960,7 +4952,7 @@ private static bool TryEmitMethodCall(Expression expr, #else var callArgs = callExpr.Arguments; #endif - if (!closure.ArgsContainingComplexExpression.ContainsKey(callExpr)) + if (!closure.ArgsContainingComplexExpression.Map.ContainsKey(callExpr)) { if (loadObjByAddress) EmitStoreAndLoadLocalVariableAddress(il, objExpr.Type); @@ -5282,7 +5274,7 @@ private static bool TryEmitInvoke(InvocationExpression expr, IReadOnlyList p { parent |= ParentFlags.InlinedLambdaInvoke; - ref var inlinedExpr = ref closure.InlinedLambdaInvocationMap.AddOrGetValueRef(expr, out var found); + ref var inlinedExpr = ref closure.InlinedLambdaInvocation.Map.AddOrGetValueRef(expr, out var found); Debug.Assert(found, "The invocation expression should be collected in TryCollectInfo but it is not"); if (!found) return false; @@ -8954,9 +8946,7 @@ public virtual LocalBuilder DeclareLocal(Type localType, bool pinned) #if DEBUG_INFO_LOCAL_VARIABLE_USAGE [ThreadStatic] - public static SmallMap4, - SmallMap4.SingleArrayEntries> - > LocalVarUsage; + public static SmallMap8> LocalVarUsage; #endif // todo: @perf add the map of the used local variables that can be reused, e.g. we are getting the variable used in the local scope but then we may return them into POOL and reuse (many of int variable can be reuses, say for indexes) /// Efficiently returns the next variable index, hopefully without unnecessary allocations. 
@@ -8966,7 +8956,7 @@ public static int GetNextLocalVarIndex(this ILGenerator il, Type t) #if DEBUG_INFO_LOCAL_VARIABLE_USAGE try { - ref var varUsage = ref LocalVarUsage.AddOrGetValueRef(t, out var found); + ref var varUsage = ref LocalVarUsage.Map.AddOrGetValueRef(t, out var found); if (!found) varUsage = 1; else diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 5842b960..72e91a96 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -46,7 +46,7 @@ namespace FastExpressionCompiler.ImTools; using System.Runtime.InteropServices; using System.Diagnostics.CodeAnalysis; -using static SmallMap4; +using static SmallMap; /// Helpers and polyfills for the missing things in the old .NET versions public static class RefTools @@ -143,6 +143,18 @@ public static ref T AddDefaultAndGetRef(ref T[] items, int index, int initial public static ref T GetSurePresentItemRef(this ref SmallList source, int index) => ref source.Items[index]; + // todo: @perf @wip benchmark this + /// Returns surely present item ref by its index without boundary checks + [MethodImpl((MethodImplOptions)256)] + public static ref T GetSurePresentItemRef(this T[] items, int index) + { +#if SUPPORTS_UNSAFE + return ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(items), index); +#else + return ref items[index]; +#endif + } + // todo: @perf add the not null variant /// Appends the new default item to the list and returns ref to it for write or read [MethodImpl((MethodImplOptions)256)] @@ -196,22 +208,28 @@ public static int GetIndexOrAdd(this ref SmallList source, in T item, #pragma warning disable CS9101 // UnscopedRef goes wrong on Ubuntu -internal static class Stack +/// Utilities for Stack4, Stack8, etc. 
+public static class Stack { [MethodImpl(MethodImplOptions.NoInlining)] - internal static ref T ThrowIndexOutOfBounds(int index, int capacity) - { + internal static ref T ThrowIndexOutOfBounds(int index, int capacity) => throw new IndexOutOfRangeException($"Index {index} is out of range for Stack{capacity}<{typeof(T)},..>."); - } + +#if SUPPORTS_CREATE_SPAN + /// Creates a span over the stack items + [MethodImpl((MethodImplOptions)256)] + public static Span AsSpan(this ref TStack stack) + where TStack : struct, IStack => + MemoryMarshal.CreateSpan(ref Unsafe.As(ref stack), stack.Capacity); +#endif } -// todo: @wip generalized Stack is the WIP and may be moved to ImTools repo /// Abstracts over collection of the items on stack of the fixed Capacity, /// to be used as a part of the hybrid data structures which grow from stack to heap public interface IStack where TStack : struct, IStack { - /// Possible count of items holding on stack + /// Maximum count of items hold on stack int Capacity { get; } /// Returns the item by ref to read and write the item value, @@ -222,26 +240,17 @@ public interface IStack /// Indexer returning the item by ref to read and write the item value [UnscopedRef] ref T this[int index] { get; } - -#if SUPPORTS_CREATE_SPAN - /// Creates a span from the struct items - [UnscopedRef] - Span AsSpan(); -#endif } /// Implementation of `IStack` for 2 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] public struct Stack2 : IStack> { - /// Count of items on stack - public const int StackCapacity = 2; + /// + public int Capacity => 2; internal T _it0, _it1; - /// - public int Capacity => StackCapacity; - /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] @@ -265,33 +274,22 @@ public ref T this[int index] [MethodImpl((MethodImplOptions)256)] get { - if (index < 0 | index >= StackCapacity) - return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); - return ref GetSurePresentItemRef(index); + if (index >= 0 & index < Capacity) + return 
ref GetSurePresentItemRef(index); + return ref Stack.ThrowIndexOutOfBounds(index, Capacity); } } - -#if SUPPORTS_CREATE_SPAN - /// - [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] - public Span AsSpan() => - MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); -#endif } /// Implementation of `IStack` for 4 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] public struct Stack4 : IStack> { - /// Count of items on stack - public const int StackCapacity = 4; + /// + public int Capacity => 4; internal T _it0, _it1, _it2, _it3; - /// - public int Capacity => StackCapacity; - /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] @@ -317,31 +315,21 @@ public ref T this[int index] [MethodImpl((MethodImplOptions)256)] get { - if (index < 0 | index >= StackCapacity) - return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); - return ref GetSurePresentItemRef(index); + if (index >= 0 & index < Capacity) + return ref GetSurePresentItemRef(index); + return ref Stack.ThrowIndexOutOfBounds(index, Capacity); } } - -#if SUPPORTS_CREATE_SPAN - /// - [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] - public Span AsSpan() => - MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); -#endif } /// Implementation of `IStack` for 8 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] public struct Stack8 : IStack> { - /// Count of items on stack - public const int StackCapacity = 8; - internal T _it0, _it1, _it2, _it3, _it4, _it5, _it6, _it7; - /// - public int Capacity => StackCapacity; + public int Capacity => 8; + + internal T _it0, _it1, _it2, _it3, _it4, _it5, _it6, _it7; /// [UnscopedRef] @@ -372,34 +360,23 @@ public ref T this[int index] [MethodImpl((MethodImplOptions)256)] get { - if (index < 0 | index >= StackCapacity) - return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); - return ref GetSurePresentItemRef(index); + if (index >= 0 & index < Capacity) + return ref GetSurePresentItemRef(index); + return 
ref Stack.ThrowIndexOutOfBounds(index, Capacity); } } - -#if SUPPORTS_CREATE_SPAN - /// - [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] - public Span AsSpan() => - MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); -#endif } /// Implementation of `IStack` for 16 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] public struct Stack16 : IStack> { - /// Count of items on stack - public const int StackCapacity = 16; + /// + public int Capacity => 16; internal T _it0, _it1, _it2, _it3, _it4, _it5, _it6, _it7; internal T _it8, _it9, _it10, _it11, _it12, _it13, _it14, _it15; - /// - public int Capacity => StackCapacity; - /// [UnscopedRef] [MethodImpl((MethodImplOptions)256)] @@ -437,37 +414,26 @@ public ref T this[int index] [MethodImpl((MethodImplOptions)256)] get { - if (index < 0 | index >= StackCapacity) - return ref Stack.ThrowIndexOutOfBounds(index, StackCapacity); - return ref GetSurePresentItemRef(index); + if (index >= 0 & index < Capacity) + return ref GetSurePresentItemRef(index); + return ref Stack.ThrowIndexOutOfBounds(index, Capacity); } } - -#if SUPPORTS_CREATE_SPAN - /// - [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] - public Span AsSpan() => - MemoryMarshal.CreateSpan(ref Unsafe.As, T>(ref this), StackCapacity); -#endif } -// todo: @wip -/// Generic version of SmallList abstracted for how much items are on stack +/// Generic version of SmallList abstracted for how much items are on the stack public struct SmallList : IEnumerable where TStack : struct, IStack { /// Let's enable access to the Count, so you can Pop the item by --list.Count. 
Just don't forget to nullify the popped item if needed public int Count; - // For this warning it is fine `_stack` is never assigned to, and will always have its default value -#pragma warning disable CS0649 - +#pragma warning disable CS0649 // it is fine `Stack` is never assigned to, and will always have its default value /// Let's enable access to the stack, just know what's you doing public TStack Stack; #pragma warning restore CS0649 - /// Exposes the rest on heap + /// Exposes the rest on the heap public T[] Rest; /// Ensures that the list has allocated space to hold `count` of items @@ -509,7 +475,7 @@ public ref T this[int index] public ref T GetSurePresentItemRef(int index) { Debug.Assert(Count != 0); - Debug.Assert(index < Count); + Debug.Assert(index >= 0 & index < Count); var stackCap = Stack.Capacity; if (index < stackCap) @@ -782,24 +748,20 @@ public int GetHashCode((A, B, C) key) => /// Configuration and the tools for the SmallMap and friends -public static class SmallMap4 +public static class SmallMap { internal const byte MinFreeCapacityShift = 3; // e.g. for the capacity 16: 16 >> 3 => 2, 12.5% of the free hash slots (it does not mean the entries free slot) internal const byte MinHashesCapacityBitShift = 4; // 1 << 4 == 16 - + internal const int IndexMask = (1 << MinHashesCapacityBitShift) - 1; // 0b00000000000000000000000000001111 /// Upper hash bits spent on storing the probes, e.g. 5 bits mean 31 probes max. 
- public const byte MaxProbeBits = 5; - internal const byte MaxProbeCount = (1 << MaxProbeBits) - 1; - internal const byte ProbeCountShift = 32 - MaxProbeBits; - internal const int HashAndIndexMask = ~(MaxProbeCount << ProbeCountShift); - - /// The number of entries stored inside the map itself without moving them to array on heap - public const int StackEntriesCount = 4; - - /// Creates the map with the storage - [MethodImpl((MethodImplOptions)256)] - public static SmallMap4> New(byte capacityBitShift = 0) - where TEq : struct, IEq => new(capacityBitShift); + public const byte ProbeBits = 5; + internal const byte NotShiftedProbeCountMask = (1 << ProbeBits) - 1; // 0b00000000000000000000000000011111 + // 27, so the upper 5 bits are used for the probe count + internal const byte ProbeCountShift = 32 - ProbeBits; + // ~0b11111000000000000000000000000000 -> 0b00000111111111111111111111111111 + internal const int HashAndIndexMask = ~(NotShiftedProbeCountMask << ProbeCountShift); + // Window with the hash mask without the lead ProbeMask and closing IndexMask 0b00000111111111111111111111110000 + internal const int HashMask = HashAndIndexMask & ~IndexMask; /// Holds a single entry consisting of key and value. /// Value may be set or changed but the key is set in stone (by construction). 
@@ -820,7 +782,15 @@ public Entry(K key, V value) } } - /// binary representation of the `int` + /// Creates the map with the storage + [MethodImpl((MethodImplOptions)256)] + public static SmallMap> + New(byte capacityBitShift = 0) + where TEq : struct, IEq + where TStack : struct, IStack, TStack> + => new(capacityBitShift); + + /// Binary representation of the `int` public static string ToB(int x) => System.Convert.ToString(x, 2).PadLeft(32, '0'); [MethodImpl((MethodImplOptions)256)] @@ -837,12 +807,12 @@ public Entry(K key, V value) internal static int GetHash(ref int[] start, int distance) => start[distance]; #endif - // todo: @improve can we move the Entry into the type parameter to configure and possibly save the memory e.g. for the sets? + // todo: @perf can we move the Entry into the type parameter to configure and possibly save the memory e.g. for the sets? /// Abstraction to configure your own entries data structure. Check the derived types for the examples public interface IEntries where TEq : IEq { - /// Initializes the entries storage to the specified capacity via the number of bits in the capacity - void Init(byte capacityBitShift); + /// Initializes the entries storage to the specified capacity + void Init(int capacityPowerOfTwoPlease); /// Returns the reference to entry by its index, index should map to the present/non-removed entry ref Entry GetSurePresentEntryRef(int index); @@ -863,8 +833,8 @@ public struct SingleArrayEntries : IEntries where TEq : st internal Entry[] _entries; /// - public void Init(byte capacityBitShift) => - _entries = new Entry[1 << capacityBitShift]; + public void Init(int capacityPowerOfTwoPlease) => + _entries = new Entry[capacityPowerOfTwoPlease]; /// [MethodImpl((MethodImplOptions)256)] @@ -892,168 +862,170 @@ public ref V AddKeyAndGetValueRef(K key, int index) return ref e.Value; } } +} - /// Finds the stored value by key. 
- /// UNSAFE: don't try to set the returned value if the `found == false`, because you will be setting the shared static value. - /// You may set the value ONLY if `found == true`, - /// If you want this kind of sematic use `AddOrGetValueRef` - [MethodImpl((MethodImplOptions)256)] - public static ref V TryGetValueRefUnsafe(this ref SmallMap4 map, K key, out bool found) - where TEq : struct, IEq - where TEntries : struct, IEntries +// todo: @improve ? how/where to add SIMD to improve CPU utilization but not losing perf for smaller sizes +/// +/// Fast and less-allocating hash map without thread safety nets. Please measure it in your own use case before use. +/// It is configurable in regard of hash calculation/equality via `TEq` type parameter and +/// in regard of key-value storage via `TEntries` type parameter. +/// +/// Details: +/// - Implemented as a struct so that the empty/default map does not allocate on heap +/// - Hashes and key-values are the separate collections enabling better cache locality and faster performance (data-oriented design) +/// - No SIMD for now to avoid complexity and costs for the smaller maps, so the map is more fit for the smaller sizes. +/// - Provides the "stable" enumeration of the entries in the added order +/// - The TryRemove method removes the hash but replaces the key-value entry with the tombstone key and the default value. +/// For instance, for the `RefEq` the tombstone is . You may redefine it in the `IEq{K}.GetTombstone()` implementation. +/// +/// +[DebuggerDisplay("{Count} of {_e0}, {_e1}, {_e2}, {_e3}, ...")] +public struct SmallMap + where TEq : struct, IEq + where TStack : struct, IStack, TStack> + where TEntries : struct, IEntries +{ + internal byte _capacityBitShift; + internal int _count; + + // The _packedHashesAndIndexes elements are of `Int32` with the bits split as following: + // 00010|000...110|01101 + // | | |- The index into the _entries structure, 0-based. 
The index bit count (indexMask) is the hashes capacity - 1. + // | | | This part of the erased hash is used to get the ideal index into the hashes array, so later this part of hash may be restored from the hash index and its probes. + // | |- The remaining middle bits of the original hash + // |- 5 (MaxProbeBits) high bits of the Probe count, with the minimal value of b00001 indicating the non-empty slot. + internal int[] _packedHashesAndIndexes; + +#pragma warning disable IDE0044 // it tries to make entries readonly but they should stay modify-able to prevent its defensive struct copying + internal TEntries _entries; +#pragma warning restore IDE0044 +#pragma warning disable CS0649 // Field 'SmallMap.Stack' is never assigned to, and will always have its default value + internal TStack Stack; +#pragma warning restore CS0649 + + /// Capacity bits + public int CapacityBitShift => _capacityBitShift; + + /// Access to the hashes and indexes + public int[] PackedHashesAndIndexes => _packedHashesAndIndexes; + + /// Number of entries in the map + public int Count => _count; + + /// Access to the key-value entries + public TEntries Entries => _entries; + + /// Capacity calculates as `1 leftShift capacityBitShift` + public SmallMap(byte capacityBitShift) { - if (map._count > StackEntriesCount) - return ref map.TryGetValueRefByHash(key, out found); - switch (map._count) - { - case 1: - if (found = default(TEq).Equals(key, map._e0.Key)) return ref map._e0.Value; - break; - case 2: - if (found = default(TEq).Equals(key, map._e0.Key)) return ref map._e0.Value; - if (found = default(TEq).Equals(key, map._e1.Key)) return ref map._e1.Value; - break; - case 3: - if (found = default(TEq).Equals(key, map._e0.Key)) return ref map._e0.Value; - if (found = default(TEq).Equals(key, map._e1.Key)) return ref map._e1.Value; - if (found = default(TEq).Equals(key, map._e2.Key)) return ref map._e2.Value; - break; - case 4: - if (found = default(TEq).Equals(key, map._e0.Key)) return ref 
map._e0.Value; - if (found = default(TEq).Equals(key, map._e1.Key)) return ref map._e1.Value; - if (found = default(TEq).Equals(key, map._e2.Key)) return ref map._e2.Value; - if (found = default(TEq).Equals(key, map._e3.Key)) return ref map._e3.Value; - break; - } - found = false; - return ref RefTools.GetNullRef(); + _capacityBitShift = capacityBitShift; + + // the overflow tail to the hashes is the size of log2N where N==capacityBitShift, + // it is probably fine to have the check for the overflow of capacity because it will be mis-predicted only once at the end of loop (it even rarely for the lookup) + _packedHashesAndIndexes = new int[1 << capacityBitShift]; + _entries = default; + _entries.Init(capacityBitShift); } - /// Finds the stored value by key. If found returns ref to the value it can be modified in place. + ///Get the value ref by the entry index. Also the index corresponds to entry adding order. + /// Important: it does not check the index bounds, so you need to check that the index is from 0 to map.Count-1 + [UnscopedRef] [MethodImpl((MethodImplOptions)256)] - public static bool ContainsKey(this ref SmallMap4 map, K key) - where TEq : struct, IEq - where TEntries : struct, IEntries + public ref Entry GetSurePresentEntryRef(int index) { - if (map._count > StackEntriesCount) + Debug.Assert(index >= 0); + Debug.Assert(index < _count); + if (index >= Stack.Capacity) + return ref _entries.GetSurePresentEntryRef(index - Stack.Capacity); + return ref Stack.GetSurePresentItemRef(index); + } + + [UnscopedRef] + private ref V AddOrGetValueRefInEntries(K key, out bool found) + { + // if the free space is less than 1/8 of capacity (12.5%) then Resize + var indexMask = (1 << _capacityBitShift) - 1; + if (indexMask - _count <= (indexMask >>> MinFreeCapacityShift)) + indexMask = ResizeHashes(indexMask); + + var hash = default(TEq).GetHashCode(key); + var hashMiddleMask = HashAndIndexMask & ~indexMask; + var hashMiddle = hash & hashMiddleMask; + var hashIndex = hash 
& indexMask; + +#if NET7_0_OR_GREATER + ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(_packedHashesAndIndexes); +#else + var hashesAndIndexes = _packedHashesAndIndexes; +#endif + ref var h = ref GetHashRef(ref hashesAndIndexes, hashIndex); + + // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions + var probes = 1; + while ((h >>> ProbeCountShift) >= probes) { - _ = map.TryGetValueRefByHash(key, out var found); - return found; + // 2. For the equal probes check for equality the hash middle part, and update the entry if the keys are equal too + if (((h >>> ProbeCountShift) == probes) & ((h & hashMiddleMask) == hashMiddle)) + { + ref var e = ref GetSurePresentEntryRef(h & indexMask); + if (found = default(TEq).Equals(e.Key, key)) + return ref e.Value; + } + h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + ++probes; } + found = false; - // for small counts just compare the keys without calculating the hashes - var eq = default(TEq); - return map._count switch - { - 1 => eq.Equals(key, map._e0.Key), - 2 => eq.Equals(key, map._e0.Key) || eq.Equals(key, map._e1.Key), - 3 => eq.Equals(key, map._e0.Key) || eq.Equals(key, map._e1.Key) || eq.Equals(key, map._e2.Key), - 4 => eq.Equals(key, map._e0.Key) || eq.Equals(key, map._e1.Key) || eq.Equals(key, map._e2.Key) || eq.Equals(key, map._e3.Key), - _ => false, - }; - } + // 3. We did not find the hash and therefore the key, so insert the new entry + var hRobinHooded = h; + h = (probes << ProbeCountShift) | hashMiddle | _count; - /// Gets the reference to the existing value by the provided key (found == true), - /// or adds a new key-value pair (found == false) and allows to set the returned value. 
- [MethodImpl((MethodImplOptions)256)] - public static ref V AddOrGetValueRef(this ref SmallMap4 map, K key, out bool found) - where TEq : struct, IEq - where TEntries : struct, IEntries - { - if (map._count > StackEntriesCount) - return ref map.AddOrGetValueRefByHash(key, out found); - found = true; - switch (map._count) + // 4. If the robin hooded hash is empty then we stop + // 5. Otherwise we steal the slot with the smaller probes + probes = hRobinHooded >>> ProbeCountShift; + while (hRobinHooded != 0) { - case 0: - found = false; - map._count = 1; - map._e0.Key = key; - return ref map._e0.Value; - - case 1: - if (default(TEq).Equals(key, map._e0.Key)) return ref map._e0.Value; - found = false; - map._count = 2; - map._e1.Key = key; - return ref map._e1.Value; - - case 2: - if (default(TEq).Equals(key, map._e0.Key)) return ref map._e0.Value; - if (default(TEq).Equals(key, map._e1.Key)) return ref map._e1.Value; - found = false; - map._count = 3; - map._e2.Key = key; - return ref map._e2.Value; - - case 3: - if (default(TEq).Equals(key, map._e0.Key)) return ref map._e0.Value; - if (default(TEq).Equals(key, map._e1.Key)) return ref map._e1.Value; - if (default(TEq).Equals(key, map._e2.Key)) return ref map._e2.Value; - found = false; - map._count = 4; - map._e3.Key = key; - return ref map._e3.Value; - - default: - if (default(TEq).Equals(key, map._e0.Key)) return ref map._e0.Value; - if (default(TEq).Equals(key, map._e1.Key)) return ref map._e1.Value; - if (default(TEq).Equals(key, map._e2.Key)) return ref map._e2.Value; - if (default(TEq).Equals(key, map._e3.Key)) return ref map._e3.Value; - found = false; - - map._capacityBitShift = MinHashesCapacityBitShift; - map._packedHashesAndIndexes = new int[1 << MinHashesCapacityBitShift]; - - var indexMask = (1 << MinHashesCapacityBitShift) - 1; - - // todo: @perf optimize by calculating the keys hashes and putting them into the span and iterating over them inside a single method - - 
map.AddInitialHashWithoutResizing(map._e0.Key, 0, indexMask); - map.AddInitialHashWithoutResizing(map._e1.Key, 1, indexMask); - map.AddInitialHashWithoutResizing(map._e2.Key, 2, indexMask); - map.AddInitialHashWithoutResizing(map._e3.Key, 3, indexMask); - map.AddInitialHashWithoutResizing(key, StackEntriesCount, indexMask); - - map._count = 5; - map._entries.Init(2); - - // we do not copying the entries because we provide the stable value reference guaranties - return ref map._entries.AddKeyAndGetValueRef(key, 0); + h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + if ((h >>> ProbeCountShift) < ++probes) + { + var tmp = h; + h = (probes << ProbeCountShift) | (hRobinHooded & HashAndIndexMask); + hRobinHooded = tmp; + probes = hRobinHooded >>> ProbeCountShift; + } } + + return ref _entries.AddKeyAndGetValueRef(key, (_count++) - Stack.Capacity); } - private static void AddInitialHashWithoutResizing(this ref SmallMap4 map, K key, int index, int indexMask) - where TEq : struct, IEq - where TEntries : struct, IEntries + private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) { #if NET7_0_OR_GREATER - ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(map._packedHashesAndIndexes); + ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(_packedHashesAndIndexes); #else - var hashesAndIndexes = map._packedHashesAndIndexes; + var hashesAndIndexes = _packedHashesAndIndexes; #endif - var hash = default(TEq).GetHashCode(key); - var hashIndex = hash & indexMask; + var hashIndex = hash & IndexMask; // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions ref var h = ref GetHashRef(ref hashesAndIndexes, hashIndex); var probes = 1; while ((h >>> ProbeCountShift) >= probes) { - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & IndexMask); ++probes; } // 3. 
We did not find the hash and therefore the key, so insert the new entry var hRobinHooded = h; - h = (probes << ProbeCountShift) | (hash & HashAndIndexMask & ~indexMask) | index; + h = (probes << ProbeCountShift) | (hash & HashMask) | index; // 4. If the robin hooded hash is empty then we stop // 5. Otherwise we steal the slot with the smaller probes probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & IndexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1064,72 +1036,70 @@ private static void AddInitialHashWithoutResizing(this ref } } - /// Adds the sure absent key entry. - /// Provides the performance in scenarios where you look for present key, and using it, and if ABSENT then add the new one. - /// So this method optimized NOT to look for the present item for the second time in SEQUENCE - public static ref V AddSureAbsentDefaultAndGetRef(this ref SmallMap4 map, K key) - where TEq : struct, IEq - where TEntries : struct, IEntries + /// Gets the reference to the existing value by the provided key (found == true), + /// or adds a new key-value pair (found == false) and allows to set the returned value. + [UnscopedRef] + public ref V AddOrGetValueRef(K key, out bool found) { - if (map._count > StackEntriesCount) - return ref map.AddSureAbsentDefaultAndGetRefByHash(key); - switch (map._count) + if (_count > Stack.Capacity) + return ref AddOrGetValueRefInEntries(key, out found); + + // Linear search in stack (which has a few items) by comparing the keys without calculating the hashes + // Saving on the hash calculation. Losing on the bigger number of comparisons. 
+ for (var i = 0; i < _count; ++i) + { + ref var e = ref GetSurePresentEntryRef(i); + if (found = default(TEq).Equals(e.Key, key)) + return ref e.Value; + } + found = false; + + // Add the new entry to the stack if there is still space in stack + if (_count < Stack.Capacity) { - case 0: - map._count = 1; - map._e0.Key = key; - return ref map._e0.Value; - - case 1: - map._count = 2; - map._e1.Key = key; - return ref map._e1.Value; - - case 2: - map._count = 3; - map._e2.Key = key; - return ref map._e2.Value; - - case 3: - map._count = 4; - map._e3.Key = key; - return ref map._e3.Value; - - default: - map._capacityBitShift = MinHashesCapacityBitShift; - map._packedHashesAndIndexes = new int[1 << MinHashesCapacityBitShift]; - - var indexMask = (1 << MinHashesCapacityBitShift) - 1; - - map.AddInitialHashWithoutResizing(map._e0.Key, 0, indexMask); - map.AddInitialHashWithoutResizing(map._e1.Key, 1, indexMask); - map.AddInitialHashWithoutResizing(map._e2.Key, 2, indexMask); - map.AddInitialHashWithoutResizing(map._e3.Key, 3, indexMask); - map.AddInitialHashWithoutResizing(key, StackEntriesCount, indexMask); - - map._count = 5; - map._entries.Init(2); - return ref map._entries.AddKeyAndGetValueRef(key, 0); + var newIndex = _count++; + ref var newEntry = ref Stack.GetSurePresentItemRef(newIndex); + newEntry.Key = key; + return ref newEntry.Value; } + + // Now all capacity of the stack is used. + // To avoid double work always going linearly through the Stack with the comparison, + // let's calculate the hash of the keys stored on stack and put them + // to the usual HashMap packed hashes and indexes array for the promised O(1) lookup. + // But the values are remaining on the Stack, and for the found index of the entry we use the GetSurePresentItemRef(index) + // to get the value reference either from the Stack or the Entries. 
+ // So the values on the stack are guarntied to be stable from the beginning of the map creation, + // because they are not copied when the Entries need to Resize (depending on the TEntries implementation). + + _capacityBitShift = MinHashesCapacityBitShift; + _packedHashesAndIndexes = new int[1 << MinHashesCapacityBitShift]; + + for (var i = 0; i < Stack.Capacity; ++i) + AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(GetSurePresentEntryRef(i).Key), i); + + AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(key), Stack.Capacity); + + _count = Stack.Capacity + 1; // +1 because we added the new key + _entries.Init(Stack.Capacity); // Give the heap entries the same initial capcity as Stack, effectively doubling the capacity + return ref _entries.AddKeyAndGetValueRef(key, 0); // add the new key to the entries with the 0 index in the entries } - [MethodImpl((MethodImplOptions)256)] - private static ref V AddSureAbsentDefaultAndGetRefByHash(this ref SmallMap4 map, K key) - where TEq : struct, IEq - where TEntries : struct, IEntries + [UnscopedRef] + private ref V AddSureAbsentDefaultAndGetRefInEntries(K key) { // if the free space is less than 1/8 of capacity (12.5%) then Resize - var indexMask = (1 << map._capacityBitShift) - 1; - if (indexMask - map._count <= (indexMask >>> MinFreeCapacityShift)) - indexMask = map.ResizeHashes(indexMask); + var indexMask = (1 << _capacityBitShift) - 1; + if (indexMask - _count <= (indexMask >>> MinFreeCapacityShift)) + indexMask = ResizeHashes(indexMask); var hash = default(TEq).GetHashCode(key); var hashIndex = hash & indexMask; #if NET7_0_OR_GREATER - ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(map._packedHashesAndIndexes); + ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(_packedHashesAndIndexes); #else - var hashesAndIndexes = map._packedHashesAndIndexes; + var hashesAndIndexes = _packedHashesAndIndexes; #endif ref var h = ref GetHashRef(ref hashesAndIndexes, 
hashIndex); @@ -1143,7 +1113,7 @@ private static ref V AddSureAbsentDefaultAndGetRefByHash(th // 3. We did not find the hash and therefore the key, so insert the new entry var hRobinHooded = h; - h = (probes << ProbeCountShift) | (hash & HashAndIndexMask & ~indexMask) | map._count; + h = (probes << ProbeCountShift) | (hash & HashAndIndexMask & ~indexMask) | _count; // 4. If the robin hooded hash is empty then we stop // 5. Otherwise we steal the slot with the smaller probes @@ -1160,46 +1130,73 @@ private static ref V AddSureAbsentDefaultAndGetRefByHash(th } } - return ref map._entries.AddKeyAndGetValueRef(key, (map._count++) - StackEntriesCount); + return ref _entries.AddKeyAndGetValueRef(key, (_count++) - Stack.Capacity); } - ///Get the value ref by the entry index. Also the index corresponds to entry adding order. - /// Important: it does not check the index bounds, so you need to check that the index is from 0 to map.Count-1 + /// Adds a sure absent key entry. + /// Provides the performance in scenarios where you look for the present key, and using it, and if ABSENT then add the new one. 
+ /// So this method optimized NOT to look for the present item for the second time + [UnscopedRef] [MethodImpl((MethodImplOptions)256)] - public static ref Entry GetSurePresentEntryRef(this ref SmallMap4 map, int index) - where TEq : struct, IEq - where TEntries : struct, IEntries + public ref V AddSureAbsentDefaultAndGetRef(K key) { - Debug.Assert(index >= 0); - Debug.Assert(index < map._count); - if (index >= StackEntriesCount) - return ref map._entries.GetSurePresentEntryRef(index - StackEntriesCount); - switch (index) + if (_count > Stack.Capacity) + return ref AddSureAbsentDefaultAndGetRefInEntries(key); + + // Add the new entry to the stack if there is still space in stack + if (_count < Stack.Capacity) + { + var newIndex = _count++; + ref var newEntry = ref Stack.GetSurePresentItemRef(newIndex); + newEntry.Key = key; + return ref newEntry.Value; + } + + _capacityBitShift = MinHashesCapacityBitShift; + _packedHashesAndIndexes = new int[1 << MinHashesCapacityBitShift]; + + for (var i = 0; i < Stack.Capacity; ++i) + AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(GetSurePresentEntryRef(i).Key), i); + + AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(key), Stack.Capacity); + + _count = Stack.Capacity + 1; // +1 because we added the new key + _entries.Init(Stack.Capacity); // Give the heap entries the same initial capcity as Stack, effectively doubling the capacity + return ref _entries.AddKeyAndGetValueRef(key, 0); // add the new key to the entries with the 0 index in the entries + } + + /// Finds the stored value by key. If found returns ref to the value it can be modified in place. 
+ [MethodImpl((MethodImplOptions)256)] + public bool ContainsKey(K key) + { + if (_count > Stack.Capacity) { - case 0: return ref map._e0; - case 1: return ref map._e1; - case 2: return ref map._e2; - case 3: return ref map._e3; + TryGetValueRefInEntries(key, out var found); + return found; } - return ref RefTools>.GetNullRef(); + + for (var i = 0; i < _count; ++i) + if (default(TEq).Equals(key, GetSurePresentEntryRef(i).Key)) + return true; + + return false; } + [UnscopedRef] [MethodImpl((MethodImplOptions)256)] - internal static ref V TryGetValueRefByHash(this ref SmallMap4 map, K key, out bool found) - where TEq : struct, IEq - where TEntries : struct, IEntries + internal ref V TryGetValueRefInEntries(K key, out bool found) { var hash = default(TEq).GetHashCode(key); - var indexMask = (1 << map._capacityBitShift) - 1; + var indexMask = (1 << _capacityBitShift) - 1; var hashMiddleMask = HashAndIndexMask & ~indexMask; var hashMiddle = hash & hashMiddleMask; var hashIndex = hash & indexMask; #if NET7_0_OR_GREATER - ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(map._packedHashesAndIndexes); + ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(_packedHashesAndIndexes); #else - var hashesAndIndexes = map._packedHashesAndIndexes; + var hashesAndIndexes = _packedHashesAndIndexes; #endif var h = GetHash(ref hashesAndIndexes, hashIndex); @@ -1211,12 +1208,9 @@ internal static ref V TryGetValueRefByHash(this ref SmallMa // 2. 
For the equal probes check for equality the hash middle part, and update the entry if the keys are equal too if (((h >>> ProbeCountShift) == probes) & ((h & hashMiddleMask) == hashMiddle)) { - ref var e = ref map.GetSurePresentEntryRef(h & indexMask); - if (default(TEq).Equals(e.Key, key)) - { - found = true; + ref var e = ref GetSurePresentEntryRef(h & indexMask); + if (found = default(TEq).Equals(e.Key, key)) return ref e.Value; - } } h = GetHash(ref hashesAndIndexes, ++hashIndex & indexMask); @@ -1227,129 +1221,23 @@ internal static ref V TryGetValueRefByHash(this ref SmallMa return ref RefTools.GetNullRef(); } + /// Finds the stored value by key. Returns the reference to the found value or the null entry + [UnscopedRef] [MethodImpl((MethodImplOptions)256)] - private static ref V AddOrGetValueRefByHash(this ref SmallMap4 map, K key, out bool found) - where TEq : struct, IEq - where TEntries : struct, IEntries + public ref V TryGetValueRef(K key, out bool found) { - // if the free space is less than 1/8 of capacity (12.5%) then Resize - var indexMask = (1 << map._capacityBitShift) - 1; - if (indexMask - map._count <= (indexMask >>> MinFreeCapacityShift)) - indexMask = map.ResizeHashes(indexMask); + if (_count > Stack.Capacity) + return ref TryGetValueRefInEntries(key, out found); - var hash = default(TEq).GetHashCode(key); - var hashMiddleMask = HashAndIndexMask & ~indexMask; - var hashMiddle = hash & hashMiddleMask; - var hashIndex = hash & indexMask; - -#if NET7_0_OR_GREATER - ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(map._packedHashesAndIndexes); -#else - var hashesAndIndexes = map._packedHashesAndIndexes; -#endif - ref var h = ref GetHashRef(ref hashesAndIndexes, hashIndex); - - // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions - var probes = 1; - while ((h >>> ProbeCountShift) >= probes) + for (var i = 0; i < _count; ++i) { - // 2. 
For the equal probes check for equality the hash middle part, and update the entry if the keys are equal too - if (((h >>> ProbeCountShift) == probes) & ((h & hashMiddleMask) == hashMiddle)) - { - ref var e = ref map.GetSurePresentEntryRef(h & indexMask); - if (default(TEq).Equals(e.Key, key)) - { - found = true; - return ref e.Value; - } - } - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); - ++probes; + ref var e = ref GetSurePresentEntryRef(i); + if (found = default(TEq).Equals(key, e.Key)) + return ref e.Value; } - // 3. We did not find the hash and therefore the key, so insert the new entry - var hRobinHooded = h; - h = (probes << ProbeCountShift) | hashMiddle | map._count; - - // 4. If the robin hooded hash is empty then we stop - // 5. Otherwise we steal the slot with the smaller probes - probes = hRobinHooded >>> ProbeCountShift; - while (hRobinHooded != 0) - { - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); - if ((h >>> ProbeCountShift) < ++probes) - { - var tmp = h; - h = (probes << ProbeCountShift) | (hRobinHooded & HashAndIndexMask); - hRobinHooded = tmp; - probes = hRobinHooded >>> ProbeCountShift; - } - } found = false; - return ref map._entries.AddKeyAndGetValueRef(key, (map._count++) - StackEntriesCount); - } -} - -// todo: @improve ? how/where to add SIMD to improve CPU utilization but not losing perf for smaller sizes -/// -/// Fast and less-allocating hash map without thread safety nets. Please measure it in your own use case before use. -/// It is configurable in regard of hash calculation/equality via `TEq` type parameter and -/// in regard of key-value storage via `TEntries` type parameter. 
-/// -/// Details: -/// - Implemented as a struct so that the empty/default map does not allocate on heap -/// - Hashes and key-values are the separate collections enabling better cash locality and faster performance (data-oriented design) -/// - No SIMD for now to avoid complexity and costs for the smaller maps, so the map is more fit for the smaller sizes. -/// - Provides the "stable" enumeration of the entries in the added order -/// - The TryRemove method removes the hash but replaces the key-value entry with the tombstone key and the default value. -/// For instance, for the `RefEq` the tombstone is . You may redefine it in the `IEq{K}.GetTombstone()` implementation. -/// -/// -[DebuggerDisplay("{Count} of {_e0}, {_e1}, {_e2}, {_e3}, ...")] -public struct SmallMap4 - where TEq : struct, IEq - where TEntries : struct, IEntries -{ - internal byte _capacityBitShift; - internal int _count; - - // The _packedHashesAndIndexes elements are of `Int32` with the bits split as following: - // 00010|000...110|01101 - // | | |- The index into the _entries structure, 0-based. The index bit count (indexMask) is the hashes capacity - 1. - // | | | This part of the erased hash is used to get the ideal index into the hashes array, so later this part of hash may be restored from the hash index and its probes. - // | |- The remaining middle bits of the original hash - // |- 5 (MaxProbeBits) high bits of the Probe count, with the minimal value of b00001 indicating the non-empty slot. 
- internal int[] _packedHashesAndIndexes; - -#pragma warning disable IDE0044 // it tries to make entries readonly but they should stay modify-able to prevent its defensive struct copying - internal TEntries _entries; -#pragma warning restore IDE0044 - - // todo: @improve how to configure how much we store on stack - internal Entry _e0, _e1, _e2, _e3; - - /// Capacity bits - public int CapacityBitShift => _capacityBitShift; - - /// Access to the hashes and indexes - public int[] PackedHashesAndIndexes => _packedHashesAndIndexes; - - /// Number of entries in the map - public int Count => _count; - - /// Access to the key-value entries - public TEntries Entries => _entries; - - /// Capacity calculates as `1 leftShift capacityBitShift` - public SmallMap4(byte capacityBitShift) - { - _capacityBitShift = capacityBitShift; - - // the overflow tail to the hashes is the size of log2N where N==capacityBitShift, - // it is probably fine to have the check for the overflow of capacity because it will be mis-predicted only once at the end of loop (it even rarely for the lookup) - _packedHashesAndIndexes = new int[1 << capacityBitShift]; - _entries = default; - _entries.Init(capacityBitShift); + return ref RefTools.GetNullRef(); } internal int ResizeHashes(int indexMask) @@ -1402,4 +1290,26 @@ internal int ResizeHashes(int indexMask) return newIndexMask; } } + +/// Type wrapper to minimize the number of generic args to be specified by the end-user +public struct SmallMap4() where TEq : struct, IEq +{ + /// Map with 4 elements on stack and entries baked by the single array + public SmallMap>, SmallMap.SingleArrayEntries> Map; +} + +/// Type wrapper to minimize the number of generic args to be specified by the end-user +public struct SmallMap8() where TEq : struct, IEq +{ + /// Map with 8 elements on stack and entries baked by the single array + public SmallMap>, SmallMap.SingleArrayEntries> Map; +} + +/// Type wrapper to minimize the number of generic args to be specified by the 
end-user +public struct SmallMap16() where TEq : struct, IEq +{ + /// Map with 16 elements on stack and entries baked by the single array + public SmallMap>, SmallMap.SingleArrayEntries> Map; +} + #nullable restore \ No newline at end of file diff --git a/src/FastExpressionCompiler/TestTools.cs b/src/FastExpressionCompiler/TestTools.cs index f90de3fd..33d69203 100644 --- a/src/FastExpressionCompiler/TestTools.cs +++ b/src/FastExpressionCompiler/TestTools.cs @@ -157,9 +157,7 @@ public static void PrintIL(this IDelegateDebugInfo debugInfo, [CallerMemberName] { if (!AllowPrintIL) return; - SmallMap4, - SmallMap4.SingleArrayEntries> - > uniquePrinted = default; + SmallMap4> uniquePrinted = default; var totalNestedCount = 0; PrintIL(debugInfo, ref totalNestedCount, ref uniquePrinted, tag ?? "top"); @@ -167,14 +165,13 @@ public static void PrintIL(this IDelegateDebugInfo debugInfo, [CallerMemberName] if (totalNestedCount > 0) { Console.WriteLine("--------------------------------------"); - Console.WriteLine($"Nested lambdas total: {totalNestedCount}, unique: {uniquePrinted.Count}"); + Console.WriteLine($"Nested lambdas total: {totalNestedCount}, unique: {uniquePrinted.Map.Count}"); } } private static void PrintIL(IDelegateDebugInfo debugInfo, ref int totalNestedCount, - ref SmallMap4, - SmallMap4.SingleArrayEntries>> uniquePrinted, + ref SmallMap4> uniquePrinted, string tag) { Debug.Assert(tag != null, "tag should not be null"); @@ -184,7 +181,7 @@ private static void PrintIL(IDelegateDebugInfo debugInfo, var n = 0; foreach (var nested in debugInfo.EnumerateNestedLambdas()) { - ref var printedTag = ref uniquePrinted.AddOrGetValueRef(nested, out var printed); + ref var printedTag = ref uniquePrinted.Map.AddOrGetValueRef(nested, out var printed); if (printed) PrintIL($"{printedTag}", "printed already", static (ap, s) => s.Append(ap)); else diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs 
b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index b353c593..512296f7 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -107,6 +107,7 @@ .NET SDK 9.0.203 | Double_and_Sum_AsSpan | 9.959 ns | 0.2341 ns | 0.4567 ns | 0.64 | 0.04 | 1 | 29 | 0 | 0 | - | NA | | Double_and_Sum_BySwitch | 15.605 ns | 0.3465 ns | 0.7532 ns | 1.00 | 0.07 | 2 | 35 | 0 | 0 | - | NA | + ## Indexer using Unsafe.Add vs AsSpan()[index] | Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio | From b9f267361973532f4da53d5487701f0d03d34789 Mon Sep 17 00:00:00 2001 From: dadhi Date: Mon, 2 Jun 2025 23:18:48 +0200 Subject: [PATCH 09/32] some funny benchmarks --- src/FastExpressionCompiler/ImTools.cs | 16 +++++++++ .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 33 ++++++++++++------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 72e91a96..0825e584 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -485,6 +485,22 @@ public ref T GetSurePresentItemRef(int index) return ref Rest[index - stackCap]; } + /// Returns a surely present item ref by its index + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + public ref T GetSurePresentItemRef2(int index) + { + Debug.Assert(Count != 0); + Debug.Assert(index >= 0 & index < Count); + + var stackCap = Stack.Capacity; + if (index < stackCap) + return ref Stack.GetSurePresentItemRef(index); + + Debug.Assert(Rest != null); + return ref Rest.GetSurePresentItemRef(index - stackCap); + } + /// Appends the default item to the end of the list and returns the reference to it. 
[UnscopedRef] [MethodImpl((MethodImplOptions)256)] diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index 512296f7..64414f5d 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -114,6 +114,15 @@ .NET SDK 9.0.203 |----------------------- |---------:|---------:|---------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:| | Double_and_Sum_Indexer | 17.29 ns | 0.380 ns | 0.355 ns | 1.00 | 0.03 | 1 | 57 | 0 | 0 | - | NA | | Double_and_Sum_AsSpan | 22.10 ns | 0.311 ns | 0.275 ns | 1.28 | 0.03 | 2 | 57 | 0 | 0 | - | NA | + + + ## Indexer using Rest[] vs. Rest.GetSurePresentItemRef(i) + + | Method | Mean | Error | StdDev | Ratio | RatioSD | Rank | BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio | + |----------------------- |---------:|---------:|---------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:| + | Double_and_Sum_AsSpan | 17.97 ns | 0.454 ns | 1.325 ns | 0.83 | 0.08 | 1 | 41 | 0 | 0 | - | NA | + | Double_and_Sum_Indexer | 21.82 ns | 0.478 ns | 1.309 ns | 1.00 | 0.08 | 2 | 49 | 0 | 0 | - | NA | + */ SmallList> _list; @@ -139,18 +148,18 @@ public int Double_and_Sum_Indexer() return sum; } - // [Benchmark] - // public int Double_and_Sum_AsSpan() - // { - // var sum = 0; - // for (var i = 0; i < _list.Count; i++) - // { - // ref var n = ref _list.GetSurePresentItemRef2(i); - // n += n; - // sum += n; - // } - // return sum; - // } + [Benchmark] + public int Double_and_Sum_AsSpan() + { + var sum = 0; + for (var i = 0; i < _list.Count; i++) + { + ref var n = ref _list.GetSurePresentItemRef2(i); + n += n; + sum += n; + } + return sum; + } } 
[MemoryDiagnoser, RankColumn, Orderer(BenchmarkDotNet.Order.SummaryOrderPolicy.FastestToSlowest)] From 5012f9ee55119847730ed8c4a2f05ec91ea98bc7 Mon Sep 17 00:00:00 2001 From: dadhi Date: Tue, 3 Jun 2025 12:48:27 +0200 Subject: [PATCH 10/32] small use of GetSurePresentItemRef --- src/FastExpressionCompiler/ImTools.cs | 24 ++++++------------- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 24 +++++++++---------- 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 0825e584..fde508a4 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -143,7 +143,6 @@ public static ref T AddDefaultAndGetRef(ref T[] items, int index, int initial public static ref T GetSurePresentItemRef(this ref SmallList source, int index) => ref source.Items[index]; - // todo: @perf @wip benchmark this /// Returns surely present item ref by its index without boundary checks [MethodImpl((MethodImplOptions)256)] public static ref T GetSurePresentItemRef(this T[] items, int index) @@ -465,7 +464,7 @@ public ref T this[int index] return ref Stack.GetSurePresentItemRef(index); Debug.Assert(Rest != null, "Expecting deeper items are already existing on heap"); - return ref Rest[index - stackCap]; + return ref Rest.GetSurePresentItemRef(index - stackCap); } } @@ -854,14 +853,8 @@ public void Init(int capacityPowerOfTwoPlease) => /// [MethodImpl((MethodImplOptions)256)] - public ref Entry GetSurePresentEntryRef(int index) - { -#if NET7_0_OR_GREATER - return ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_entries), index); -#else - return ref _entries[index]; -#endif - } + public ref Entry GetSurePresentEntryRef(int index) => + ref _entries.GetSurePresentItemRef(index); /// [MethodImpl((MethodImplOptions)256)] @@ -869,11 +862,8 @@ public ref V AddKeyAndGetValueRef(K key, int index) { if (index == _entries.Length) Array.Resize(ref _entries, index << 1); -#if 
NET7_0_OR_GREATER - ref var e = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_entries), index); -#else - ref var e = ref _entries[index]; -#endif + + ref var e = ref _entries.GetSurePresentItemRef(index); e.Key = key; return ref e.Value; } @@ -1016,13 +1006,13 @@ private ref V AddOrGetValueRefInEntries(K key, out bool found) private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) { + var hashIndex = hash & IndexMask; + #if NET7_0_OR_GREATER ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(_packedHashesAndIndexes); #else var hashesAndIndexes = _packedHashesAndIndexes; #endif - var hashIndex = hash & IndexMask; - // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions ref var h = ref GetHashRef(ref hashesAndIndexes, hashIndex); var probes = 1; diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index 64414f5d..73868c6e 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -148,18 +148,18 @@ public int Double_and_Sum_Indexer() return sum; } - [Benchmark] - public int Double_and_Sum_AsSpan() - { - var sum = 0; - for (var i = 0; i < _list.Count; i++) - { - ref var n = ref _list.GetSurePresentItemRef2(i); - n += n; - sum += n; - } - return sum; - } + // [Benchmark] + // public int Double_and_Sum_AsSpan() + // { + // var sum = 0; + // for (var i = 0; i < _list.Count; i++) + // { + // ref var n = ref _list.GetSurePresentItemRef2(i); + // n += n; + // sum += n; + // } + // return sum; + // } } [MemoryDiagnoser, RankColumn, Orderer(BenchmarkDotNet.Order.SummaryOrderPolicy.FastestToSlowest)] From 05665bd398a33ae51d32d9778067293d601230f1 Mon Sep 17 00:00:00 2001 From: dadhi Date: Tue, 3 Jun 2025 19:39:33 +0200 Subject: 
[PATCH 11/32] fix the assert with SmallList.RemoveLastSurePresentItem; cleanup --- .../FastExpressionCompiler.cs | 1 - src/FastExpressionCompiler/ILReader.cs | 44 ++++++++++++++----- src/FastExpressionCompiler/ImTools.cs | 26 +++-------- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/FastExpressionCompiler/FastExpressionCompiler.cs b/src/FastExpressionCompiler/FastExpressionCompiler.cs index e783f424..c077775b 100644 --- a/src/FastExpressionCompiler/FastExpressionCompiler.cs +++ b/src/FastExpressionCompiler/FastExpressionCompiler.cs @@ -9804,7 +9804,6 @@ internal static StringBuilder ToCSharpString(this Expression e, sb.Append("new ").Append(e.Type.GetElementType().ToCode(stripNamespace, printType)); sb.Append(e.NodeType == ExpressionType.NewArrayInit ? "[]{" : "["); - // todo: @wip @minor we probably don't each array bound on the new line var exprs = x.Expressions; if (exprs.Count == 1) exprs[0].ToCSharpString(sb, EnclosedIn.AvoidParens, ref named, diff --git a/src/FastExpressionCompiler/ILReader.cs b/src/FastExpressionCompiler/ILReader.cs index 4d3fc781..e8c6b35c 100644 --- a/src/FastExpressionCompiler/ILReader.cs +++ b/src/FastExpressionCompiler/ILReader.cs @@ -336,13 +336,37 @@ internal ILInstruction(int offset, OpCode opCode) } } +// internal struct BaseIL +// { +// public OperandType OperandType; +// public int Offset; +// public OpCode OpCode; + +// // List of possible extras: +// // - InlineNoneInstruction does not have an extra +// // +// // - Stores `int` for +// // OperandType.InlineBrTarget->Delta, +// // OperandType.ShortInlineBrTarget->Delta, +// // OperandType.InlineI->Int32 +// // todo: may be store delta inline as ExtraOpArrayIndex itself +// public const int ExtraDeltasArrayIndex = 1; + +// // This is for OperandType.InlineSwitch +// public const int ExtraSwitchesArrayIndex = 2; + +// public int ExtraOpArrayIndex; +// public int ExtraOpItemIndex; +// } + // todo: @wip -/// Data-oriented structure SOA of IL instructions. 
-public struct ILs -{ - public SmallList> Offset; - public SmallList> OpCodes; -} +// Data-oriented structure SOA of IL instructions. +// internal struct ILs +// { +// public SmallList> BaseILs; +// public SmallList> Deltas; +// public SmallList<(int[] Deltas, int[] TargetOffsets), Stack2<(int[] Deltas, int[] TargetOffsets)>> Switches; +// } public sealed class InlineNoneInstruction : ILInstruction { @@ -525,15 +549,15 @@ internal InlineTokInstruction(int offset, OpCode opCode, int token, ITokenResolv public sealed class InlineStringInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineString; - private readonly ITokenResolver _resolver; + // private readonly ITokenResolver _resolver; public int Token { get; } - private string _string; - public string String => _string ??= _resolver.AsString(Token); + // private string _string; + public string String;// => _string ??= _resolver.AsString(Token); internal InlineStringInstruction(int offset, OpCode opCode, int token, ITokenResolver resolver) : base(offset, opCode) { - _resolver = resolver; + String = resolver.AsString(token); Token = token; } } diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index fde508a4..ba15d825 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -473,8 +473,8 @@ public ref T this[int index] [MethodImpl((MethodImplOptions)256)] public ref T GetSurePresentItemRef(int index) { - Debug.Assert(Count != 0); - Debug.Assert(index >= 0 & index < Count); + Debug.Assert(Count != 0, "SmallList.GetSurePresentItemRef: list should not be empty"); + Debug.Assert(index >= 0 & index < Count, $"SmallList.GetSurePresentItemRef: index {index} should be less than Count {Count}"); var stackCap = Stack.Capacity; if (index < stackCap) @@ -484,22 +484,6 @@ public ref T GetSurePresentItemRef(int index) return ref Rest[index - stackCap]; } - /// Returns a surely present item ref by its index - 
[UnscopedRef] - [MethodImpl((MethodImplOptions)256)] - public ref T GetSurePresentItemRef2(int index) - { - Debug.Assert(Count != 0); - Debug.Assert(index >= 0 & index < Count); - - var stackCap = Stack.Capacity; - if (index < stackCap) - return ref Stack.GetSurePresentItemRef(index); - - Debug.Assert(Rest != null); - return ref Rest.GetSurePresentItemRef(index - stackCap); - } - /// Appends the default item to the end of the list and returns the reference to it. [UnscopedRef] [MethodImpl((MethodImplOptions)256)] @@ -577,9 +561,9 @@ public ref T GetLastSurePresentItem() [MethodImpl((MethodImplOptions)256)] public void RemoveLastSurePresentItem() { - Debug.Assert(Count != 0, "Expecting that the list is not empty"); - var index = --Count; - GetSurePresentItemRef(index) = default; + Debug.Assert(Count != 0, "SmallList.RemoveLastSurePresentItem: Expecting that the list is not empty"); + GetSurePresentItemRef(Count - 1) = default; + --Count; } /// Returns an enumerator struct From 680764e87353a785d9fc6111a3cc7b35750bd1c8 Mon Sep 17 00:00:00 2001 From: dadhi Date: Wed, 4 Jun 2025 10:33:10 +0200 Subject: [PATCH 12/32] simplify ILReader; make it eager tbd --- src/FastExpressionCompiler/ILReader.cs | 341 +++++-------------------- 1 file changed, 59 insertions(+), 282 deletions(-) diff --git a/src/FastExpressionCompiler/ILReader.cs b/src/FastExpressionCompiler/ILReader.cs index e8c6b35c..40c6f0e2 100644 --- a/src/FastExpressionCompiler/ILReader.cs +++ b/src/FastExpressionCompiler/ILReader.cs @@ -85,7 +85,7 @@ public static StringBuilder ToILString(this IEnumerable ilInstruc try { s = line++ > 0 ? 
s.AppendLine() : s; - s.Append($"{il.Offset,-4}{il.OpCode}"); + Formatter.Label(s, il.Offset).Append(": ").Append(il.OpCode); switch (il.OperandType) { case OperandType.InlineField: @@ -123,22 +123,21 @@ public static StringBuilder ToILString(this IEnumerable ilInstruc s.Append(' ').Append(tok.Member.Name); break; case OperandType.InlineBrTarget: - var br = (InlineBrTargetInstruction)il; - s.Append(' ').Append(br.TargetOffset); + Formatter.Label(s.Append(' '), ((InlineBrTargetInstruction)il).TargetOffset); break; case OperandType.InlineSwitch: var sw = (InlineSwitchInstruction)il; - s.Append(' '); - foreach (var offset in sw.TargetOffsets) - s.Append(offset).Append(','); + Formatter.MultipleLabels(s.Append(" switch "), sw.TargetOffsets); break; case OperandType.ShortInlineBrTarget: var sbr = (ShortInlineBrTargetInstruction)il; s.Append(' ').Append(sbr.TargetOffset); break; + case OperandType.InlineSig: + Formatter.SigByteArrayToString(s.Append(' '), ((InlineSigInstruction)il).Signature); + break; case OperandType.InlineString: - var si = (InlineStringInstruction)il; - s.Append(" \"").Append(si.String).Append('"'); + Formatter.EscapedString(s.Append(' '), ((InlineStringInstruction)il).String); break; case OperandType.ShortInlineI: var sii = (ShortInlineIInstruction)il; @@ -161,12 +160,10 @@ public static StringBuilder ToILString(this IEnumerable ilInstruc s.Append(' ').Append(ir.Double); break; case OperandType.InlineVar: - var iv = (InlineVarInstruction)il; - s.Append(' ').Append(iv.Ordinal); + Formatter.Argument(s.Append(' '), ((InlineVarInstruction)il).Ordinal); break; case OperandType.ShortInlineVar: - var siv = (ShortInlineVarInstruction)il; - s.Append(' ').Append(siv.Ordinal); + Formatter.Argument(s.Append(' '), ((ShortInlineVarInstruction)il).Ordinal); break; default: break; @@ -192,14 +189,15 @@ public sealed class ILReader : IEnumerable static ILReader() { + // Populate the one-byte and two-byte OpCode arrays foreach (var fi in 
typeof(OpCodes).GetFields(BindingFlags.Public | BindingFlags.Static)) { var opCode = (OpCode)fi.GetValue(null); var value = (ushort)opCode.Value; - if (value < 0x100) + if (value < 0x100) // 0x100 - 256, 0b0000_0000_0000_0000 _oneByteOpCodes[value] = opCode; - else if ((value & 0xff00) == 0xfe00) + else if ((value & 0xff00) == 0xfe00) // 0xFF00 - 0b1111_1111_0000_0000, 0xFE00 - 0b1111_1110_0000_0000 _twoByteOpCodes[value & 0xff] = opCode; } } @@ -236,6 +234,7 @@ private ILInstruction Next(ref int position) ? _oneByteOpCodes[code] : _twoByteOpCodes[ReadByte(ref position)]; + var token = 0; return opCode.OperandType switch { OperandType.InlineNone => new InlineNoneInstruction(offset, opCode), @@ -258,17 +257,17 @@ private ILInstruction Next(ref int position) // 16-bit integer containing the ordinal of a local variable or an argument OperandType.InlineVar => new InlineVarInstruction(offset, opCode, ReadUInt16(ref position)), // 32-bit metadata string token - OperandType.InlineString => new InlineStringInstruction(offset, opCode, ReadInt32(ref position), _resolver), + OperandType.InlineString => new InlineStringInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsString(token)), // 32-bit metadata signature token - OperandType.InlineSig => new InlineSigInstruction(offset, opCode, ReadInt32(ref position), _resolver), + OperandType.InlineSig => new InlineSigInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsSignature(token)), // 32-bit metadata token - OperandType.InlineMethod => new InlineMethodInstruction(offset, opCode, ReadInt32(ref position), _resolver), + OperandType.InlineMethod => new InlineMethodInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsMethod(token)), // 32-bit metadata token - OperandType.InlineField => new InlineFieldInstruction(_resolver, offset, opCode, ReadInt32(ref position)), + OperandType.InlineField => new InlineFieldInstruction(offset, opCode, token = ReadInt32(ref position), 
_resolver.AsField(token)), // 32-bit metadata token - OperandType.InlineType => new InlineTypeInstruction(offset, opCode, ReadInt32(ref position), _resolver), + OperandType.InlineType => new InlineTypeInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsType(token)), // FieldRef, MethodRef, or TypeRef token - OperandType.InlineTok => new InlineTokInstruction(offset, opCode, ReadInt32(ref position), _resolver), + OperandType.InlineTok => new InlineTokInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsMember(token)), // 32-bit integer argument to a switch instruction OperandType.InlineSwitch => new InlineSwitchInstruction(offset, opCode, ReadDeltas(ref position)), _ => throw new NotSupportedException($"Unsupported operand type: {opCode.OperandType}"), @@ -359,8 +358,8 @@ internal ILInstruction(int offset, OpCode opCode) // public int ExtraOpItemIndex; // } -// todo: @wip -// Data-oriented structure SOA of IL instructions. +//todo: @wip +///Data-oriented structure SOA of IL instructions. 
// internal struct ILs // { // public SmallList> BaseILs; @@ -470,62 +469,52 @@ internal ShortInlineRInstruction(int offset, OpCode opCode, float value) public sealed class InlineFieldInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineField; - private readonly ITokenResolver _resolver; - public int Token { get; } - private FieldInfo _field; - public FieldInfo Field => _field ??= _resolver.AsField(Token); - internal InlineFieldInstruction(ITokenResolver resolver, int offset, OpCode opCode, int token) + public readonly int Token; + public readonly FieldInfo Field; + internal InlineFieldInstruction(int offset, OpCode opCode, int token, FieldInfo field) : base(offset, opCode) { - _resolver = resolver; Token = token; + Field = field; } } public sealed class InlineMethodInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineMethod; - private readonly ITokenResolver _resolver; - public int Token { get; } - private MethodBase _method; - public MethodBase Method => _method ??= _resolver.AsMethod(Token); + public readonly int Token; + public readonly MethodBase Method; - internal InlineMethodInstruction(int offset, OpCode opCode, int token, ITokenResolver resolver) + internal InlineMethodInstruction(int offset, OpCode opCode, int token, MethodBase method) : base(offset, opCode) { - _resolver = resolver; Token = token; + Method = method; } } public sealed class InlineTypeInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineType; - private readonly ITokenResolver _resolver; - public int Token { get; } - private Type _type; - public Type Type => _type ??= _resolver.AsType(Token); - - internal InlineTypeInstruction(int offset, OpCode opCode, int token, ITokenResolver resolver) + public readonly int Token; + public readonly Type Type; + internal InlineTypeInstruction(int offset, OpCode opCode, int token, Type type) : base(offset, opCode) { - _resolver = resolver; 
Token = token; + Type = type; } } public sealed class InlineSigInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineSig; - private readonly ITokenResolver _resolver; - public int Token { get; } - private byte[] _signature; - public byte[] Signature => _signature ??= _resolver.AsSignature(Token); - - internal InlineSigInstruction(int offset, OpCode opCode, int token, ITokenResolver resolver) + public readonly int Token; + public readonly byte[] Signature; + internal InlineSigInstruction(int offset, OpCode opCode, int token, byte[] signature) : base(offset, opCode) { - _resolver = resolver; + Signature = signature; Token = token; } } @@ -533,31 +522,26 @@ internal InlineSigInstruction(int offset, OpCode opCode, int token, ITokenResolv public sealed class InlineTokInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineTok; - private readonly ITokenResolver _resolver; - public int Token { get; } - private MemberInfo _member; - public MemberInfo Member => _member ??= _resolver.AsMember(Token); - - internal InlineTokInstruction(int offset, OpCode opCode, int token, ITokenResolver resolver) + public readonly int Token; + public readonly MemberInfo Member; + internal InlineTokInstruction(int offset, OpCode opCode, int token, MemberInfo member) : base(offset, opCode) { - _resolver = resolver; Token = token; + Member = member; } } public sealed class InlineStringInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineString; - // private readonly ITokenResolver _resolver; - public int Token { get; } - // private string _string; - public string String;// => _string ??= _resolver.AsString(Token); + public readonly int Token; + public readonly string String; - internal InlineStringInstruction(int offset, OpCode opCode, int token, ITokenResolver resolver) + internal InlineStringInstruction(int offset, OpCode opCode, int token, string s) : base(offset, opCode) { - String = 
resolver.AsString(token); + String = s; Token = token; } } @@ -658,43 +642,29 @@ public byte[] GetByteArray() } } -public interface IFormatter -{ - string Int32ToHex(int int32); - string Int16ToHex(int int16); - string Int8ToHex(int int8); - string Argument(int ordinal); - string EscapedString(string str); - string Label(int offset); - string MultipleLabels(int[] offsets); - string SigByteArrayToString(byte[] sig); -} - -public struct DefaultFormatter : IFormatter +public static class Formatter { - public string Int32ToHex(int int32) => int32.ToString("X8"); - public string Int16ToHex(int int16) => int16.ToString("X4"); - public string Int8ToHex(int int8) => int8.ToString("X2"); - public string Argument(int ordinal) => $"V_{ordinal}"; - public string Label(int offset) => $"IL_{offset:x4}"; + public static StringBuilder Int32ToHex(StringBuilder sb, int int32) => sb.Append(int32.ToString("X8")); + public static StringBuilder Int16ToHex(StringBuilder sb, int int16) => sb.Append(int16.ToString("X4")); + public static StringBuilder Int8ToHex(StringBuilder sb, int int8) => sb.Append(int8.ToString("X2")); + public static StringBuilder Argument(StringBuilder sb, int ordinal) => sb.Append($"V_{ordinal}"); + public static StringBuilder Label(StringBuilder sb, int offset) => sb.Append($"IL_{offset:D4}"); - public string MultipleLabels(int[] offsets) + public static StringBuilder MultipleLabels(StringBuilder sb, int[] offsets) { - var sb = new StringBuilder(); var length = offsets.Length; for (var i = 0; i < length; i++) { sb.AppendFormat(i == 0 ? 
"(" : ", "); - sb.Append(Label(offsets[i])); + sb.Append(Label(sb, offsets[i])); } sb.AppendFormat(")"); - return sb.ToString(); + return sb; } - public string EscapedString(string str) + public static StringBuilder EscapedString(StringBuilder sb, string str) { var length = str.Length; - var sb = new StringBuilder(length * 2); sb.Append('"'); for (var i = 0; i < length; i++) @@ -716,212 +686,19 @@ public string EscapedString(string str) sb.Append(ch); } sb.Append('"'); - return sb.ToString(); + return sb; } - public string SigByteArrayToString(byte[] sig) + public static StringBuilder SigByteArrayToString(StringBuilder sb, byte[] sig) { - var sb = new StringBuilder(); var length = sig.Length; for (var i = 0; i < length; i++) { sb.AppendFormat(i == 0 ? "SIG [" : " "); - sb.Append(Int8ToHex(sig[i])); + sb.Append(Int8ToHex(sb, sig[i])); } sb.AppendFormat("]"); - return sb.ToString(); - } -} - -// todo: @feat waiting for C# support of the default/optional generic parameters, e.g. for `ReadableILStringProcessor` -public sealed class ReadableILStringProcessor where TFormatter : struct, IFormatter -{ - private static readonly TFormatter _formatProvider = default; - readonly TextWriter _writer; - - public ReadableILStringProcessor(TextWriter writer) => _writer = writer; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void Write(ILInstruction i, string operandString) => - _writer.WriteLine("IL_{0:x4}: {1,-10} {2}", i.Offset, i.OpCode.Name, operandString); - - public void ProcessInstruction(ILInstruction i) - { - switch (i.OperandType) - { - case OperandType.InlineBrTarget: - Write(i, _formatProvider.Label(((InlineBrTargetInstruction)i).TargetOffset)); - break; - case OperandType.InlineField: - var inlineField = (InlineFieldInstruction)i; - string field; - try - { - field = inlineField.Field + "/" + inlineField.Field.DeclaringType; - } - catch (Exception ex) - { - field = "!" 
+ ex.Message + "!"; - } - Write(i, field); - break; - case OperandType.InlineI: - Write(i, ((InlineIInstruction)i).Int32.ToString()); - break; - case OperandType.InlineI8: - Write(i, ((InlineI8Instruction)i).Int64.ToString()); - break; - case OperandType.InlineMethod: - var inlineMethod = (InlineMethodInstruction)i; - string method; - try - { - method = inlineMethod.Method + "/" + inlineMethod.Method.DeclaringType; - } - catch (Exception ex) - { - method = "!" + ex.Message + "!"; - } - Write(i, method); - break; - case OperandType.InlineNone: - Write(i, string.Empty); - break; - case OperandType.InlineR: - Write(i, ((InlineRInstruction)i).Double.ToString()); - break; - case OperandType.InlineSig: - Write(i, _formatProvider.SigByteArrayToString(((InlineSigInstruction)i).Signature)); - break; - case OperandType.InlineString: - Write(i, _formatProvider.EscapedString(((InlineStringInstruction)i).String)); - break; - case OperandType.InlineSwitch: - var inlineSwitch = (InlineSwitchInstruction)i; - Write(i, _formatProvider.MultipleLabels(inlineSwitch.TargetOffsets)); - break; - case OperandType.InlineTok: - var inlineTok = (InlineTokInstruction)i; - string member; - try - { - member = inlineTok.Member + "/" + inlineTok.Member.DeclaringType; - } - catch (Exception ex) - { - member = "!" + ex.Message + "!"; - } - Write(i, member); - break; - case OperandType.InlineType: - var inlineType = (InlineTypeInstruction)i; - string type; - try - { - type = inlineType.Type.ToString(); - } - catch (Exception ex) - { - type = "!" 
+ ex.Message + "!"; - } - Write(i, type); - break; - case OperandType.InlineVar: - var inlineVar = (InlineVarInstruction)i; - Write(i, _formatProvider.Argument(inlineVar.Ordinal)); - break; - case OperandType.ShortInlineBrTarget: - var shortInlineBrTarget = (ShortInlineBrTargetInstruction)i; - Write(i, _formatProvider.Label(shortInlineBrTarget.TargetOffset)); - break; - case OperandType.ShortInlineI: - Write(i, ((ShortInlineIInstruction)i).Byte.ToString()); - break; - case OperandType.ShortInlineR: - Write(i, ((ShortInlineRInstruction)i).Single.ToString()); - break; - case OperandType.ShortInlineVar: - var shortInlineVar = (ShortInlineVarInstruction)i; - Write(i, _formatProvider.Argument(shortInlineVar.Ordinal)); - break; - default: - Debug.Fail("all cases are covered above, so it is not expected to reach here"); - break; - } - } -} - -public sealed class RawILStringProcessor where TFormatter : struct, IFormatter -{ - static readonly TFormatter _formatter = default; - readonly ReadableILStringProcessor _fallbackProcessor; - readonly TextWriter _writer; - - public RawILStringProcessor(TextWriter writer) - { - _fallbackProcessor = new ReadableILStringProcessor(writer); - _writer = writer; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void Write(ILInstruction i, string operandString) => - _writer.WriteLine("IL_{0:x4}: {1,-4:x2}| {2, -8}", i.Offset, i.OpCode.Value, operandString); - - public void ProcessInstruction(ILInstruction i) - { - switch (i.OperandType) - { - case OperandType.InlineBrTarget: - Write(i, _formatter.Int32ToHex(((InlineBrTargetInstruction)i).TargetOffset)); - break; - case OperandType.InlineField: - Write(i, _formatter.Int32ToHex(((InlineFieldInstruction)i).Token)); - break; - case OperandType.InlineI: - case OperandType.InlineI8: - _fallbackProcessor.ProcessInstruction(i); - break; - case OperandType.InlineMethod: - Write(i, _formatter.Int32ToHex(((InlineMethodInstruction)i).Token)); - break; - case OperandType.InlineNone: - 
case OperandType.InlineR: - _fallbackProcessor.ProcessInstruction(i); - break; - case OperandType.InlineSig: - Write(i, _formatter.Int32ToHex(((InlineSigInstruction)i).Token)); - break; - case OperandType.InlineString: - Write(i, _formatter.Int32ToHex(((InlineStringInstruction)i).Token)); - break; - case OperandType.InlineSwitch: - Write(i, "..."); - break; - case OperandType.InlineTok: - Write(i, _formatter.Int32ToHex(((InlineTokInstruction)i).Token)); - break; - case OperandType.InlineType: - Write(i, _formatter.Int32ToHex(((InlineTypeInstruction)i).Token)); - break; - case OperandType.InlineVar: - Write(i, _formatter.Int16ToHex(((InlineVarInstruction)i).Ordinal)); - break; - case OperandType.ShortInlineBrTarget: - Write(i, _formatter.Int8ToHex(((ShortInlineBrTargetInstruction)i).Delta)); - break; - case OperandType.ShortInlineI: - Write(i, _formatter.Int8ToHex(((ShortInlineIInstruction)i).Byte)); - break; - case OperandType.ShortInlineR: - _fallbackProcessor.ProcessInstruction(i); - break; - case OperandType.ShortInlineVar: - Write(i, _formatter.Int8ToHex(((ShortInlineVarInstruction)i).Ordinal)); - break; - default: - Debug.Fail("all cases are covered above, so it is not expected to reach here"); - break; - } + return sb; } } From 7a47bf8156b55f9c9d66a049c78bd5f037f1c7a0 Mon Sep 17 00:00:00 2001 From: dadhi Date: Wed, 4 Jun 2025 10:47:19 +0200 Subject: [PATCH 13/32] improv --- src/FastExpressionCompiler/ILReader.cs | 114 ++++++++++++------------- 1 file changed, 53 insertions(+), 61 deletions(-) diff --git a/src/FastExpressionCompiler/ILReader.cs b/src/FastExpressionCompiler/ILReader.cs index 40c6f0e2..aad9b7e3 100644 --- a/src/FastExpressionCompiler/ILReader.cs +++ b/src/FastExpressionCompiler/ILReader.cs @@ -85,9 +85,15 @@ public static StringBuilder ToILString(this IEnumerable ilInstruc try { s = line++ > 0 ? 
s.AppendLine() : s; - Formatter.Label(s, il.Offset).Append(": ").Append(il.OpCode); + ILFormatter.Label(s, il.Offset).Append(": ").Append(il.OpCode); switch (il.OperandType) { + case OperandType.InlineBrTarget: + ILFormatter.Label(s.Append(' '), ((InlineBrTargetInstruction)il).TargetOffset); + break; + case OperandType.InlineI: + s.Append(' ').Append(((InlineIInstruction)il).Int32); + break; case OperandType.InlineField: var f = (InlineFieldInstruction)il; s.Append(' ') @@ -95,6 +101,9 @@ public static StringBuilder ToILString(this IEnumerable ilInstruc .AppendTypeName(f.Field.DeclaringType).Append('.') .Append(f.Field.Name); break; + case OperandType.InlineI8: + s.Append(' ').Append(((InlineI8Instruction)il).Int64); + break; case OperandType.InlineMethod: var m = (InlineMethodInstruction)il; var sig = m.Method.ToString(); @@ -114,56 +123,40 @@ public static StringBuilder ToILString(this IEnumerable ilInstruc else s.Append(' ').AppendTypeName(m.Method.DeclaringType).Append('.').Append(sig); break; - case OperandType.InlineType: - var t = (InlineTypeInstruction)il; - s.Append(' ').AppendTypeName(t.Type); - break; - case OperandType.InlineTok: - var tok = (InlineTokInstruction)il; - s.Append(' ').Append(tok.Member.Name); - break; - case OperandType.InlineBrTarget: - Formatter.Label(s.Append(' '), ((InlineBrTargetInstruction)il).TargetOffset); + case OperandType.InlineNone: break; - case OperandType.InlineSwitch: - var sw = (InlineSwitchInstruction)il; - Formatter.MultipleLabels(s.Append(" switch "), sw.TargetOffsets); - break; - case OperandType.ShortInlineBrTarget: - var sbr = (ShortInlineBrTargetInstruction)il; - s.Append(' ').Append(sbr.TargetOffset); + case OperandType.InlineR: + s.Append(' ').Append(((InlineRInstruction)il).Double); break; case OperandType.InlineSig: - Formatter.SigByteArrayToString(s.Append(' '), ((InlineSigInstruction)il).Signature); + ILFormatter.SigByteArrayToString(s.Append(' '), ((InlineSigInstruction)il).Signature); break; case 
OperandType.InlineString: - Formatter.EscapedString(s.Append(' '), ((InlineStringInstruction)il).String); + ILFormatter.EscapedString(s.Append(' '), ((InlineStringInstruction)il).String); break; - case OperandType.ShortInlineI: - var sii = (ShortInlineIInstruction)il; - s.Append(' ').Append(sii.Byte); + case OperandType.InlineSwitch: + ILFormatter.MultipleLabels(s.Append(" switch "), ((InlineSwitchInstruction)il).TargetOffsets); break; - case OperandType.InlineI: - var ii = (InlineIInstruction)il; - s.Append(' ').Append(ii.Int32); + case OperandType.InlineTok: + s.Append(' ').Append(((InlineTokInstruction)il).Member.Name); break; - case OperandType.InlineI8: - var i8 = (InlineI8Instruction)il; - s.Append(' ').Append(i8.Int64); + case OperandType.InlineType: + s.Append(' ').AppendTypeName(((InlineTypeInstruction)il).Type); break; - case OperandType.ShortInlineR: - var sir = (ShortInlineRInstruction)il; - s.Append(' ').Append(sir.Single); + case OperandType.InlineVar: + ILFormatter.Argument(s.Append(' '), ((InlineVarInstruction)il).Ordinal); break; - case OperandType.InlineR: - var ir = (InlineRInstruction)il; - s.Append(' ').Append(ir.Double); + case OperandType.ShortInlineBrTarget: + s.Append(' ').Append(((ShortInlineBrTargetInstruction)il).TargetOffset); break; - case OperandType.InlineVar: - Formatter.Argument(s.Append(' '), ((InlineVarInstruction)il).Ordinal); + case OperandType.ShortInlineI: + s.Append(' ').Append(((ShortInlineIInstruction)il).Byte); + break; + case OperandType.ShortInlineR: + s.Append(' ').Append(((ShortInlineRInstruction)il).Single); break; case OperandType.ShortInlineVar: - Formatter.Argument(s.Append(' '), ((ShortInlineVarInstruction)il).Ordinal); + ILFormatter.Argument(s.Append(' '), ((ShortInlineVarInstruction)il).Ordinal); break; default: break; @@ -237,39 +230,39 @@ private ILInstruction Next(ref int position) var token = 0; return opCode.OperandType switch { - OperandType.InlineNone => new InlineNoneInstruction(offset, opCode), - // 
8-bit integer branch target - OperandType.ShortInlineBrTarget => new ShortInlineBrTargetInstruction(offset, opCode, ReadSByte(ref position)), // 32-bit integer branch target OperandType.InlineBrTarget => new InlineBrTargetInstruction(offset, opCode, ReadInt32(ref position)), - // 8-bit integer: 001F ldc.i4.s, FE12 unaligned. - OperandType.ShortInlineI => new ShortInlineIInstruction(offset, opCode, ReadByte(ref position)), + // 32-bit metadata token + OperandType.InlineField => new InlineFieldInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsField(token)), // 32-bit integer OperandType.InlineI => new InlineIInstruction(offset, opCode, ReadInt32(ref position)), // 64-bit integer OperandType.InlineI8 => new InlineI8Instruction(offset, opCode, ReadInt64(ref position)), - // 32-bit IEEE floating point number - OperandType.ShortInlineR => new ShortInlineRInstruction(offset, opCode, ReadSingle(ref position)), + // 32-bit metadata token + OperandType.InlineMethod => new InlineMethodInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsMethod(token)), + OperandType.InlineNone => new InlineNoneInstruction(offset, opCode), // 64-bit IEEE floating point number OperandType.InlineR => new InlineRInstruction(offset, opCode, ReadDouble(ref position)), - // 8-bit integer containing the ordinal of a local variable or an argument - OperandType.ShortInlineVar => new ShortInlineVarInstruction(offset, opCode, ReadByte(ref position)), - // 16-bit integer containing the ordinal of a local variable or an argument - OperandType.InlineVar => new InlineVarInstruction(offset, opCode, ReadUInt16(ref position)), - // 32-bit metadata string token - OperandType.InlineString => new InlineStringInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsString(token)), // 32-bit metadata signature token OperandType.InlineSig => new InlineSigInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsSignature(token)), - // 32-bit 
metadata token - OperandType.InlineMethod => new InlineMethodInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsMethod(token)), - // 32-bit metadata token - OperandType.InlineField => new InlineFieldInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsField(token)), - // 32-bit metadata token - OperandType.InlineType => new InlineTypeInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsType(token)), - // FieldRef, MethodRef, or TypeRef token - OperandType.InlineTok => new InlineTokInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsMember(token)), + // 32-bit metadata string token + OperandType.InlineString => new InlineStringInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsString(token)), // 32-bit integer argument to a switch instruction OperandType.InlineSwitch => new InlineSwitchInstruction(offset, opCode, ReadDeltas(ref position)), + // FieldRef, MethodRef, or TypeRef token + OperandType.InlineTok => new InlineTokInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsMember(token)), + // 32-bit metadata token + OperandType.InlineType => new InlineTypeInstruction(offset, opCode, token = ReadInt32(ref position), _resolver.AsType(token)), + // 16-bit integer containing the ordinal of a local variable or an argument + OperandType.InlineVar => new InlineVarInstruction(offset, opCode, ReadUInt16(ref position)), + // 8-bit integer branch target + OperandType.ShortInlineBrTarget => new ShortInlineBrTargetInstruction(offset, opCode, ReadSByte(ref position)), + // 8-bit integer: 001F ldc.i4.s, FE12 unaligned. 
+ OperandType.ShortInlineI => new ShortInlineIInstruction(offset, opCode, ReadByte(ref position)), + // 32-bit IEEE floating point number + OperandType.ShortInlineR => new ShortInlineRInstruction(offset, opCode, ReadSingle(ref position)), + // 8-bit integer containing the ordinal of a local variable or an argument + OperandType.ShortInlineVar => new ShortInlineVarInstruction(offset, opCode, ReadByte(ref position)), _ => throw new NotSupportedException($"Unsupported operand type: {opCode.OperandType}"), }; } @@ -370,7 +363,6 @@ internal ILInstruction(int offset, OpCode opCode) public sealed class InlineNoneInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineNone; - internal InlineNoneInstruction(int offset, OpCode opCode) : base(offset, opCode) { } } @@ -642,7 +634,7 @@ public byte[] GetByteArray() } } -public static class Formatter +public static class ILFormatter { public static StringBuilder Int32ToHex(StringBuilder sb, int int32) => sb.Append(int32.ToString("X8")); public static StringBuilder Int16ToHex(StringBuilder sb, int int16) => sb.Append(int16.ToString("X4")); From 5c31ad337317b4dafd43f4d1e792a1c0d4b60e38 Mon Sep 17 00:00:00 2001 From: dadhi Date: Wed, 4 Jun 2025 11:32:05 +0200 Subject: [PATCH 14/32] minimize laziness for ILReader --- src/FastExpressionCompiler/ILReader.cs | 105 ++++++++++--------------- 1 file changed, 40 insertions(+), 65 deletions(-) diff --git a/src/FastExpressionCompiler/ILReader.cs b/src/FastExpressionCompiler/ILReader.cs index aad9b7e3..0f93fc48 100644 --- a/src/FastExpressionCompiler/ILReader.cs +++ b/src/FastExpressionCompiler/ILReader.cs @@ -227,7 +227,7 @@ private ILInstruction Next(ref int position) ? 
_oneByteOpCodes[code] : _twoByteOpCodes[ReadByte(ref position)]; - var token = 0; + int token; return opCode.OperandType switch { // 32-bit integer branch target @@ -269,9 +269,9 @@ private ILInstruction Next(ref int position) private int[] ReadDeltas(ref int position) { - var cases = ReadInt32(ref position); - var deltas = new int[cases]; - for (var i = 0; i < cases; i++) + var caseCount = ReadInt32(ref position); + var deltas = new int[caseCount]; + for (var i = 0; i < caseCount; i++) deltas[i] = ReadInt32(ref position); return deltas; } @@ -370,9 +370,8 @@ internal InlineNoneInstruction(int offset, OpCode opCode) public sealed class InlineBrTargetInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineBrTarget; - public int Delta { get; } + public readonly int Delta; public int TargetOffset => Offset + Delta + 1 + 4; - internal InlineBrTargetInstruction(int offset, OpCode opCode, int delta) : base(offset, opCode) => Delta = delta; } @@ -380,7 +379,7 @@ internal InlineBrTargetInstruction(int offset, OpCode opCode, int delta) public sealed class ShortInlineBrTargetInstruction : ILInstruction { public override OperandType OperandType => OperandType.ShortInlineBrTarget; - public sbyte Delta { get; } + public readonly sbyte Delta; public int TargetOffset => Offset + Delta + 1 + 1; internal ShortInlineBrTargetInstruction(int offset, OpCode opCode, sbyte delta) : base(offset, opCode) => Delta = delta; @@ -389,35 +388,27 @@ internal ShortInlineBrTargetInstruction(int offset, OpCode opCode, sbyte delta) public sealed class InlineSwitchInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineSwitch; - private readonly int[] _deltas; - private int[] _targetOffsets; - + public readonly int[] Deltas; + public readonly int[] TargetOffsets; internal InlineSwitchInstruction(int offset, OpCode opCode, int[] deltas) - : base(offset, opCode) => _deltas = deltas; + : base(offset, opCode) + { + Deltas = deltas; - 
public int[] Deltas => (int[])_deltas.Clone(); + var caseCount = deltas.Length; + var itself = 1 + 4 + 4 * caseCount; + var targetOffsets = new int[caseCount]; + for (var i = 0; i < caseCount; i++) + targetOffsets[i] = Offset + deltas[i] + itself; - public int[] TargetOffsets - { - get - { - if (_targetOffsets == null) - { - var cases = _deltas.Length; - var itself = 1 + 4 + 4 * cases; - _targetOffsets = new int[cases]; - for (var i = 0; i < cases; i++) - _targetOffsets[i] = Offset + _deltas[i] + itself; - } - return _targetOffsets; - } + TargetOffsets = targetOffsets; } } public sealed class InlineIInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineI; - public int Int32 { get; } + public readonly int Int32; internal InlineIInstruction(int offset, OpCode opCode, int value) : base(offset, opCode) => Int32 = value; } @@ -425,8 +416,7 @@ internal InlineIInstruction(int offset, OpCode opCode, int value) public sealed class InlineI8Instruction : ILInstruction { public override OperandType OperandType => OperandType.InlineI8; - public long Int64 { get; } - + public readonly long Int64; internal InlineI8Instruction(int offset, OpCode opCode, long value) : base(offset, opCode) => Int64 = value; } @@ -434,8 +424,7 @@ internal InlineI8Instruction(int offset, OpCode opCode, long value) public sealed class ShortInlineIInstruction : ILInstruction { public override OperandType OperandType => OperandType.ShortInlineI; - public byte Byte { get; } - + public readonly byte Byte; internal ShortInlineIInstruction(int offset, OpCode opCode, byte value) : base(offset, opCode) => Byte = value; } @@ -443,8 +432,7 @@ internal ShortInlineIInstruction(int offset, OpCode opCode, byte value) public class InlineRInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineR; - public double Double { get; } - + public readonly double Double; internal InlineRInstruction(int offset, OpCode opCode, double value) : base(offset, 
opCode) => Double = value; } @@ -452,8 +440,7 @@ internal InlineRInstruction(int offset, OpCode opCode, double value) public sealed class ShortInlineRInstruction : ILInstruction { public override OperandType OperandType => OperandType.ShortInlineR; - public float Single { get; } - + public readonly float Single; internal ShortInlineRInstruction(int offset, OpCode opCode, float value) : base(offset, opCode) => Single = value; } @@ -476,7 +463,6 @@ public sealed class InlineMethodInstruction : ILInstruction public override OperandType OperandType => OperandType.InlineMethod; public readonly int Token; public readonly MethodBase Method; - internal InlineMethodInstruction(int offset, OpCode opCode, int token, MethodBase method) : base(offset, opCode) { @@ -529,7 +515,6 @@ public sealed class InlineStringInstruction : ILInstruction public override OperandType OperandType => OperandType.InlineString; public readonly int Token; public readonly string String; - internal InlineStringInstruction(int offset, OpCode opCode, int token, string s) : base(offset, opCode) { @@ -541,7 +526,7 @@ internal InlineStringInstruction(int offset, OpCode opCode, int token, string s) public sealed class InlineVarInstruction : ILInstruction { public override OperandType OperandType => OperandType.InlineVar; - public ushort Ordinal { get; } + public readonly ushort Ordinal; internal InlineVarInstruction(int offset, OpCode opCode, ushort ordinal) : base(offset, opCode) => Ordinal = ordinal; } @@ -549,8 +534,7 @@ internal InlineVarInstruction(int offset, OpCode opCode, ushort ordinal) public sealed class ShortInlineVarInstruction : ILInstruction { public override OperandType OperandType => OperandType.ShortInlineVar; - public byte Ordinal { get; } - + public readonly byte Ordinal; internal ShortInlineVarInstruction(int offset, OpCode opCode, byte ordinal) : base(offset, opCode) => Ordinal = ordinal; } @@ -566,8 +550,7 @@ public class MethodBaseILProvider : IILProvider private static readonly Type 
_runtimeMethodInfoType = Type.GetType("System.Reflection.RuntimeMethodInfo"); private static readonly Type _runtimeConstructorInfoType = Type.GetType("System.Reflection.RuntimeConstructorInfo"); - private readonly MethodBase _method; - private byte[] _byteArray; + private readonly byte[] _byteArray; public MethodBaseILProvider(MethodBase method) { @@ -578,13 +561,10 @@ public MethodBaseILProvider(MethodBase method) if (methodType != _runtimeMethodInfoType & methodType != _runtimeConstructorInfoType) throw new ArgumentException("Must have type RuntimeMethodInfo or RuntimeConstructorInfo.", nameof(method)); - _method = method; + _byteArray = method.GetMethodBody()?.GetILAsByteArray() ?? []; } - public byte[] GetByteArray() - { - return _byteArray ??= _method.GetMethodBody()?.GetILAsByteArray() ?? []; - } + public byte[] GetByteArray() => _byteArray; } [UnconditionalSuppressMessage("Trimming", "IL2026:Members annotated with 'RequiresUnreferencedCodeAttribute' require dynamic access otherwise can break functionality when trimming application code", Justification = "Uses reflection on internal types and is not trim-compatible.")] @@ -609,29 +589,24 @@ public class DynamicMethodILProvider : IILProvider private static readonly MethodInfo _miBakeByteArray = _runtimeILGeneratorType.GetMethod("BakeByteArray", BindingFlags.NonPublic | BindingFlags.Instance); - private readonly DynamicMethod _method; - private byte[] _byteArray; - - public DynamicMethodILProvider(DynamicMethod method) => _method = method; + private readonly byte[] _byteArray; - public byte[] GetByteArray() + public DynamicMethodILProvider(DynamicMethod method) { - if (_byteArray == null) + var ilgen = method.GetILGenerator(); + try { - var ilgen = _method.GetILGenerator(); - try - { - _byteArray = (byte[])_miBakeByteArray.Invoke(ilgen, null) ?? 
[]; - } - catch (TargetInvocationException) - { - var length = (int)_fiLen.GetValue(ilgen); - _byteArray = new byte[length]; - Array.Copy((byte[])_fiStream.GetValue(ilgen), _byteArray, length); - } + _byteArray = (byte[])_miBakeByteArray.Invoke(ilgen, null) ?? []; + } + catch (TargetInvocationException) + { + var length = (int)_fiLen.GetValue(ilgen); + _byteArray = new byte[length]; + Array.Copy((byte[])_fiStream.GetValue(ilgen), _byteArray, length); } - return _byteArray; } + + public byte[] GetByteArray() => _byteArray; } public static class ILFormatter From 139d658c7bd27f63e677954a6058b11138286c42 Mon Sep 17 00:00:00 2001 From: dadhi Date: Thu, 5 Jun 2025 09:48:57 +0200 Subject: [PATCH 15/32] TEntry type for the SmallMap --- .../FastExpressionCompiler.cs | 2 +- src/FastExpressionCompiler/ImTools.cs | 186 +++++++++++------- 2 files changed, 114 insertions(+), 74 deletions(-) diff --git a/src/FastExpressionCompiler/FastExpressionCompiler.cs b/src/FastExpressionCompiler/FastExpressionCompiler.cs index c077775b..be33dd32 100644 --- a/src/FastExpressionCompiler/FastExpressionCompiler.cs +++ b/src/FastExpressionCompiler/FastExpressionCompiler.cs @@ -1368,7 +1368,7 @@ public static Result TryCollectInfo(ref ClosureInfo closure, Expression expr, if (hasComplexExpression) { closure.HasComplexExpression = true; - closure.ArgsContainingComplexExpression.Map.AddOrGetValueRef(newExpr, out _); + closure.ArgsContainingComplexExpression.Map.AddOrGetEntryRef(newExpr, out _); } return r; diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index ba15d825..b870bfae 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -762,13 +762,22 @@ public static class SmallMap // Window with the hash mask wothout the lead ProbeMask and closing IndexMask 0b00000111111111111111111111110000 internal const int HashMask = HashAndIndexMask & ~IndexMask; + /// Represent a keyed entry stored in the SmallMap. 
+ /// Its implementation struct may include the additional Value for the Map or just the Key for the Set. + /// The implementation may also decide to make Value readonly or writable for the in-place update + public interface IEntry + { + /// Returns the key of the payload + K Key { get; internal set; } + } + /// Holds a single entry consisting of key and value. /// Value may be set or changed but the key is set in stone (by construction). [DebuggerDisplay("{Key?.ToString()}->{Value}")] - public struct Entry + public struct Entry : IEntry { /// The readonly key - public K Key; + public K Key { get; set; } /// The mutable value public V Value; /// Construct with the key and default value @@ -781,14 +790,6 @@ public Entry(K key, V value) } } - /// Creates the map with the storage - [MethodImpl((MethodImplOptions)256)] - public static SmallMap> - New(byte capacityBitShift = 0) - where TEq : struct, IEq - where TStack : struct, IStack, TStack> - => new(capacityBitShift); - /// Binary representation of the `int` public static string ToB(int x) => System.Convert.ToString(x, 2).PadLeft(32, '0'); @@ -806,18 +807,19 @@ public static SmallMap> internal static int GetHash(ref int[] start, int distance) => start[distance]; #endif - // todo: @perf can we move the Entry into the type parameter to configure and possibly save the memory e.g. for the sets? /// Abstraction to configure your own entries data structure. Check the derived types for the examples - public interface IEntries where TEq : IEq + public interface IEntries + where TEntry : struct, IEntry + where TEq : IEq { /// Initializes the entries storage to the specified capacity void Init(int capacityPowerOfTwoPlease); /// Returns the reference to entry by its index, index should map to the present/non-removed entry - ref Entry GetSurePresentEntryRef(int index); + ref TEntry GetSurePresentEntryRef(int index); /// Adds the key at the "end" of entries - so the order of addition is preserved. 
- ref V AddKeyAndGetValueRef(K key, int index); + ref TEntry AddKeyAndGetEntryRef(K key, int index); } internal const int MinEntriesCapacity = 2; @@ -827,31 +829,68 @@ public readonly struct NoValue { } /// Stores the entries in a single dynamically reallocated growing array [DebuggerDisplay("{Capacity:_entries?.Length ?? 0} of {_entries?[0]}, {_entries?[1]}, ...")] - public struct SingleArrayEntries : IEntries where TEq : struct, IEq + public struct SingleArrayEntries : IEntries + where TEntry : struct, IEntry + where TEq : struct, IEq { - internal Entry[] _entries; + internal TEntry[] _entries; /// public void Init(int capacityPowerOfTwoPlease) => - _entries = new Entry[capacityPowerOfTwoPlease]; + _entries = new TEntry[capacityPowerOfTwoPlease]; /// [MethodImpl((MethodImplOptions)256)] - public ref Entry GetSurePresentEntryRef(int index) => + public ref TEntry GetSurePresentEntryRef(int index) => ref _entries.GetSurePresentItemRef(index); /// [MethodImpl((MethodImplOptions)256)] - public ref V AddKeyAndGetValueRef(K key, int index) + public ref TEntry AddKeyAndGetEntryRef(K key, int index) { if (index == _entries.Length) Array.Resize(ref _entries, index << 1); ref var e = ref _entries.GetSurePresentItemRef(index); e.Key = key; - return ref e.Value; + return ref e; } } + + /// Gets the ref to the existing entry.Value by the provided key (found == true), + /// or adds a new entry (found == false) and returns it.Value by ref. + /// So the method always return a non-null ref to the value, either existing or added + [MethodImpl((MethodImplOptions)256)] + public static ref V AddOrGetValueRef( + this ref SmallMap, TEq, TStackEntries, TEntries> map, K key, out bool found) + where TEq : struct, IEq + where TStackEntries : struct, IStack, TStackEntries> + where TEntries : struct, IEntries, TEq> => + ref map.AddOrGetEntryRef(key, out found).Value; + + /// Adds an entry for sure absent key. 
+ /// Provides the performance in scenarios where you look for the present key, and using it, and if ABSENT then add the new one. + /// So this method optimized NOT to look for the present item for the second time + [MethodImpl((MethodImplOptions)256)] + public static ref V AddSureAbsentDefaultAndGetRef( + this ref SmallMap, TEq, TStackEntries, TEntries> map, K key) + where TEq : struct, IEq + where TStackEntries : struct, IStack, TStackEntries> + where TEntries : struct, IEntries, TEq> + => ref map.AddSureAbsentDefaultEntryAndGetRef(key).Value; + + /// Lookups for the stored entry by key. Returns the ref to the found entry.Value or the null ref + [MethodImpl((MethodImplOptions)256)] + public static ref V TryGetValueRef( + this ref SmallMap, TEq, TStackEntries, TEntries> map, K key, out bool found) + where TEq : struct, IEq + where TStackEntries : struct, IStack, TStackEntries> + where TEntries : struct, IEntries, TEq> + { + ref var e = ref map.TryGetEntryRef(key, out found); + if (found) return ref e.Value; + return ref RefTools.GetNullRef(); + } } // todo: @improve ? 
how/where to add SIMD to improve CPU utilization but not losing perf for smaller sizes @@ -870,10 +909,11 @@ public ref V AddKeyAndGetValueRef(K key, int index) /// /// [DebuggerDisplay("{Count} of {_e0}, {_e1}, {_e2}, {_e3}, ...")] -public struct SmallMap +public struct SmallMap + where TEntry : struct, IEntry where TEq : struct, IEq - where TStack : struct, IStack, TStack> - where TEntries : struct, IEntries + where TStackEntries : struct, IStack + where TEntries : struct, IEntries { internal byte _capacityBitShift; internal int _count; @@ -890,7 +930,7 @@ public struct SmallMap internal TEntries _entries; #pragma warning restore IDE0044 #pragma warning disable CS0649 // Field 'SmallMap.Stack' is never assigned to, and will always have its default value - internal TStack Stack; + internal TStackEntries StackEntries; #pragma warning restore CS0649 /// Capacity bits @@ -921,17 +961,17 @@ public SmallMap(byte capacityBitShift) /// Important: it does not check the index bounds, so you need to check that the index is from 0 to map.Count-1 [UnscopedRef] [MethodImpl((MethodImplOptions)256)] - public ref Entry GetSurePresentEntryRef(int index) + public ref TEntry GetSurePresentEntryRef(int index) { Debug.Assert(index >= 0); Debug.Assert(index < _count); - if (index >= Stack.Capacity) - return ref _entries.GetSurePresentEntryRef(index - Stack.Capacity); - return ref Stack.GetSurePresentItemRef(index); + if (index >= StackEntries.Capacity) + return ref _entries.GetSurePresentEntryRef(index - StackEntries.Capacity); + return ref StackEntries.GetSurePresentItemRef(index); } [UnscopedRef] - private ref V AddOrGetValueRefInEntries(K key, out bool found) + private ref TEntry AddOrGetRefInEntries(K key, out bool found) { // if the free space is less than 1/8 of capacity (12.5%) then Resize var indexMask = (1 << _capacityBitShift) - 1; @@ -959,7 +999,7 @@ private ref V AddOrGetValueRefInEntries(K key, out bool found) { ref var e = ref GetSurePresentEntryRef(h & indexMask); if 
(found = default(TEq).Equals(e.Key, key)) - return ref e.Value; + return ref e; } h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); ++probes; @@ -985,7 +1025,7 @@ private ref V AddOrGetValueRefInEntries(K key, out bool found) } } - return ref _entries.AddKeyAndGetValueRef(key, (_count++) - Stack.Capacity); + return ref _entries.AddKeyAndGetEntryRef(key, (_count++) - StackEntries.Capacity); } private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) @@ -1026,13 +1066,13 @@ private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) } } - /// Gets the reference to the existing value by the provided key (found == true), - /// or adds a new key-value pair (found == false) and allows to set the returned value. + /// Gets the ref to the existing entry by the provided key (found == true), + /// or adds a new entry (found == false) and returns it by ref [UnscopedRef] - public ref V AddOrGetValueRef(K key, out bool found) + public ref TEntry AddOrGetEntryRef(K key, out bool found) { - if (_count > Stack.Capacity) - return ref AddOrGetValueRefInEntries(key, out found); + if (_count > StackEntries.Capacity) + return ref AddOrGetRefInEntries(key, out found); // Linear search in stack (which has a few items) by comparing the keys without calculating the hashes // Saving on the hash calculation. Losing on the bigger number of comparisons. 
@@ -1040,17 +1080,17 @@ public ref V AddOrGetValueRef(K key, out bool found) { ref var e = ref GetSurePresentEntryRef(i); if (found = default(TEq).Equals(e.Key, key)) - return ref e.Value; + return ref e; } found = false; // Add the new entry to the stack if there is still space in stack - if (_count < Stack.Capacity) + if (_count < StackEntries.Capacity) { var newIndex = _count++; - ref var newEntry = ref Stack.GetSurePresentItemRef(newIndex); + ref var newEntry = ref StackEntries.GetSurePresentItemRef(newIndex); newEntry.Key = key; - return ref newEntry.Value; + return ref newEntry; } // Now all capacity of the stack is used. @@ -1065,18 +1105,18 @@ public ref V AddOrGetValueRef(K key, out bool found) _capacityBitShift = MinHashesCapacityBitShift; _packedHashesAndIndexes = new int[1 << MinHashesCapacityBitShift]; - for (var i = 0; i < Stack.Capacity; ++i) + for (var i = 0; i < StackEntries.Capacity; ++i) AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(GetSurePresentEntryRef(i).Key), i); - AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(key), Stack.Capacity); + AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(key), StackEntries.Capacity); - _count = Stack.Capacity + 1; // +1 because we added the new key - _entries.Init(Stack.Capacity); // Give the heap entries the same initial capcity as Stack, effectively doubling the capacity - return ref _entries.AddKeyAndGetValueRef(key, 0); // add the new key to the entries with the 0 index in the entries + _count = StackEntries.Capacity + 1; // +1 because we added the new key + _entries.Init(StackEntries.Capacity); // Give the heap entries the same initial capcity as Stack, effectively doubling the capacity + return ref _entries.AddKeyAndGetEntryRef(key, 0); // add the new key to the entries with the 0 index in the entries } [UnscopedRef] - private ref V AddSureAbsentDefaultAndGetRefInEntries(K key) + private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) { // if 
the free space is less than 1/8 of capacity (12.5%) then Resize var indexMask = (1 << _capacityBitShift) - 1; @@ -1120,48 +1160,48 @@ private ref V AddSureAbsentDefaultAndGetRefInEntries(K key) } } - return ref _entries.AddKeyAndGetValueRef(key, (_count++) - Stack.Capacity); + return ref _entries.AddKeyAndGetEntryRef(key, (_count++) - StackEntries.Capacity); } - /// Adds a sure absent key entry. + /// Adds an entry for sure absent key. /// Provides the performance in scenarios where you look for the present key, and using it, and if ABSENT then add the new one. /// So this method optimized NOT to look for the present item for the second time [UnscopedRef] [MethodImpl((MethodImplOptions)256)] - public ref V AddSureAbsentDefaultAndGetRef(K key) + public ref TEntry AddSureAbsentDefaultEntryAndGetRef(K key) { - if (_count > Stack.Capacity) + if (_count > StackEntries.Capacity) return ref AddSureAbsentDefaultAndGetRefInEntries(key); // Add the new entry to the stack if there is still space in stack - if (_count < Stack.Capacity) + if (_count < StackEntries.Capacity) { var newIndex = _count++; - ref var newEntry = ref Stack.GetSurePresentItemRef(newIndex); + ref var newEntry = ref StackEntries.GetSurePresentItemRef(newIndex); newEntry.Key = key; - return ref newEntry.Value; + return ref newEntry; } _capacityBitShift = MinHashesCapacityBitShift; _packedHashesAndIndexes = new int[1 << MinHashesCapacityBitShift]; - for (var i = 0; i < Stack.Capacity; ++i) + for (var i = 0; i < StackEntries.Capacity; ++i) AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(GetSurePresentEntryRef(i).Key), i); - AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(key), Stack.Capacity); + AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(key), StackEntries.Capacity); - _count = Stack.Capacity + 1; // +1 because we added the new key - _entries.Init(Stack.Capacity); // Give the heap entries the same initial capcity as Stack, effectively doubling the 
capacity - return ref _entries.AddKeyAndGetValueRef(key, 0); // add the new key to the entries with the 0 index in the entries + _count = StackEntries.Capacity + 1; // +1 because we added the new key + _entries.Init(StackEntries.Capacity); // Give the heap entries the same initial capcity as Stack, effectively doubling the capacity + return ref _entries.AddKeyAndGetEntryRef(key, 0); // add the new key to the entries with the 0 index in the entries } - /// Finds the stored value by key. If found returns ref to the value it can be modified in place. + /// Lookups for the stored key. If found true, otherwise false [MethodImpl((MethodImplOptions)256)] public bool ContainsKey(K key) { - if (_count > Stack.Capacity) + if (_count > StackEntries.Capacity) { - TryGetValueRefInEntries(key, out var found); + TryGetRefInEntries(key, out var found); return found; } @@ -1174,7 +1214,7 @@ public bool ContainsKey(K key) [UnscopedRef] [MethodImpl((MethodImplOptions)256)] - internal ref V TryGetValueRefInEntries(K key, out bool found) + internal ref TEntry TryGetRefInEntries(K key, out bool found) { var hash = default(TEq).GetHashCode(key); @@ -1200,7 +1240,7 @@ internal ref V TryGetValueRefInEntries(K key, out bool found) { ref var e = ref GetSurePresentEntryRef(h & indexMask); if (found = default(TEq).Equals(e.Key, key)) - return ref e.Value; + return ref e; } h = GetHash(ref hashesAndIndexes, ++hashIndex & indexMask); @@ -1208,26 +1248,26 @@ internal ref V TryGetValueRefInEntries(K key, out bool found) } found = false; - return ref RefTools.GetNullRef(); + return ref RefTools.GetNullRef(); } - /// Finds the stored value by key. Returns the reference to the found value or the null entry + /// Lookups for the stored entry by key. 
Returns the ref to the found entry or the null ref [UnscopedRef] [MethodImpl((MethodImplOptions)256)] - public ref V TryGetValueRef(K key, out bool found) + public ref TEntry TryGetEntryRef(K key, out bool found) { - if (_count > Stack.Capacity) - return ref TryGetValueRefInEntries(key, out found); + if (_count > StackEntries.Capacity) + return ref TryGetRefInEntries(key, out found); for (var i = 0; i < _count; ++i) { ref var e = ref GetSurePresentEntryRef(i); if (found = default(TEq).Equals(key, e.Key)) - return ref e.Value; + return ref e; } found = false; - return ref RefTools.GetNullRef(); + return ref RefTools.GetNullRef(); } internal int ResizeHashes(int indexMask) @@ -1285,21 +1325,21 @@ internal int ResizeHashes(int indexMask) public struct SmallMap4() where TEq : struct, IEq { /// Map with 4 elements on stack and entries baked by the single array - public SmallMap>, SmallMap.SingleArrayEntries> Map; + public SmallMap, TEq, Stack4>, SmallMap.SingleArrayEntries, TEq>> Map; } /// Type wrapper to minimize the number of generic args to be specified by the end-user public struct SmallMap8() where TEq : struct, IEq { /// Map with 8 elements on stack and entries baked by the single array - public SmallMap>, SmallMap.SingleArrayEntries> Map; + public SmallMap, TEq, Stack8>, SmallMap.SingleArrayEntries, TEq>> Map; } /// Type wrapper to minimize the number of generic args to be specified by the end-user public struct SmallMap16() where TEq : struct, IEq { /// Map with 16 elements on stack and entries baked by the single array - public SmallMap>, SmallMap.SingleArrayEntries> Map; + public SmallMap, TEq, Stack16>, SmallMap.SingleArrayEntries, TEq>> Map; } #nullable restore \ No newline at end of file From f9eb067056e49bf83c4cd078ea95ce2345cac0b9 Mon Sep 17 00:00:00 2001 From: dadhi Date: Thu, 5 Jun 2025 09:57:58 +0200 Subject: [PATCH 16/32] adding the Set wrappers based on SmallMap --- src/FastExpressionCompiler/ImTools.cs | 76 +++++++++++++++++++-------- 1 file 
changed, 54 insertions(+), 22 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index b870bfae..05722371 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -790,21 +790,32 @@ public Entry(K key, V value) } } + /// The entry with just a key. + /// When used with the SmallMap it may represent a Set without wasting the space for the absent value + [DebuggerDisplay("{Key?.ToString()}")] + public struct Entry : IEntry + { + /// The readonly key + public K Key { get; set; } + /// Construct with the key and default value + public Entry(K key) => Key = key; + } + /// Binary representation of the `int` public static string ToB(int x) => System.Convert.ToString(x, 2).PadLeft(32, '0'); [MethodImpl((MethodImplOptions)256)] #if NET7_0_OR_GREATER - internal static ref int GetHashRef(ref int start, int distance) => ref Unsafe.Add(ref start, distance); + internal static ref int NextHashRef(ref int start, int distance) => ref Unsafe.Add(ref start, distance); #else - internal static ref int GetHashRef(ref int[] start, int distance) => ref start[distance]; + internal static ref int NextHashRef(ref int[] start, int distance) => ref start[distance]; #endif [MethodImpl((MethodImplOptions)256)] #if NET7_0_OR_GREATER - internal static int GetHash(ref int start, int distance) => Unsafe.Add(ref start, distance); + internal static int NextHash(ref int start, int distance) => Unsafe.Add(ref start, distance); #else - internal static int GetHash(ref int[] start, int distance) => start[distance]; + internal static int NextHash(ref int[] start, int distance) => start[distance]; #endif /// Abstraction to configure your own entries data structure. 
Check the derived types for the examples @@ -988,7 +999,7 @@ private ref TEntry AddOrGetRefInEntries(K key, out bool found) #else var hashesAndIndexes = _packedHashesAndIndexes; #endif - ref var h = ref GetHashRef(ref hashesAndIndexes, hashIndex); + ref var h = ref NextHashRef(ref hashesAndIndexes, hashIndex); // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions var probes = 1; @@ -1001,7 +1012,7 @@ private ref TEntry AddOrGetRefInEntries(K key, out bool found) if (found = default(TEq).Equals(e.Key, key)) return ref e; } - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); ++probes; } found = false; @@ -1015,7 +1026,7 @@ private ref TEntry AddOrGetRefInEntries(K key, out bool found) probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1038,11 +1049,11 @@ private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) var hashesAndIndexes = _packedHashesAndIndexes; #endif // 1. Skip over hashes with the bigger and equal probes. 
The hashes with bigger probes overlapping from the earlier ideal positions - ref var h = ref GetHashRef(ref hashesAndIndexes, hashIndex); + ref var h = ref NextHashRef(ref hashesAndIndexes, hashIndex); var probes = 1; while ((h >>> ProbeCountShift) >= probes) { - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & IndexMask); + h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & IndexMask); ++probes; } @@ -1055,7 +1066,7 @@ private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & IndexMask); + h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & IndexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1131,13 +1142,13 @@ private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) #else var hashesAndIndexes = _packedHashesAndIndexes; #endif - ref var h = ref GetHashRef(ref hashesAndIndexes, hashIndex); + ref var h = ref NextHashRef(ref hashesAndIndexes, hashIndex); // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions var probes = 1; while ((h >>> ProbeCountShift) >= probes) { - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); ++probes; } @@ -1150,7 +1161,7 @@ private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref GetHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1229,7 +1240,7 @@ internal ref TEntry TryGetRefInEntries(K key, out bool found) var hashesAndIndexes = _packedHashesAndIndexes; #endif - var h = GetHash(ref hashesAndIndexes, hashIndex); + var h = NextHash(ref hashesAndIndexes, hashIndex); // 1. 
Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions var probes = 1; @@ -1243,7 +1254,7 @@ internal ref TEntry TryGetRefInEntries(K key, out bool found) return ref e; } - h = GetHash(ref hashesAndIndexes, ++hashIndex & indexMask); + h = NextHash(ref hashesAndIndexes, ++hashIndex & indexMask); ++probes; } @@ -1290,7 +1301,7 @@ internal int ResizeHashes(int indexMask) // Overflow segment is wrapped-around hashes and! the hashes at the beginning robin hooded by the wrapped-around hashes var i = 0; while ((oldHash >>> ProbeCountShift) > 1) - oldHash = GetHash(ref oldHashes, ++i); + oldHash = NextHash(ref oldHashes, ++i); var oldCapacityWithOverflowSegment = i + oldCapacity; while (true) @@ -1302,10 +1313,10 @@ internal int ResizeHashes(int indexMask) // no need for robin-hooding because we already did it for the old hashes and now just filling the hashes into the new array which are already in order var probes = 1; - ref var newHash = ref GetHashRef(ref newHashes, indexWithNextBit); + ref var newHash = ref NextHashRef(ref newHashes, indexWithNextBit); while (newHash != 0) { - newHash = ref GetHashRef(ref newHashes, ++indexWithNextBit & newIndexMask); + newHash = ref NextHashRef(ref newHashes, ++indexWithNextBit & newIndexMask); ++probes; } newHash = (probes << ProbeCountShift) | (oldHash & newHashAndIndexMask); @@ -1313,7 +1324,7 @@ internal int ResizeHashes(int indexMask) if (++i >= oldCapacityWithOverflowSegment) break; - oldHash = GetHash(ref oldHashes, i & indexMask); + oldHash = NextHash(ref oldHashes, i & indexMask); } ++_capacityBitShift; _packedHashesAndIndexes = newHashesAndIndexes; @@ -1321,25 +1332,46 @@ internal int ResizeHashes(int indexMask) } } -/// Type wrapper to minimize the number of generic args to be specified by the end-user +/// Holds the Map with 4 items on stack. 
Minimizes the number of type arguments required to be specified public struct SmallMap4() where TEq : struct, IEq { /// Map with 4 elements on stack and entries baked by the single array public SmallMap, TEq, Stack4>, SmallMap.SingleArrayEntries, TEq>> Map; } -/// Type wrapper to minimize the number of generic args to be specified by the end-user +/// Holds the Map with 8 items on stack. Minimizes the number of type arguments required to be specified public struct SmallMap8() where TEq : struct, IEq { /// Map with 8 elements on stack and entries baked by the single array public SmallMap, TEq, Stack8>, SmallMap.SingleArrayEntries, TEq>> Map; } -/// Type wrapper to minimize the number of generic args to be specified by the end-user +/// Holds the Map with 16 items on stack. Minimizes the number of type arguments required to be specified public struct SmallMap16() where TEq : struct, IEq { /// Map with 16 elements on stack and entries baked by the single array public SmallMap, TEq, Stack16>, SmallMap.SingleArrayEntries, TEq>> Map; } +/// Holds the Set with 4 items on stack. Minimizes the number of type arguments required to be specified +public struct SmallSet4() where TEq : struct, IEq +{ + /// Set with 4 keys on stack and entries baked by the single array + public SmallMap, TEq, Stack4>, SmallMap.SingleArrayEntries, TEq>> Set; +} + +/// Holds the Set with 8 items on stack. Minimizes the number of type arguments required to be specified +public struct SmallSet8() where TEq : struct, IEq +{ + /// Set with 8 keys on stack and entries baked by the single array + public SmallMap, TEq, Stack8>, SmallMap.SingleArrayEntries, TEq>> Set; +} + +/// Holds the Set with 16 items on stack. 
Minimizes the number of type arguments required to be specified +public struct SmallSet16() where TEq : struct, IEq +{ + /// Set with 16 keys on stack and entries baked by the single array + public SmallMap, TEq, Stack16>, SmallMap.SingleArrayEntries, TEq>> Set; +} + #nullable restore \ No newline at end of file From 0aa32d6d03e5f8c0fae8f1a78a6d4b594f10cd71 Mon Sep 17 00:00:00 2001 From: dadhi Date: Fri, 6 Jun 2025 11:45:22 +0200 Subject: [PATCH 17/32] trying Stack TryGetByRef --- src/FastExpressionCompiler/ImTools.cs | 274 +++++++++++++++++- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 94 ++++++ .../Program.cs | 3 +- ..._repeated_calls_to_ConcurrentDictionary.cs | 28 ++ 4 files changed, 387 insertions(+), 12 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 05722371..50097d23 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -47,6 +47,7 @@ namespace FastExpressionCompiler.ImTools; using System.Diagnostics.CodeAnalysis; using static SmallMap; +using System.Runtime.Intrinsics; /// Helpers and polyfills for the missing things in the old .NET versions public static class RefTools @@ -213,14 +214,6 @@ public static class Stack [MethodImpl(MethodImplOptions.NoInlining)] internal static ref T ThrowIndexOutOfBounds(int index, int capacity) => throw new IndexOutOfRangeException($"Index {index} is out of range for Stack{capacity}<{typeof(T)},..>."); - -#if SUPPORTS_CREATE_SPAN - /// Creates a span over the stack items - [MethodImpl((MethodImplOptions)256)] - public static Span AsSpan(this ref TStack stack) - where TStack : struct, IStack => - MemoryMarshal.CreateSpan(ref Unsafe.As(ref stack), stack.Capacity); -#endif } /// Abstracts over collection of the items on stack of the fixed Capacity, @@ -239,8 +232,35 @@ public interface IStack /// Indexer returning the item by ref to read and write the item value [UnscopedRef] ref T this[int index] { get; } + +#if 
SUPPORTS_CREATE_SPAN + /// Creates a span over the stack items + public Span AsSpan(); +#endif + } +// todo: @wip +// /// Base marker for collection or container holding some number of items +// public interface ISize { } +// /// Marker for collection or container holding 2 or items +// public interface ISize2Plus : ISize { } +// /// Marker for collection or container holding 4 or more items +// public interface ISize4Plus : ISize2Plus { } +// /// Marker for collection or container holding 8 or more items +// public interface ISize8Plus : ISize4Plus { } +// /// Marker for collection or container holding 16 or more items +// public interface ISize16Plus : ISize8Plus { } + +// /// Marker for collection or container holding 4 items +// public interface ISize2 : ISize2Plus { } +// /// Marker for collection or container holding 4 items +// public interface ISize4 : ISize4Plus { } +// /// Marker for collection or container holding 8 items +// public interface ISize8 : ISize8Plus { } +// /// Marker for collection or container holding 16 items +// public interface ISize16 : ISize16Plus { } + /// Implementation of `IStack` for 2 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] public struct Stack2 : IStack> @@ -278,6 +298,12 @@ public ref T this[int index] return ref Stack.ThrowIndexOutOfBounds(index, Capacity); } } + +#if SUPPORTS_CREATE_SPAN + /// + [MethodImpl((MethodImplOptions)256)] + public Span AsSpan() => MemoryMarshal.CreateSpan(ref _it0, Capacity); +#endif } /// Implementation of `IStack` for 4 items on stack @@ -319,6 +345,12 @@ public ref T this[int index] return ref Stack.ThrowIndexOutOfBounds(index, Capacity); } } + +#if SUPPORTS_CREATE_SPAN + /// + [MethodImpl((MethodImplOptions)256)] + public Span AsSpan() => MemoryMarshal.CreateSpan(ref _it0, Capacity); +#endif } /// Implementation of `IStack` for 8 items on stack @@ -364,6 +396,12 @@ public ref T this[int index] return ref Stack.ThrowIndexOutOfBounds(index, Capacity); } } + +#if 
SUPPORTS_CREATE_SPAN + /// + [MethodImpl((MethodImplOptions)256)] + public Span AsSpan() => MemoryMarshal.CreateSpan(ref _it0, Capacity); +#endif } /// Implementation of `IStack` for 16 items on stack @@ -418,6 +456,12 @@ public ref T this[int index] return ref Stack.ThrowIndexOutOfBounds(index, Capacity); } } + +#if SUPPORTS_CREATE_SPAN + /// + [MethodImpl((MethodImplOptions)256)] + public Span AsSpan() => MemoryMarshal.CreateSpan(ref _it0, Capacity); +#endif } /// Generic version of SmallList abstracted for how much items are on the stack @@ -745,6 +789,9 @@ public int GetHashCode((A, B, C) key) => Hasher.Combine(RuntimeHelpers.GetHashCode(key.Item1), Hasher.Combine(RuntimeHelpers.GetHashCode(key.Item2), RuntimeHelpers.GetHashCode(key.Item3))); } +/// Add the Infer parameter to `T Method(..., Infer{T} _)` to enable type inference for T, +/// by calling it as `var t = Method(..., default(Infer{T}))` +public interface Infer { } /// Configuration and the tools for the SmallMap and friends public static class SmallMap @@ -868,11 +915,216 @@ public ref TEntry AddKeyAndGetEntryRef(K key, int index) } } + // todo: @perf optimize with SIMD, ILP, loop-unrolling, etc. 
+ /// Lookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes + public static ref TEntry TryGetEntryRef( + this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, + TEq eq = default, Infer _ = default) + where TEntry : struct, IEntry + where TEq : struct, IEq + where TStackHashes : struct, IStack + where TStackEntries : struct, IStack + { + Debug.Assert(hashes.Capacity == entries.Capacity, + "Expecting that the hashes and entries stacks have the same capacity"); + + var hash = eq.GetHashCode(key); + + for (var i = 0; i < hashes.Capacity; ++i) + { + var h = hashes.GetSurePresentItemRef(i); + if (h == hash) + { + ref var entry = ref entries.GetSurePresentItemRef(i); + if (found = eq.Equals(entry.Key, key)) + return ref entry; + } + } + + found = false; + return ref RefTools.GetNullRef(); + } + + /// Lookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes + public static ref TEntry TryGetEntryRef4( + this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, + TEq eq = default, Infer _ = default) + where TEntry : struct, IEntry + where TEq : struct, IEq + where TStackHashes : struct, IStack + where TStackEntries : struct, IStack + { + Debug.Assert(hashes.Capacity == entries.Capacity, + "Expecting that the hashes and entries stacks have the same capacity"); + + var hash = eq.GetHashCode(key); + + for (var i = 0; i < hashes.Capacity; i += 4) + { + ref var h0 = ref hashes.GetSurePresentItemRef(i); + ref var h1 = ref hashes.GetSurePresentItemRef(i + 1); + ref var h2 = ref hashes.GetSurePresentItemRef(i + 2); + ref var h3 = ref hashes.GetSurePresentItemRef(i + 3); + + var match0 = h0 == hash; + var match1 = h1 == hash; + var match2 = h2 == hash; + var match3 = h3 == hash; + + if (!(match0 | match1 | match2 | match3)) + continue; + + if (match0) + { + ref var entry0 = ref 
entries.GetSurePresentItemRef(i); + if (found = eq.Equals(entry0.Key, key)) + return ref entry0; + } + + if (match1) + { + ref var entry1 = ref entries.GetSurePresentItemRef(i + 1); + if (found = eq.Equals(entry1.Key, key)) + return ref entry1; + } + + if (match2) + { + ref var entry2 = ref entries.GetSurePresentItemRef(i + 2); + if (found = eq.Equals(entry2.Key, key)) + return ref entry2; + } + + if (match3) + { + ref var entry3 = ref entries.GetSurePresentItemRef(i + 3); + if (found = eq.Equals(entry3.Key, key)) + return ref entry3; + } + } + + found = false; + return ref RefTools.GetNullRef(); + } + + /// Lookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes + public static ref TEntry TryGetEntryRef8Plus( + this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, + TEq eq = default, Infer _ = default) + where TEntry : struct, IEntry + where TEq : struct, IEq + where TStackHashes : struct, IStack + where TStackEntries : struct, IStack + { + Debug.Assert(hashes.Capacity == entries.Capacity, + "Expecting that the hashes and entries stacks have the same capacity"); + + var hash = eq.GetHashCode(key); + +#if NET8_0_OR_GREATER + if (hashes.Capacity >= 8 & Vector256.IsHardwareAccelerated) + { + var vHash = Vector256.Create(hash); + var vHashes = MemoryMarshal.Cast>(hashes.AsSpan()); + var i = 0; + foreach (var vCurr in vHashes) + { + var vMatches = Vector256.Equals(vCurr, vHash); + var matches = Vector256.ExtractMostSignificantBits(vMatches); + while (matches != 0) + { + var matchIndex = System.Numerics.BitOperations.TrailingZeroCount(matches); + + ref var entry = ref entries.GetSurePresentItemRef(i + matchIndex); + if (found = eq.Equals(entry.Key, key)) + return ref entry; + + // Clear lower bits up to and including the first set bit, afaik it can be hw accelerated + // 0b0001_1000 & (0b0001_1000 - 1) -> & 0b0001_1000 & 0b0001_0111 -> 0b0001_0000 + matches &= matches - 1; + } + + 
i += Vector256.Count; + } + + found = false; + return ref RefTools.GetNullRef(); + } +#endif + + if (hashes.Capacity >= 4) + { + for (var i = 0; i < hashes.Capacity; i += 4) + { + ref var h0 = ref hashes.GetSurePresentItemRef(i); + ref var h1 = ref hashes.GetSurePresentItemRef(i + 1); + ref var h2 = ref hashes.GetSurePresentItemRef(i + 2); + ref var h3 = ref hashes.GetSurePresentItemRef(i + 3); + + var match0 = h0 == hash; + var match1 = h1 == hash; + var match2 = h2 == hash; + var match3 = h3 == hash; + + if (!(match0 | match1 | match2 | match3)) + continue; + + if (match0) + { + ref var entry0 = ref entries.GetSurePresentItemRef(i); + if (found = eq.Equals(entry0.Key, key)) + return ref entry0; + } + + if (match1) + { + ref var entry1 = ref entries.GetSurePresentItemRef(i + 1); + if (found = eq.Equals(entry1.Key, key)) + return ref entry1; + } + + if (match2) + { + ref var entry2 = ref entries.GetSurePresentItemRef(i + 2); + if (found = eq.Equals(entry2.Key, key)) + return ref entry2; + } + + if (match3) + { + ref var entry3 = ref entries.GetSurePresentItemRef(i + 3); + if (found = eq.Equals(entry3.Key, key)) + return ref entry3; + } + } + } + else if (hashes.Capacity == 2) + { + ref var h0 = ref hashes.GetSurePresentItemRef(0); + ref var h1 = ref hashes.GetSurePresentItemRef(1); + if (h0 == hash) + { + ref var entry0 = ref entries.GetSurePresentItemRef(0); + if (found = eq.Equals(entry0.Key, key)) + return ref entry0; + } + if (h1 == hash) + { + ref var entry1 = ref entries.GetSurePresentItemRef(1); + if (found = eq.Equals(entry1.Key, key)) + return ref entry1; + } + } + + found = false; + return ref RefTools.GetNullRef(); + } + /// Gets the ref to the existing entry.Value by the provided key (found == true), /// or adds a new entry (found == false) and returns it.Value by ref. 
/// So the method always return a non-null ref to the value, either existing or added [MethodImpl((MethodImplOptions)256)] - public static ref V AddOrGetValueRef( + public static ref V AddOrGetValueRef( this ref SmallMap, TEq, TStackEntries, TEntries> map, K key, out bool found) where TEq : struct, IEq where TStackEntries : struct, IStack, TStackEntries> @@ -883,7 +1135,7 @@ public static ref V AddOrGetValueRef( /// Provides the performance in scenarios where you look for the present key, and using it, and if ABSENT then add the new one. /// So this method optimized NOT to look for the present item for the second time [MethodImpl((MethodImplOptions)256)] - public static ref V AddSureAbsentDefaultAndGetRef( + public static ref V AddSureAbsentDefaultAndGetRef( this ref SmallMap, TEq, TStackEntries, TEntries> map, K key) where TEq : struct, IEq where TStackEntries : struct, IStack, TStackEntries> @@ -892,7 +1144,7 @@ public static ref V AddSureAbsentDefaultAndGetRefLookups for the stored entry by key. 
Returns the ref to the found entry.Value or the null ref [MethodImpl((MethodImplOptions)256)] - public static ref V TryGetValueRef( + public static ref V TryGetValueRef( this ref SmallMap, TEq, TStackEntries, TEntries> map, K key, out bool found) where TEq : struct, IEq where TStackEntries : struct, IStack, TStackEntries> diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index 73868c6e..e1a47abb 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -195,3 +195,97 @@ public int Add_BySpan() return sum; } } + +[MemoryDiagnoser, RankColumn, Orderer(BenchmarkDotNet.Order.SummaryOrderPolicy.FastestToSlowest)] +// [HardwareCounters(HardwareCounter.CacheMisses, HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] +public class StackSearch +{ + /* + ## Strange baseline + + */ + + [Benchmark] + public int Search_loop() + { + Stack8 hashes = default; + Stack8> entries = default; + + for (var n = 0; n < 8; ++n) + { + hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); + entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); + } + + var sum = 0; + for (var i = 12; i >= -4; --i) + { + ref var e = ref entries.TryGetEntryRef( + ref hashes, + i, + out var found, + default(IntEq), + default(Infer>)); + if (found) + sum += e.Key; + } + + return sum; + } + + [Benchmark] + public int Search_ILP_4() + { + Stack8 hashes = default; + Stack8> entries = default; + + for (var n = 0; n < 8; ++n) + { + hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); + entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); + } + + var sum = 0; + for (var i = 12; i >= -4; --i) + { + ref var e = ref entries.TryGetEntryRef4( + ref hashes, + i, + out var found, + default(IntEq), + default(Infer>)); + if (found) + sum 
+= e.Key; + } + + return sum; + } + + [Benchmark(Baseline = true)] + public int Search_SIMD_loop() + { + Stack8 hashes = default; + Stack8> entries = default; + + for (var n = 0; n < 8; ++n) + { + hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); + entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); + } + + var sum = 0; + for (var i = 12; i >= -4; --i) + { + ref var e = ref entries.TryGetEntryRef8Plus( + ref hashes, + i, + out var found, + default(IntEq), + default(Infer>)); + if (found) + sum += e.Key; + } + + return sum; + } +} diff --git a/test/FastExpressionCompiler.Benchmarks/Program.cs b/test/FastExpressionCompiler.Benchmarks/Program.cs index 25eac5fc..b00d5c60 100644 --- a/test/FastExpressionCompiler.Benchmarks/Program.cs +++ b/test/FastExpressionCompiler.Benchmarks/Program.cs @@ -50,8 +50,9 @@ public static void Main() //BenchmarkRunner.Run(); //BenchmarkRunner.Run(); + BenchmarkRunner.Run(); // BenchmarkRunner.Run(); - BenchmarkRunner.Run(); + // BenchmarkRunner.Run(); // BenchmarkRunner.Run(); //var a = new NestedLambdasVsVars(); diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index 0092c36f..4b7846bd 100644 --- a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -16,6 +16,7 @@ public struct Issue476_System_ExecutionEngineException_with_nullables_on_repeate { public void Run(TestRun t) { + TestSmallMap(t); TestSmallList(t); Original_case(t); } @@ -62,4 +63,31 @@ public void TestSmallList(TestContext t) t.AreEqual(56, doubleSum); } + + public void 
TestSmallMap(TestContext t) + { + Stack8 hashes = default; + Stack8> entries = default; + + for (var n = 0; n < 8; ++n) + { + hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); + entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); + } + + var sum = 0; + for (var i = 12; i >= -4; --i) + { + ref var e = ref entries.TryGetEntryRef( + ref hashes, + i, + out var found, + default(IntEq), + default(Infer>)); + if (found) + sum += e.Key; + } + + t.AreEqual(28, sum); + } } \ No newline at end of file From af3e491f5119ed112d7ebca6785e1ce39f8ca2c5 Mon Sep 17 00:00:00 2001 From: dadhi Date: Fri, 6 Jun 2025 12:32:04 +0200 Subject: [PATCH 18/32] trying harder --- src/FastExpressionCompiler/ImTools.cs | 65 ++++++++++--------- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 12 ++-- ..._repeated_calls_to_ConcurrentDictionary.cs | 38 ++++++++--- 3 files changed, 69 insertions(+), 46 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 50097d23..3b863a14 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -241,29 +241,29 @@ public interface IStack } // todo: @wip -// /// Base marker for collection or container holding some number of items -// public interface ISize { } -// /// Marker for collection or container holding 2 or items -// public interface ISize2Plus : ISize { } -// /// Marker for collection or container holding 4 or more items -// public interface ISize4Plus : ISize2Plus { } -// /// Marker for collection or container holding 8 or more items -// public interface ISize8Plus : ISize4Plus { } -// /// Marker for collection or container holding 16 or more items -// public interface ISize16Plus : ISize8Plus { } - -// /// Marker for collection or container holding 4 items -// public interface ISize2 : ISize2Plus { } -// /// Marker for collection or container holding 4 items -// public interface ISize4 : ISize4Plus { } -// /// Marker for collection or container 
holding 8 items -// public interface ISize8 : ISize8Plus { } -// /// Marker for collection or container holding 16 items -// public interface ISize16 : ISize16Plus { } +/// Base marker for collection or container holding some number of items +public interface ISize { } +/// Marker for collection or container holding 2 or items +public interface ISize2Plus : ISize { } +/// Marker for collection or container holding 4 or more items +public interface ISize4Plus : ISize2Plus { } +/// Marker for collection or container holding 8 or more items +public interface ISize8Plus : ISize4Plus { } +/// Marker for collection or container holding 16 or more items +public interface ISize16Plus : ISize8Plus { } + +/// Marker for collection or container holding 4 items +public interface ISize2 : ISize2Plus { } +/// Marker for collection or container holding 4 items +public interface ISize4 : ISize4Plus { } +/// Marker for collection or container holding 8 items +public interface ISize8 : ISize8Plus { } +/// Marker for collection or container holding 16 items +public interface ISize16 : ISize16Plus { } /// Implementation of `IStack` for 2 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] -public struct Stack2 : IStack> +public struct Stack2 : IStack>, ISize2 { /// public int Capacity => 2; @@ -308,7 +308,7 @@ public ref T this[int index] /// Implementation of `IStack` for 4 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] -public struct Stack4 : IStack> +public struct Stack4 : IStack>, ISize4 { /// public int Capacity => 4; @@ -355,7 +355,7 @@ public ref T this[int index] /// Implementation of `IStack` for 8 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] -public struct Stack8 : IStack> +public struct Stack8 : IStack>, ISize8 { /// public int Capacity => 8; @@ -406,7 +406,7 @@ public ref T this[int index] /// Implementation of `IStack` for 16 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] -public struct Stack16 : IStack> +public 
struct Stack16 : IStack>, ISize16 { /// public int Capacity => 16; @@ -791,7 +791,7 @@ public int GetHashCode((A, B, C) key) => /// Add the Infer parameter to `T Method(..., Infer{T} _)` to enable type inference for T, /// by calling it as `var t = Method(..., default(Infer{T}))` -public interface Infer { } +public interface Use { } /// Configuration and the tools for the SmallMap and friends public static class SmallMap @@ -917,9 +917,9 @@ public ref TEntry AddKeyAndGetEntryRef(K key, int index) // todo: @perf optimize with SIMD, ILP, loop-unrolling, etc. /// Lookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes - public static ref TEntry TryGetEntryRef( + public static ref TEntry TryGetEntryRef_loop( this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, - TEq eq = default, Infer _ = default) + TEq eq = default, Use _ = default) where TEntry : struct, IEntry where TEq : struct, IEq where TStackHashes : struct, IStack @@ -946,13 +946,14 @@ public static ref TEntry TryGetEntryRefLookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes - public static ref TEntry TryGetEntryRef4( + public static ref TEntry TryGetEntryRef_ILP( this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, - TEq eq = default, Infer _ = default) + TEq eq = default, Use _ = default)//, Use _cap = default) where TEntry : struct, IEntry where TEq : struct, IEq - where TStackHashes : struct, IStack - where TStackEntries : struct, IStack + where TStackHashes : struct, IStack//, TCap + where TStackEntries : struct, IStack//, TCap + // where TCap : ISize4Plus { Debug.Assert(hashes.Capacity == entries.Capacity, "Expecting that the hashes and entries stacks have the same capacity"); @@ -1008,9 +1009,9 @@ public static ref TEntry TryGetEntryRef4Lookup for the K in the TStackEntries, first by calculating it hash with TEq and 
searching the hash in the TStackHashes - public static ref TEntry TryGetEntryRef8Plus( + public static ref TEntry TryGetEntryRef( this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, - TEq eq = default, Infer _ = default) + TEq eq = default, Use _ = default) where TEntry : struct, IEntry where TEq : struct, IEq where TStackHashes : struct, IStack diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index e1a47abb..075a439d 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -220,12 +220,12 @@ public int Search_loop() var sum = 0; for (var i = 12; i >= -4; --i) { - ref var e = ref entries.TryGetEntryRef( + ref var e = ref entries.TryGetEntryRef_loop( ref hashes, i, out var found, default(IntEq), - default(Infer>)); + default(Use>)); if (found) sum += e.Key; } @@ -248,12 +248,12 @@ public int Search_ILP_4() var sum = 0; for (var i = 12; i >= -4; --i) { - ref var e = ref entries.TryGetEntryRef4( + ref var e = ref entries.TryGetEntryRef_ILP( ref hashes, i, out var found, default(IntEq), - default(Infer>)); + default(Use>)); if (found) sum += e.Key; } @@ -276,12 +276,12 @@ public int Search_SIMD_loop() var sum = 0; for (var i = 12; i >= -4; --i) { - ref var e = ref entries.TryGetEntryRef8Plus( + ref var e = ref entries.TryGetEntryRef( ref hashes, i, out var found, default(IntEq), - default(Infer>)); + default(Use>)); if (found) sum += e.Key; } diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index 4b7846bd..57d2cc89 100644 --- 
a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -16,7 +16,8 @@ public struct Issue476_System_ExecutionEngineException_with_nullables_on_repeate { public void Run(TestRun t) { - TestSmallMap(t); + TestSmallMap_Lookup_ILP(t); + TestSmallMap_Lookup_loop(t); TestSmallList(t); Original_case(t); } @@ -64,7 +65,7 @@ public void TestSmallList(TestContext t) t.AreEqual(56, doubleSum); } - public void TestSmallMap(TestContext t) + public void TestSmallMap_Lookup_loop(TestContext t) { Stack8 hashes = default; Stack8> entries = default; @@ -78,12 +79,33 @@ public void TestSmallMap(TestContext t) var sum = 0; for (var i = 12; i >= -4; --i) { - ref var e = ref entries.TryGetEntryRef( - ref hashes, - i, - out var found, - default(IntEq), - default(Infer>)); + ref var e = ref entries.TryGetEntryRef_loop( + ref hashes, i, out var found, default(IntEq), + default(Use>)); + if (found) + sum += e.Key; + } + + t.AreEqual(28, sum); + } + + public void TestSmallMap_Lookup_ILP(TestContext t) + { + Stack8 hashes = default; + Stack8> entries = default; + + for (var n = 0; n < 8; ++n) + { + hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); + entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); + } + + var sum = 0; + for (var i = 12; i >= -4; --i) + { + ref var e = ref entries.TryGetEntryRef_ILP( + ref hashes, i, out var found, default(IntEq), + default(Use>)); if (found) sum += e.Key; } From e4fd7fbe98121e5c2f5f9d978cedaea257d3640e Mon Sep 17 00:00:00 2001 From: dadhi Date: Fri, 6 Jun 2025 12:52:08 +0200 Subject: [PATCH 19/32] ok, type level stuff --- src/FastExpressionCompiler/ImTools.cs | 57 +++++++++++++------ ..._repeated_calls_to_ConcurrentDictionary.cs | 2 +- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git 
a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 3b863a14..44c08a76 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -216,6 +216,12 @@ internal static ref T ThrowIndexOutOfBounds(int index, int capacity) => throw new IndexOutOfRangeException($"Index {index} is out of range for Stack{capacity}<{typeof(T)},..>."); } +public interface IStack : IStack + where TSize : struct, ISize + where TStack : struct, IStack +{ +} + /// Abstracts over collection of the items on stack of the fixed Capacity, /// to be used as a part of the hybrid data structures which grow from stack to heap public interface IStack @@ -253,17 +259,33 @@ public interface ISize8Plus : ISize4Plus { } public interface ISize16Plus : ISize8Plus { } /// Marker for collection or container holding 4 items -public interface ISize2 : ISize2Plus { } +public struct Size2 : ISize2Plus +{ + /// Returns the size of the collection or container + public int Size => 2; +} /// Marker for collection or container holding 4 items -public interface ISize4 : ISize4Plus { } +public struct Size4 : ISize4Plus +{ + /// Returns the size of the collection or container + public int Size => 4; +} /// Marker for collection or container holding 8 items -public interface ISize8 : ISize8Plus { } +public struct Size8 : ISize8Plus +{ + /// Returns the size of the collection or container + public int Size => 8; +} /// Marker for collection or container holding 16 items -public interface ISize16 : ISize16Plus { } +public struct Size16 : ISize16Plus +{ + /// Returns the size of the collection or container + public int Size => 16; +} /// Implementation of `IStack` for 2 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] -public struct Stack2 : IStack>, ISize2 +public struct Stack2 : IStack> { /// public int Capacity => 2; @@ -308,7 +330,7 @@ public ref T this[int index] /// Implementation of `IStack` for 4 items on stack 
[StructLayout(LayoutKind.Sequential, Pack = 1)] -public struct Stack4 : IStack>, ISize4 +public struct Stack4 : IStack> { /// public int Capacity => 4; @@ -355,7 +377,7 @@ public ref T this[int index] /// Implementation of `IStack` for 8 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] -public struct Stack8 : IStack>, ISize8 +public struct Stack8 : IStack> { /// public int Capacity => 8; @@ -406,7 +428,7 @@ public ref T this[int index] /// Implementation of `IStack` for 16 items on stack [StructLayout(LayoutKind.Sequential, Pack = 1)] -public struct Stack16 : IStack>, ISize16 +public struct Stack16 : IStack> { /// public int Capacity => 16; @@ -791,7 +813,11 @@ public int GetHashCode((A, B, C) key) => /// Add the Infer parameter to `T Method(..., Infer{T} _)` to enable type inference for T, /// by calling it as `var t = Method(..., default(Infer{T}))` -public interface Use { } +public class Use +{ + public static readonly Use It = new Use(); + private Use() { } +} /// Configuration and the tools for the SmallMap and friends public static class SmallMap @@ -946,18 +972,15 @@ public static ref TEntry TryGetEntryRef_loopLookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes - public static ref TEntry TryGetEntryRef_ILP( + public static ref TEntry TryGetEntryRef_ILP( this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, - TEq eq = default, Use _ = default)//, Use _cap = default) + TEq eq = default, Use _ = default, Use _cap = default) where TEntry : struct, IEntry where TEq : struct, IEq - where TStackHashes : struct, IStack//, TCap - where TStackEntries : struct, IStack//, TCap - // where TCap : ISize4Plus + where TStackHashes : struct, IStack + where TStackEntries : struct, IStack + where TCap : struct, ISize4Plus { - Debug.Assert(hashes.Capacity == entries.Capacity, - "Expecting that the hashes and entries stacks have the same capacity"); - var hash = 
eq.GetHashCode(key); for (var i = 0; i < hashes.Capacity; i += 4) diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index 57d2cc89..bb5d8817 100644 --- a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -105,7 +105,7 @@ public void TestSmallMap_Lookup_ILP(TestContext t) { ref var e = ref entries.TryGetEntryRef_ILP( ref hashes, i, out var found, default(IntEq), - default(Use>)); + Use>.It, Use.It); if (found) sum += e.Key; } From cdaf72b16552ef3ed4132dd6da1e0f45dfd1e8da Mon Sep 17 00:00:00 2001 From: dadhi Date: Fri, 6 Jun 2025 13:42:12 +0200 Subject: [PATCH 20/32] fix ci, and thigs --- src/FastExpressionCompiler/ImTools.cs | 44 ++++++++++--------- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 21 +++------ ..._repeated_calls_to_ConcurrentDictionary.cs | 29 +++++++++++- 3 files changed, 56 insertions(+), 38 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 44c08a76..1e83b19e 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -46,8 +46,11 @@ namespace FastExpressionCompiler.ImTools; using System.Runtime.InteropServices; using System.Diagnostics.CodeAnalysis; -using static SmallMap; +#if NET8_0_OR_GREATER using System.Runtime.Intrinsics; +#endif + +using static SmallMap; /// Helpers and polyfills for the missing things in the old .NET versions public static class RefTools @@ -216,6 +219,7 @@ internal static ref T ThrowIndexOutOfBounds(int index, int capacity) => throw new 
IndexOutOfRangeException($"Index {index} is out of range for Stack{capacity}<{typeof(T)},..>."); } +/// Stack with the Size information to check the Capacity in the compile time public interface IStack : IStack where TSize : struct, ISize where TStack : struct, IStack @@ -248,7 +252,11 @@ public interface IStack // todo: @wip /// Base marker for collection or container holding some number of items -public interface ISize { } +public interface ISize +{ + /// Returns the size of the collection or container + int Size { get; } +} /// Marker for collection or container holding 2 or items public interface ISize2Plus : ISize { } /// Marker for collection or container holding 4 or more items @@ -811,13 +819,9 @@ public int GetHashCode((A, B, C) key) => Hasher.Combine(RuntimeHelpers.GetHashCode(key.Item1), Hasher.Combine(RuntimeHelpers.GetHashCode(key.Item2), RuntimeHelpers.GetHashCode(key.Item3))); } -/// Add the Infer parameter to `T Method(..., Infer{T} _)` to enable type inference for T, -/// by calling it as `var t = Method(..., default(Infer{T}))` -public class Use -{ - public static readonly Use It = new Use(); - private Use() { } -} +/// Add the Infer parameter to `T Method{T}(..., Use{T} _)` to enable type inference for T, +/// by calling it as `var t = Method(..., default(Use{T}))` +public interface Use { } /// Configuration and the tools for the SmallMap and friends public static class SmallMap @@ -974,7 +978,7 @@ public static ref TEntry TryGetEntryRef_loopLookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes public static ref TEntry TryGetEntryRef_ILP( this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, - TEq eq = default, Use _ = default, Use _cap = default) + TEq eq = default, TCap cap = default, Use _ = default) where TEntry : struct, IEntry where TEq : struct, IEq where TStackHashes : struct, IStack @@ -983,7 +987,7 @@ public static ref TEntry 
TryGetEntryRef_ILPLookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes - public static ref TEntry TryGetEntryRef( + public static ref TEntry TryGetEntryRef( this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, - TEq eq = default, Use _ = default) + TEq eq = default, TCap cap = default, Use _ = default) where TEntry : struct, IEntry where TEq : struct, IEq - where TStackHashes : struct, IStack - where TStackEntries : struct, IStack + where TStackHashes : struct, IStack + where TStackEntries : struct, IStack + where TCap : struct, ISize2Plus { - Debug.Assert(hashes.Capacity == entries.Capacity, - "Expecting that the hashes and entries stacks have the same capacity"); - var hash = eq.GetHashCode(key); #if NET8_0_OR_GREATER - if (hashes.Capacity >= 8 & Vector256.IsHardwareAccelerated) + if (cap.Size >= 8 & Vector256.IsHardwareAccelerated) { var vHash = Vector256.Create(hash); var vHashes = MemoryMarshal.Cast>(hashes.AsSpan()); @@ -1076,7 +1078,7 @@ public static ref TEntry TryGetEntryRef= 4) + if (cap.Size >= 4) { for (var i = 0; i < hashes.Capacity; i += 4) { @@ -1122,7 +1124,7 @@ public static ref TEntry TryGetEntryRef= -4; --i) { ref var e = ref entries.TryGetEntryRef_loop( - ref hashes, - i, - out var found, - default(IntEq), - default(Use>)); + ref hashes, i, out var found, + default(IntEq), default(Use>)); if (found) sum += e.Key; } @@ -249,11 +246,8 @@ public int Search_ILP_4() for (var i = 12; i >= -4; --i) { ref var e = ref entries.TryGetEntryRef_ILP( - ref hashes, - i, - out var found, - default(IntEq), - default(Use>)); + ref hashes, i, out var found, + default(IntEq), default(Size8), default(Use>)); if (found) sum += e.Key; } @@ -277,11 +271,8 @@ public int Search_SIMD_loop() for (var i = 12; i >= -4; --i) { ref var e = ref entries.TryGetEntryRef( - ref hashes, - i, - out var found, - default(IntEq), - default(Use>)); + ref hashes, i, out var found, + 
default(IntEq), default(Size8), default(Use>)); if (found) sum += e.Key; } diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index bb5d8817..3b1c5e03 100644 --- a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -16,6 +16,7 @@ public struct Issue476_System_ExecutionEngineException_with_nullables_on_repeate { public void Run(TestRun t) { + TestSmallMap_Lookup_SIMD(t); TestSmallMap_Lookup_ILP(t); TestSmallMap_Lookup_loop(t); TestSmallList(t); @@ -104,8 +105,32 @@ public void TestSmallMap_Lookup_ILP(TestContext t) for (var i = 12; i >= -4; --i) { ref var e = ref entries.TryGetEntryRef_ILP( - ref hashes, i, out var found, default(IntEq), - Use>.It, Use.It); + ref hashes, i, out var found, + default(IntEq), default(Size8), default(Use>)); + if (found) + sum += e.Key; + } + + t.AreEqual(28, sum); + } + + public void TestSmallMap_Lookup_SIMD(TestContext t) + { + Stack8 hashes = default; + Stack8> entries = default; + + for (var n = 0; n < 8; ++n) + { + hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); + entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); + } + + var sum = 0; + for (var i = 12; i >= -4; --i) + { + ref var e = ref entries.TryGetEntryRef( + ref hashes, i, out var found, + default(IntEq), default(Size8), default(Use>)); if (found) sum += e.Key; } From 6adf2d64e61c7a5fbe9db98ecffdd929a02b6066 Mon Sep 17 00:00:00 2001 From: dadhi Date: Fri, 6 Jun 2025 14:03:41 +0200 Subject: [PATCH 21/32] bm out --- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 16 ++++++++++++++-- 1 
file changed, 14 insertions(+), 2 deletions(-) diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index dbe63a5a..94d5820a 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -197,12 +197,24 @@ public int Add_BySpan() } [MemoryDiagnoser, RankColumn, Orderer(BenchmarkDotNet.Order.SummaryOrderPolicy.FastestToSlowest)] -// [HardwareCounters(HardwareCounter.CacheMisses, HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] +[HardwareCounters(HardwareCounter.CacheMisses, HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] public class StackSearch { /* - ## Strange baseline + ## Baseline + + BenchmarkDotNet v0.15.0, Windows 11 (10.0.26100.4202/24H2/2024Update/HudsonValley) + Intel Core i9-8950HK CPU 2.90GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores + .NET SDK 9.0.203 + [Host] : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 + DefaultJob : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 + + | Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio | + |----------------- |---------:|---------:|---------:|---------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:| + | Search_SIMD_loop | 46.65 ns | 0.763 ns | 0.637 ns | 46.84 ns | 1.00 | 0.02 | 1 | 103 | 0 | 0 | - | NA | + | Search_ILP_4 | 91.72 ns | 1.227 ns | 1.088 ns | 91.91 ns | 1.97 | 0.03 | 2 | 138 | 0 | 0 | - | NA | + | Search_loop | 96.71 ns | 1.975 ns | 4.499 ns | 94.53 ns | 2.07 | 0.10 | 2 | 274 | 0 | 0 | - | NA | */ [Benchmark] From 42d990dbdc62ff6d74cb91c9263285c811849943 Mon Sep 17 00:00:00 2001 From: dadhi Date: Fri, 6 Jun 2025 14:37:25 +0200 Subject: 
[PATCH 22/32] spell check galor --- src/FastExpressionCompiler/ImTools.cs | 20 +++++++++---------- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 1e83b19e..f30d893a 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -219,7 +219,7 @@ internal static ref T ThrowIndexOutOfBounds(int index, int capacity) => throw new IndexOutOfRangeException($"Index {index} is out of range for Stack{capacity}<{typeof(T)},..>."); } -/// Stack with the Size information to check the Capacity in the compile time +/// Stack with the Size information to check it at compile time public interface IStack : IStack where TSize : struct, ISize where TStack : struct, IStack @@ -269,25 +269,25 @@ public interface ISize16Plus : ISize8Plus { } /// Marker for collection or container holding 4 items public struct Size2 : ISize2Plus { - /// Returns the size of the collection or container + /// public int Size => 2; } /// Marker for collection or container holding 4 items public struct Size4 : ISize4Plus { - /// Returns the size of the collection or container + /// public int Size => 4; } /// Marker for collection or container holding 8 items public struct Size8 : ISize8Plus { - /// Returns the size of the collection or container + /// public int Size => 8; } /// Marker for collection or container holding 16 items public struct Size16 : ISize16Plus { - /// Returns the size of the collection or container + /// public int Size => 16; } @@ -819,7 +819,7 @@ public int GetHashCode((A, B, C) key) => Hasher.Combine(RuntimeHelpers.GetHashCode(key.Item1), Hasher.Combine(RuntimeHelpers.GetHashCode(key.Item2), RuntimeHelpers.GetHashCode(key.Item3))); } -/// Add the Infer parameter to `T Method{T}(..., Use{T} _)` to enable type inference for T, +/// Add the Use parameter to `T Method{T}(..., Use{T} _)` to enable type 
inference for T, /// by calling it as `var t = Method(..., default(Use{T}))` public interface Use { } @@ -836,7 +836,7 @@ public static class SmallMap internal const byte ProbeCountShift = 32 - ProbeBits; // ~0b11111000000000000000000000000000 -> 0b00000111111111111111111111111111 internal const int HashAndIndexMask = ~(NotShiftedProbeCountMask << ProbeCountShift); - // Window with the hash mask wothout the lead ProbeMask and closing IndexMask 0b00000111111111111111111111110000 + // Window with the hash mask without the lead ProbeMask and closing IndexMask 0b00000111111111111111111111110000 internal const int HashMask = HashAndIndexMask & ~IndexMask; /// Represent a keyed entry stored in the SmallMap. @@ -1388,7 +1388,7 @@ public ref TEntry AddOrGetEntryRef(K key, out bool found) // to the usual HashMap packed hashes and indexes array for the promised O(1) lookup. // But the values are remaining on the Stack, and for the found index of the entry we use the GetSurePresentItemRef(index) // to get the value reference either from the Stack or the Entries. - // So the values on the stack are guarntied to be stable from the beginning of the map creation, + // So the values on the stack are guarantied to be stable from the beginning of the map creation, // because they are not copied when the Entries need to Resize (depending on the TEntries implementation). 
_capacityBitShift = MinHashesCapacityBitShift; @@ -1400,7 +1400,7 @@ public ref TEntry AddOrGetEntryRef(K key, out bool found) AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(key), StackEntries.Capacity); _count = StackEntries.Capacity + 1; // +1 because we added the new key - _entries.Init(StackEntries.Capacity); // Give the heap entries the same initial capcity as Stack, effectively doubling the capacity + _entries.Init(StackEntries.Capacity); // Give the heap entries the same initial capacity as Stack, effectively doubling the capacity return ref _entries.AddKeyAndGetEntryRef(key, 0); // add the new key to the entries with the 0 index in the entries } @@ -1480,7 +1480,7 @@ public ref TEntry AddSureAbsentDefaultEntryAndGetRef(K key) AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(key), StackEntries.Capacity); _count = StackEntries.Capacity + 1; // +1 because we added the new key - _entries.Init(StackEntries.Capacity); // Give the heap entries the same initial capcity as Stack, effectively doubling the capacity + _entries.Init(StackEntries.Capacity); // Give the heap entries the same initial capacity as Stack, effectively doubling the capacity return ref _entries.AddKeyAndGetEntryRef(key, 0); // add the new key to the entries with the 0 index in the entries } diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index 94d5820a..8e2d7b6f 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -93,7 +93,7 @@ public Type[] ArrayResize() public class SmallList_Switch_vs_AsSpan_ByRef_Access { /* - ## Baseline: hmm, why AsSpan is faster even if it is utilized only by half of the acces, the other part hits the heap? 
+ ## Baseline: hmm, why AsSpan is faster even if it is utilized only by half of the access, the other part hits the heap? BenchmarkDotNet v0.15.0, Windows 11 (10.0.26100.4061/24H2/2024Update/HudsonValley) Intel Core i9-8950HK CPU 2.90GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores From 4ec5f184d3f2853ac9cf725db049064cbc899d59 Mon Sep 17 00:00:00 2001 From: dadhi Date: Fri, 6 Jun 2025 17:01:03 +0200 Subject: [PATCH 23/32] @wip adding types to SmallMap --- src/FastExpressionCompiler/ImTools.cs | 198 +++--------------- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 96 ++++----- ..._repeated_calls_to_ConcurrentDictionary.cs | 50 ----- 3 files changed, 80 insertions(+), 264 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index f30d893a..e04d3253 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -945,96 +945,6 @@ public ref TEntry AddKeyAndGetEntryRef(K key, int index) } } - // todo: @perf optimize with SIMD, ILP, loop-unrolling, etc. 
- /// Lookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes - public static ref TEntry TryGetEntryRef_loop( - this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, - TEq eq = default, Use _ = default) - where TEntry : struct, IEntry - where TEq : struct, IEq - where TStackHashes : struct, IStack - where TStackEntries : struct, IStack - { - Debug.Assert(hashes.Capacity == entries.Capacity, - "Expecting that the hashes and entries stacks have the same capacity"); - - var hash = eq.GetHashCode(key); - - for (var i = 0; i < hashes.Capacity; ++i) - { - var h = hashes.GetSurePresentItemRef(i); - if (h == hash) - { - ref var entry = ref entries.GetSurePresentItemRef(i); - if (found = eq.Equals(entry.Key, key)) - return ref entry; - } - } - - found = false; - return ref RefTools.GetNullRef(); - } - - /// Lookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes - public static ref TEntry TryGetEntryRef_ILP( - this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, - TEq eq = default, TCap cap = default, Use _ = default) - where TEntry : struct, IEntry - where TEq : struct, IEq - where TStackHashes : struct, IStack - where TStackEntries : struct, IStack - where TCap : struct, ISize4Plus - { - var hash = eq.GetHashCode(key); - - for (var i = 0; i < cap.Size; i += 4) - { - ref var h0 = ref hashes.GetSurePresentItemRef(i); - ref var h1 = ref hashes.GetSurePresentItemRef(i + 1); - ref var h2 = ref hashes.GetSurePresentItemRef(i + 2); - ref var h3 = ref hashes.GetSurePresentItemRef(i + 3); - - var match0 = h0 == hash; - var match1 = h1 == hash; - var match2 = h2 == hash; - var match3 = h3 == hash; - - if (!(match0 | match1 | match2 | match3)) - continue; - - if (match0) - { - ref var entry0 = ref entries.GetSurePresentItemRef(i); - if (found = eq.Equals(entry0.Key, key)) - return ref entry0; - } - - if 
(match1) - { - ref var entry1 = ref entries.GetSurePresentItemRef(i + 1); - if (found = eq.Equals(entry1.Key, key)) - return ref entry1; - } - - if (match2) - { - ref var entry2 = ref entries.GetSurePresentItemRef(i + 2); - if (found = eq.Equals(entry2.Key, key)) - return ref entry2; - } - - if (match3) - { - ref var entry3 = ref entries.GetSurePresentItemRef(i + 3); - if (found = eq.Equals(entry3.Key, key)) - return ref entry3; - } - } - - found = false; - return ref RefTools.GetNullRef(); - } - /// Lookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes public static ref TEntry TryGetEntryRef( this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, @@ -1078,67 +988,14 @@ public static ref TEntry TryGetEntryRef= 4) - { - for (var i = 0; i < hashes.Capacity; i += 4) - { - ref var h0 = ref hashes.GetSurePresentItemRef(i); - ref var h1 = ref hashes.GetSurePresentItemRef(i + 1); - ref var h2 = ref hashes.GetSurePresentItemRef(i + 2); - ref var h3 = ref hashes.GetSurePresentItemRef(i + 3); - - var match0 = h0 == hash; - var match1 = h1 == hash; - var match2 = h2 == hash; - var match3 = h3 == hash; - - if (!(match0 | match1 | match2 | match3)) - continue; - - if (match0) - { - ref var entry0 = ref entries.GetSurePresentItemRef(i); - if (found = eq.Equals(entry0.Key, key)) - return ref entry0; - } - - if (match1) - { - ref var entry1 = ref entries.GetSurePresentItemRef(i + 1); - if (found = eq.Equals(entry1.Key, key)) - return ref entry1; - } - - if (match2) - { - ref var entry2 = ref entries.GetSurePresentItemRef(i + 2); - if (found = eq.Equals(entry2.Key, key)) - return ref entry2; - } - - if (match3) - { - ref var entry3 = ref entries.GetSurePresentItemRef(i + 3); - if (found = eq.Equals(entry3.Key, key)) - return ref entry3; - } - } - } - else + for (var i = 0; i < hashes.Capacity; ++i) { - ref var h0 = ref hashes.GetSurePresentItemRef(0); - ref var h1 = ref 
hashes.GetSurePresentItemRef(1); - if (h0 == hash) - { - ref var entry0 = ref entries.GetSurePresentItemRef(0); - if (found = eq.Equals(entry0.Key, key)) - return ref entry0; - } - if (h1 == hash) + var h = hashes.GetSurePresentItemRef(i); + if (h == hash) { - ref var entry1 = ref entries.GetSurePresentItemRef(1); - if (found = eq.Equals(entry1.Key, key)) - return ref entry1; + ref var entry = ref entries.GetSurePresentItemRef(i); + if (found = eq.Equals(entry.Key, key)) + return ref entry; } } @@ -1150,10 +1007,12 @@ public static ref TEntry TryGetEntryRef [MethodImpl((MethodImplOptions)256)] - public static ref V AddOrGetValueRef( - this ref SmallMap, TEq, TStackEntries, TEntries> map, K key, out bool found) + public static ref V AddOrGetValueRef( + this ref SmallMap, TEq, TStackCap, TStackHashes, TStackEntries, TEntries> map, K key, out bool found) where TEq : struct, IEq - where TStackEntries : struct, IStack, TStackEntries> + where TStackCap : struct, ISize2Plus + where TStackHashes : struct, IStack + where TStackEntries : struct, IStack, TStackCap, TStackEntries> where TEntries : struct, IEntries, TEq> => ref map.AddOrGetEntryRef(key, out found).Value; @@ -1161,19 +1020,23 @@ public static ref V AddOrGetValueRef( /// Provides the performance in scenarios where you look for the present key, and using it, and if ABSENT then add the new one. 
/// So this method optimized NOT to look for the present item for the second time [MethodImpl((MethodImplOptions)256)] - public static ref V AddSureAbsentDefaultAndGetRef( - this ref SmallMap, TEq, TStackEntries, TEntries> map, K key) + public static ref V AddSureAbsentDefaultAndGetRef( + this ref SmallMap, TEq, TStackCap, TStackHashes, TStackEntries, TEntries> map, K key) where TEq : struct, IEq - where TStackEntries : struct, IStack, TStackEntries> + where TStackCap : struct, ISize2Plus + where TStackHashes : struct, IStack + where TStackEntries : struct, IStack, TStackCap, TStackEntries> where TEntries : struct, IEntries, TEq> => ref map.AddSureAbsentDefaultEntryAndGetRef(key).Value; /// Lookups for the stored entry by key. Returns the ref to the found entry.Value or the null ref [MethodImpl((MethodImplOptions)256)] - public static ref V TryGetValueRef( - this ref SmallMap, TEq, TStackEntries, TEntries> map, K key, out bool found) + public static ref V TryGetValueRef( + this ref SmallMap, TEq, TStackCap, TStackHashes, TStackEntries, TEntries> map, K key, out bool found) where TEq : struct, IEq - where TStackEntries : struct, IStack, TStackEntries> + where TStackCap : struct, ISize2Plus + where TStackHashes : struct, IStack + where TStackEntries : struct, IStack, TStackCap, TStackEntries> where TEntries : struct, IEntries, TEq> { ref var e = ref map.TryGetEntryRef(key, out found); @@ -1198,10 +1061,12 @@ public static ref V TryGetValueRef( /// /// [DebuggerDisplay("{Count} of {_e0}, {_e1}, {_e2}, {_e3}, ...")] -public struct SmallMap +public struct SmallMap where TEntry : struct, IEntry where TEq : struct, IEq - where TStackEntries : struct, IStack + where TStackCap : struct, ISize2Plus + where TStackHashes : struct, IStack + where TStackEntries : struct, IStack where TEntries : struct, IEntries { internal byte _capacityBitShift; @@ -1219,6 +1084,7 @@ public struct SmallMap internal TEntries _entries; #pragma warning restore IDE0044 #pragma warning disable CS0649 
// Field 'SmallMap.Stack' is never assigned to, and will always have its default value + internal TStackHashes StackHashes; internal TStackEntries StackEntries; #pragma warning restore CS0649 @@ -1614,42 +1480,42 @@ internal int ResizeHashes(int indexMask) public struct SmallMap4() where TEq : struct, IEq { /// Map with 4 elements on stack and entries baked by the single array - public SmallMap, TEq, Stack4>, SmallMap.SingleArrayEntries, TEq>> Map; + public SmallMap, TEq, Size4, Stack4, Stack4>, SmallMap.SingleArrayEntries, TEq>> Map; } /// Holds the Map with 8 items on stack. Minimizes the number of type arguments required to be specified public struct SmallMap8() where TEq : struct, IEq { /// Map with 8 elements on stack and entries baked by the single array - public SmallMap, TEq, Stack8>, SmallMap.SingleArrayEntries, TEq>> Map; + public SmallMap, TEq, Size8, Stack8, Stack8>, SmallMap.SingleArrayEntries, TEq>> Map; } /// Holds the Map with 16 items on stack. Minimizes the number of type arguments required to be specified public struct SmallMap16() where TEq : struct, IEq { /// Map with 16 elements on stack and entries baked by the single array - public SmallMap, TEq, Stack16>, SmallMap.SingleArrayEntries, TEq>> Map; + public SmallMap, TEq, Size16, Stack16, Stack16>, SmallMap.SingleArrayEntries, TEq>> Map; } /// Holds the Set with 4 items on stack. Minimizes the number of type arguments required to be specified public struct SmallSet4() where TEq : struct, IEq { /// Set with 4 keys on stack and entries baked by the single array - public SmallMap, TEq, Stack4>, SmallMap.SingleArrayEntries, TEq>> Set; + public SmallMap, TEq, Size4, Stack4, Stack4>, SmallMap.SingleArrayEntries, TEq>> Set; } /// Holds the Set with 8 items on stack. 
Minimizes the number of type arguments required to be specified public struct SmallSet8() where TEq : struct, IEq { /// Set with 8 keys on stack and entries baked by the single array - public SmallMap, TEq, Stack8>, SmallMap.SingleArrayEntries, TEq>> Set; + public SmallMap, TEq, Size8, Stack8, Stack8>, SmallMap.SingleArrayEntries, TEq>> Set; } /// Holds the Set with 16 items on stack. Minimizes the number of type arguments required to be specified public struct SmallSet16() where TEq : struct, IEq { /// Set with 16 keys on stack and entries baked by the single array - public SmallMap, TEq, Stack16>, SmallMap.SingleArrayEntries, TEq>> Set; + public SmallMap, TEq, Size16, Stack16, Stack16>, SmallMap.SingleArrayEntries, TEq>> Set; } #nullable restore \ No newline at end of file diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index 8e2d7b6f..5e3d8313 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -210,65 +210,65 @@ .NET SDK 9.0.203 DefaultJob : .NET 9.0.4 (9.0.425.16305), X64 RyuJIT AVX2 - | Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio | - |----------------- |---------:|---------:|---------:|---------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:| - | Search_SIMD_loop | 46.65 ns | 0.763 ns | 0.637 ns | 46.84 ns | 1.00 | 0.02 | 1 | 103 | 0 | 0 | - | NA | - | Search_ILP_4 | 91.72 ns | 1.227 ns | 1.088 ns | 91.91 ns | 1.97 | 0.03 | 2 | 138 | 0 | 0 | - | NA | - | Search_loop | 96.71 ns | 1.975 ns | 4.499 ns | 94.53 ns | 2.07 | 0.10 | 2 | 274 | 0 | 0 | - | NA | + | Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Rank | 
BranchInstructions/Op | BranchMispredictions/Op | CacheMisses/Op | Allocated | Alloc Ratio | + |-------------- |---------:|---------:|---------:|---------:|------:|--------:|-----:|----------------------:|------------------------:|---------------:|----------:|------------:| + | Search_SIMD | 46.65 ns | 0.763 ns | 0.637 ns | 46.84 ns | 1.00 | 0.02 | 1 | 103 | 0 | 0 | - | NA | + | Search_ILP_4 | 91.72 ns | 1.227 ns | 1.088 ns | 91.91 ns | 1.97 | 0.03 | 2 | 138 | 0 | 0 | - | NA | + | Search_loop | 96.71 ns | 1.975 ns | 4.499 ns | 94.53 ns | 2.07 | 0.10 | 2 | 274 | 0 | 0 | - | NA | */ - [Benchmark] - public int Search_loop() - { - Stack8 hashes = default; - Stack8> entries = default; + // [Benchmark] + // public int Search_loop() + // { + // Stack8 hashes = default; + // Stack8> entries = default; - for (var n = 0; n < 8; ++n) - { - hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); - entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); - } + // for (var n = 0; n < 8; ++n) + // { + // hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); + // entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); + // } - var sum = 0; - for (var i = 12; i >= -4; --i) - { - ref var e = ref entries.TryGetEntryRef_loop( - ref hashes, i, out var found, - default(IntEq), default(Use>)); - if (found) - sum += e.Key; - } + // var sum = 0; + // for (var i = 12; i >= -4; --i) + // { + // ref var e = ref entries.TryGetEntryRef_loop( + // ref hashes, i, out var found, + // default(IntEq), default(Use>)); + // if (found) + // sum += e.Key; + // } - return sum; - } + // return sum; + // } - [Benchmark] - public int Search_ILP_4() - { - Stack8 hashes = default; - Stack8> entries = default; + // [Benchmark] + // public int Search_ILP_4() + // { + // Stack8 hashes = default; + // Stack8> entries = default; - for (var n = 0; n < 8; ++n) - { - hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); - entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); - } 
+ // for (var n = 0; n < 8; ++n) + // { + // hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); + // entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); + // } - var sum = 0; - for (var i = 12; i >= -4; --i) - { - ref var e = ref entries.TryGetEntryRef_ILP( - ref hashes, i, out var found, - default(IntEq), default(Size8), default(Use>)); - if (found) - sum += e.Key; - } + // var sum = 0; + // for (var i = 12; i >= -4; --i) + // { + // ref var e = ref entries.TryGetEntryRef_ILP( + // ref hashes, i, out var found, + // default(IntEq), default(Size8), default(Use>)); + // if (found) + // sum += e.Key; + // } - return sum; - } + // return sum; + // } [Benchmark(Baseline = true)] - public int Search_SIMD_loop() + public int Search_SIMD() { Stack8 hashes = default; Stack8> entries = default; diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index 3b1c5e03..bca5b1fd 100644 --- a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -17,8 +17,6 @@ public struct Issue476_System_ExecutionEngineException_with_nullables_on_repeate public void Run(TestRun t) { TestSmallMap_Lookup_SIMD(t); - TestSmallMap_Lookup_ILP(t); - TestSmallMap_Lookup_loop(t); TestSmallList(t); Original_case(t); } @@ -66,54 +64,6 @@ public void TestSmallList(TestContext t) t.AreEqual(56, doubleSum); } - public void TestSmallMap_Lookup_loop(TestContext t) - { - Stack8 hashes = default; - Stack8> entries = default; - - for (var n = 0; n < 8; ++n) - { - hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); - 
entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); - } - - var sum = 0; - for (var i = 12; i >= -4; --i) - { - ref var e = ref entries.TryGetEntryRef_loop( - ref hashes, i, out var found, default(IntEq), - default(Use>)); - if (found) - sum += e.Key; - } - - t.AreEqual(28, sum); - } - - public void TestSmallMap_Lookup_ILP(TestContext t) - { - Stack8 hashes = default; - Stack8> entries = default; - - for (var n = 0; n < 8; ++n) - { - hashes.GetSurePresentItemRef(n) = default(IntEq).GetHashCode(n); - entries.GetSurePresentItemRef(n) = new SmallMap.Entry(n); - } - - var sum = 0; - for (var i = 12; i >= -4; --i) - { - ref var e = ref entries.TryGetEntryRef_ILP( - ref hashes, i, out var found, - default(IntEq), default(Size8), default(Use>)); - if (found) - sum += e.Key; - } - - t.AreEqual(28, sum); - } - public void TestSmallMap_Lookup_SIMD(TestContext t) { Stack8 hashes = default; From a723711eb2ea3b052a062d152958d871d29eacbe Mon Sep 17 00:00:00 2001 From: dadhi Date: Sun, 8 Jun 2025 10:46:40 +0200 Subject: [PATCH 24/32] abstractong things @wip --- src/FastExpressionCompiler/ImTools.cs | 94 +++++++++++-------- .../ArrayCopy_vs_ArrayResize_vs_ForLoop.cs | 2 +- ..._repeated_calls_to_ConcurrentDictionary.cs | 2 +- 3 files changed, 57 insertions(+), 41 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index e04d3253..712bc4ee 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -158,6 +158,24 @@ public static ref T GetSurePresentItemRef(this T[] items, int index) #endif } + /// Get the item by-ref without bounds check + [MethodImpl((MethodImplOptions)256)] + public static ref T GetItemRef( +#if NET7_0_OR_GREATER + this ref T first, int index) where T : struct => ref Unsafe.Add(ref first, index); +#else + this T[] first, int index) where T : struct => ref first[index]; +#endif + + /// Get the item without bounds check + [MethodImpl((MethodImplOptions)256)] + internal 
static T GetItem( +#if NET7_0_OR_GREATER + this ref T start, int index) where T : struct => Unsafe.Add(ref start, index); +#else + this T[] start, int index) => start[index]; +#endif + // todo: @perf add the not null variant /// Appends the new default item to the list and returns ref to it for write or read [MethodImpl((MethodImplOptions)256)] @@ -881,19 +899,19 @@ public struct Entry : IEntry /// Binary representation of the `int` public static string ToB(int x) => System.Convert.ToString(x, 2).PadLeft(32, '0'); - [MethodImpl((MethodImplOptions)256)] -#if NET7_0_OR_GREATER - internal static ref int NextHashRef(ref int start, int distance) => ref Unsafe.Add(ref start, distance); -#else - internal static ref int NextHashRef(ref int[] start, int distance) => ref start[distance]; -#endif + // [MethodImpl((MethodImplOptions)256)] + // #if NET7_0_OR_GREATER + // internal static ref int NextHashRef(ref int start, int distance) => ref Unsafe.Add(ref start, distance); + // #else + // internal static ref int NextHashRef(ref int[] start, int distance) => ref start[distance]; + // #endif - [MethodImpl((MethodImplOptions)256)] -#if NET7_0_OR_GREATER - internal static int NextHash(ref int start, int distance) => Unsafe.Add(ref start, distance); -#else - internal static int NextHash(ref int[] start, int distance) => start[distance]; -#endif + // [MethodImpl((MethodImplOptions)256)] + // #if NET7_0_OR_GREATER + // internal static int NextHash(ref int start, int distance) => Unsafe.Add(ref start, distance); + // #else + // internal static int NextHash(ref int[] start, int distance) => start[distance]; + // #endif /// Abstraction to configure your own entries data structure. 
Check the derived types for the examples public interface IEntries @@ -947,7 +965,7 @@ public ref TEntry AddKeyAndGetEntryRef(K key, int index) /// Lookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes public static ref TEntry TryGetEntryRef( - this ref TStackEntries entries, ref TStackHashes hashes, K key, out bool found, + this ref TStackEntries entries, ref TStackHashes hashes, int count, K key, out bool found, TEq eq = default, TCap cap = default, Use _ = default) where TEntry : struct, IEntry where TEq : struct, IEq @@ -955,10 +973,12 @@ public static ref TEntry TryGetEntryRef where TCap : struct, ISize2Plus { + Debug.Assert(count <= cap.Size, $"SmallMap.TryGetEntryRef: count {count} should be <= stack capacity {cap.Size}"); + var hash = eq.GetHashCode(key); #if NET8_0_OR_GREATER - if (cap.Size >= 8 & Vector256.IsHardwareAccelerated) + if (count >= 8 & cap.Size >= 8 & Vector256.IsHardwareAccelerated) { var vHash = Vector256.Create(hash); var vHashes = MemoryMarshal.Cast>(hashes.AsSpan()); @@ -1084,7 +1104,7 @@ public struct SmallMap.Stack' is never assigned to, and will always have its default value - internal TStackHashes StackHashes; + internal TStackHashes _stackHashes; internal TStackEntries StackEntries; #pragma warning restore CS0649 @@ -1143,7 +1163,7 @@ private ref TEntry AddOrGetRefInEntries(K key, out bool found) #else var hashesAndIndexes = _packedHashesAndIndexes; #endif - ref var h = ref NextHashRef(ref hashesAndIndexes, hashIndex); + ref var h = ref hashesAndIndexes.GetItemRef(hashIndex); // 1. Skip over hashes with the bigger and equal probes. 
The hashes with bigger probes overlapping from the earlier ideal positions var probes = 1; @@ -1156,7 +1176,7 @@ private ref TEntry AddOrGetRefInEntries(K key, out bool found) if (found = default(TEq).Equals(e.Key, key)) return ref e; } - h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref hashesAndIndexes.GetItemRef(++hashIndex & indexMask); ++probes; } found = false; @@ -1170,7 +1190,7 @@ private ref TEntry AddOrGetRefInEntries(K key, out bool found) probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref hashesAndIndexes.GetItemRef(++hashIndex & indexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1193,11 +1213,11 @@ private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) var hashesAndIndexes = _packedHashesAndIndexes; #endif // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions - ref var h = ref NextHashRef(ref hashesAndIndexes, hashIndex); + ref var h = ref hashesAndIndexes.GetItemRef(hashIndex); var probes = 1; while ((h >>> ProbeCountShift) >= probes) { - h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & IndexMask); + h = ref hashesAndIndexes.GetItemRef(++hashIndex & IndexMask); ++probes; } @@ -1210,7 +1230,7 @@ private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & IndexMask); + h = ref hashesAndIndexes.GetItemRef(++hashIndex & IndexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1229,20 +1249,16 @@ public ref TEntry AddOrGetEntryRef(K key, out bool found) if (_count > StackEntries.Capacity) return ref AddOrGetRefInEntries(key, out found); - // Linear search in stack (which has a few items) by comparing the keys without calculating the hashes - // Saving on 
the hash calculation. Losing on the bigger number of comparisons. - for (var i = 0; i < _count; ++i) - { - ref var e = ref GetSurePresentEntryRef(i); - if (found = default(TEq).Equals(e.Key, key)) - return ref e; - } - found = false; + ref var e = ref StackEntries.TryGetEntryRef(ref _stackHashes, _count, key, out found, + default(TEq), default(TStackCap), default(Use)); + if (found) + return ref e; // Add the new entry to the stack if there is still space in stack if (_count < StackEntries.Capacity) { var newIndex = _count++; + _stackHashes.GetSurePresentItemRef(newIndex) = default(TEq).GetHashCode(key); ref var newEntry = ref StackEntries.GetSurePresentItemRef(newIndex); newEntry.Key = key; return ref newEntry; @@ -1286,13 +1302,13 @@ private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) #else var hashesAndIndexes = _packedHashesAndIndexes; #endif - ref var h = ref NextHashRef(ref hashesAndIndexes, hashIndex); + ref var h = ref hashesAndIndexes.GetItemRef(hashIndex); // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions var probes = 1; while ((h >>> ProbeCountShift) >= probes) { - h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref hashesAndIndexes.GetItemRef(++hashIndex & indexMask); ++probes; } @@ -1305,7 +1321,7 @@ private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref NextHashRef(ref hashesAndIndexes, ++hashIndex & indexMask); + h = ref hashesAndIndexes.GetItemRef(++hashIndex & indexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1384,7 +1400,7 @@ internal ref TEntry TryGetRefInEntries(K key, out bool found) var hashesAndIndexes = _packedHashesAndIndexes; #endif - var h = NextHash(ref hashesAndIndexes, hashIndex); + var h = hashesAndIndexes.GetItem(hashIndex); // 1. Skip over hashes with the bigger and equal probes. 
The hashes with bigger probes overlapping from the earlier ideal positions var probes = 1; @@ -1398,7 +1414,7 @@ internal ref TEntry TryGetRefInEntries(K key, out bool found) return ref e; } - h = NextHash(ref hashesAndIndexes, ++hashIndex & indexMask); + h = hashesAndIndexes.GetItem(++hashIndex & indexMask); ++probes; } @@ -1445,7 +1461,7 @@ internal int ResizeHashes(int indexMask) // Overflow segment is wrapped-around hashes and! the hashes at the beginning robin hooded by the wrapped-around hashes var i = 0; while ((oldHash >>> ProbeCountShift) > 1) - oldHash = NextHash(ref oldHashes, ++i); + oldHash = oldHashes.GetItem(++i); var oldCapacityWithOverflowSegment = i + oldCapacity; while (true) @@ -1457,10 +1473,10 @@ internal int ResizeHashes(int indexMask) // no need for robin-hooding because we already did it for the old hashes and now just filling the hashes into the new array which are already in order var probes = 1; - ref var newHash = ref NextHashRef(ref newHashes, indexWithNextBit); + ref var newHash = ref newHashes.GetItemRef(indexWithNextBit); while (newHash != 0) { - newHash = ref NextHashRef(ref newHashes, ++indexWithNextBit & newIndexMask); + newHash = ref newHashes.GetItemRef(++indexWithNextBit & newIndexMask); ++probes; } newHash = (probes << ProbeCountShift) | (oldHash & newHashAndIndexMask); @@ -1468,7 +1484,7 @@ internal int ResizeHashes(int indexMask) if (++i >= oldCapacityWithOverflowSegment) break; - oldHash = NextHash(ref oldHashes, i & indexMask); + oldHash = oldHashes.GetItem(i & indexMask); } ++_capacityBitShift; _packedHashesAndIndexes = newHashesAndIndexes; diff --git a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs index 5e3d8313..8b600422 100644 --- a/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs +++ b/test/FastExpressionCompiler.Benchmarks/ArrayCopy_vs_ArrayResize_vs_ForLoop.cs @@ -283,7 +283,7 
@@ public int Search_SIMD() for (var i = 12; i >= -4; --i) { ref var e = ref entries.TryGetEntryRef( - ref hashes, i, out var found, + ref hashes, 8, i, out var found, default(IntEq), default(Size8), default(Use>)); if (found) sum += e.Key; diff --git a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs index bca5b1fd..84622d5f 100644 --- a/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs +++ b/test/FastExpressionCompiler.IssueTests/Issue476_System_ExecutionEngineException_with_nullables_on_repeated_calls_to_ConcurrentDictionary.cs @@ -79,7 +79,7 @@ public void TestSmallMap_Lookup_SIMD(TestContext t) for (var i = 12; i >= -4; --i) { ref var e = ref entries.TryGetEntryRef( - ref hashes, i, out var found, + ref hashes, 8, i, out var found, default(IntEq), default(Size8), default(Use>)); if (found) sum += e.Key; From 32cbe18ca4a6d4f678263bd06a7c751bb7f0ae91 Mon Sep 17 00:00:00 2001 From: dadhi Date: Sun, 8 Jun 2025 11:24:44 +0200 Subject: [PATCH 25/32] using TryGetEntryRef everwhere --- src/FastExpressionCompiler/ImTools.cs | 84 ++++++++++++--------------- 1 file changed, 36 insertions(+), 48 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 712bc4ee..36c03d79 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -149,31 +149,27 @@ public static ref T GetSurePresentItemRef(this ref SmallList source, int i /// Returns surely present item ref by its index without boundary checks [MethodImpl((MethodImplOptions)256)] - public static ref T GetSurePresentItemRef(this T[] items, int index) - { + public static ref T GetSurePresentItemRef(this T[] source, int 
index) => #if SUPPORTS_UNSAFE - return ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(items), index); + ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(source), index); #else - return ref items[index]; + ref source[index]; #endif - } +#if NET7_0_OR_GREATER /// Get the item by-ref without bounds check [MethodImpl((MethodImplOptions)256)] - public static ref T GetItemRef( -#if NET7_0_OR_GREATER - this ref T first, int index) where T : struct => ref Unsafe.Add(ref first, index); -#else - this T[] first, int index) where T : struct => ref first[index]; + public static ref T GetSurePresentItemRef(this ref T source, int index) where T : struct => + ref Unsafe.Add(ref source, index); #endif /// Get the item without bounds check [MethodImpl((MethodImplOptions)256)] - internal static T GetItem( + internal static T GetSurePresentItem( #if NET7_0_OR_GREATER - this ref T start, int index) where T : struct => Unsafe.Add(ref start, index); + this ref T source, int index) where T : struct => Unsafe.Add(ref source, index); #else - this T[] start, int index) => start[index]; + this T[] source, int index) => source[index]; #endif // todo: @perf add the not null variant @@ -964,6 +960,7 @@ public ref TEntry AddKeyAndGetEntryRef(K key, int index) } /// Lookup for the K in the TStackEntries, first by calculating it hash with TEq and searching the hash in the TStackHashes + [MethodImpl((MethodImplOptions)256)] public static ref TEntry TryGetEntryRef( this ref TStackEntries entries, ref TStackHashes hashes, int count, K key, out bool found, TEq eq = default, TCap cap = default, Use _ = default) @@ -1161,9 +1158,9 @@ private ref TEntry AddOrGetRefInEntries(K key, out bool found) #if NET7_0_OR_GREATER ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(_packedHashesAndIndexes); #else - var hashesAndIndexes = _packedHashesAndIndexes; + ref var hashesAndIndexes = ref _packedHashesAndIndexes; #endif - ref var h = ref hashesAndIndexes.GetItemRef(hashIndex); + ref 
var h = ref hashesAndIndexes.GetSurePresentItemRef(hashIndex); // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions var probes = 1; @@ -1176,7 +1173,7 @@ private ref TEntry AddOrGetRefInEntries(K key, out bool found) if (found = default(TEq).Equals(e.Key, key)) return ref e; } - h = ref hashesAndIndexes.GetItemRef(++hashIndex & indexMask); + h = ref hashesAndIndexes.GetSurePresentItemRef(++hashIndex & indexMask); ++probes; } found = false; @@ -1190,7 +1187,7 @@ private ref TEntry AddOrGetRefInEntries(K key, out bool found) probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref hashesAndIndexes.GetItemRef(++hashIndex & indexMask); + h = ref hashesAndIndexes.GetSurePresentItemRef(++hashIndex & indexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1213,11 +1210,11 @@ private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) var hashesAndIndexes = _packedHashesAndIndexes; #endif // 1. Skip over hashes with the bigger and equal probes. 
The hashes with bigger probes overlapping from the earlier ideal positions - ref var h = ref hashesAndIndexes.GetItemRef(hashIndex); + ref var h = ref hashesAndIndexes.GetSurePresentItemRef(hashIndex); var probes = 1; while ((h >>> ProbeCountShift) >= probes) { - h = ref hashesAndIndexes.GetItemRef(++hashIndex & IndexMask); + h = ref hashesAndIndexes.GetSurePresentItemRef(++hashIndex & IndexMask); ++probes; } @@ -1230,7 +1227,7 @@ private void AddJustHashAndEntryIndexWithoutResizing(int hash, int index) probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref hashesAndIndexes.GetItemRef(++hashIndex & IndexMask); + h = ref hashesAndIndexes.GetSurePresentItemRef(++hashIndex & IndexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1302,13 +1299,13 @@ private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) #else var hashesAndIndexes = _packedHashesAndIndexes; #endif - ref var h = ref hashesAndIndexes.GetItemRef(hashIndex); + ref var h = ref hashesAndIndexes.GetSurePresentItemRef(hashIndex); // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions var probes = 1; while ((h >>> ProbeCountShift) >= probes) { - h = ref hashesAndIndexes.GetItemRef(++hashIndex & indexMask); + h = ref hashesAndIndexes.GetSurePresentItemRef(++hashIndex & indexMask); ++probes; } @@ -1321,7 +1318,7 @@ private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) probes = hRobinHooded >>> ProbeCountShift; while (hRobinHooded != 0) { - h = ref hashesAndIndexes.GetItemRef(++hashIndex & indexMask); + h = ref hashesAndIndexes.GetSurePresentItemRef(++hashIndex & indexMask); if ((h >>> ProbeCountShift) < ++probes) { var tmp = h; @@ -1338,7 +1335,6 @@ private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) /// Provides the performance in scenarios where you look for the present key, and using it, and if ABSENT then add the new one. 
/// So this method optimized NOT to look for the present item for the second time [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] public ref TEntry AddSureAbsentDefaultEntryAndGetRef(K key) { if (_count > StackEntries.Capacity) @@ -1348,6 +1344,7 @@ public ref TEntry AddSureAbsentDefaultEntryAndGetRef(K key) if (_count < StackEntries.Capacity) { var newIndex = _count++; + _stackHashes.GetSurePresentItemRef(newIndex) = default(TEq).GetHashCode(key); ref var newEntry = ref StackEntries.GetSurePresentItemRef(newIndex); newEntry.Key = key; return ref newEntry; @@ -1367,20 +1364,19 @@ public ref TEntry AddSureAbsentDefaultEntryAndGetRef(K key) } /// Lookups for the stored key. If found true, otherwise false - [MethodImpl((MethodImplOptions)256)] public bool ContainsKey(K key) { if (_count > StackEntries.Capacity) { - TryGetRefInEntries(key, out var found); + _ = TryGetRefInEntries(key, out var found); + return found; + } + else + { + _ = ref StackEntries.TryGetEntryRef(ref _stackHashes, _count, key, out var found, + default(TEq), default(TStackCap), default(Use)); return found; } - - for (var i = 0; i < _count; ++i) - if (default(TEq).Equals(key, GetSurePresentEntryRef(i).Key)) - return true; - - return false; } [UnscopedRef] @@ -1400,7 +1396,7 @@ internal ref TEntry TryGetRefInEntries(K key, out bool found) var hashesAndIndexes = _packedHashesAndIndexes; #endif - var h = hashesAndIndexes.GetItem(hashIndex); + var h = hashesAndIndexes.GetSurePresentItem(hashIndex); // 1. Skip over hashes with the bigger and equal probes. 
The hashes with bigger probes overlapping from the earlier ideal positions var probes = 1; @@ -1414,7 +1410,7 @@ internal ref TEntry TryGetRefInEntries(K key, out bool found) return ref e; } - h = hashesAndIndexes.GetItem(++hashIndex & indexMask); + h = hashesAndIndexes.GetSurePresentItem(++hashIndex & indexMask); ++probes; } @@ -1424,21 +1420,13 @@ internal ref TEntry TryGetRefInEntries(K key, out bool found) /// Lookups for the stored entry by key. Returns the ref to the found entry or the null ref [UnscopedRef] - [MethodImpl((MethodImplOptions)256)] public ref TEntry TryGetEntryRef(K key, out bool found) { if (_count > StackEntries.Capacity) return ref TryGetRefInEntries(key, out found); - for (var i = 0; i < _count; ++i) - { - ref var e = ref GetSurePresentEntryRef(i); - if (found = default(TEq).Equals(key, e.Key)) - return ref e; - } - - found = false; - return ref RefTools.GetNullRef(); + return ref StackEntries.TryGetEntryRef(ref _stackHashes, _count, key, out found, + default(TEq), default(TStackCap), default(Use)); } internal int ResizeHashes(int indexMask) @@ -1461,7 +1449,7 @@ internal int ResizeHashes(int indexMask) // Overflow segment is wrapped-around hashes and! 
the hashes at the beginning robin hooded by the wrapped-around hashes var i = 0; while ((oldHash >>> ProbeCountShift) > 1) - oldHash = oldHashes.GetItem(++i); + oldHash = oldHashes.GetSurePresentItem(++i); var oldCapacityWithOverflowSegment = i + oldCapacity; while (true) @@ -1473,10 +1461,10 @@ internal int ResizeHashes(int indexMask) // no need for robin-hooding because we already did it for the old hashes and now just filling the hashes into the new array which are already in order var probes = 1; - ref var newHash = ref newHashes.GetItemRef(indexWithNextBit); + ref var newHash = ref newHashes.GetSurePresentItemRef(indexWithNextBit); while (newHash != 0) { - newHash = ref newHashes.GetItemRef(++indexWithNextBit & newIndexMask); + newHash = ref newHashes.GetSurePresentItemRef(++indexWithNextBit & newIndexMask); ++probes; } newHash = (probes << ProbeCountShift) | (oldHash & newHashAndIndexMask); @@ -1484,7 +1472,7 @@ internal int ResizeHashes(int indexMask) if (++i >= oldCapacityWithOverflowSegment) break; - oldHash = oldHashes.GetItem(i & indexMask); + oldHash = oldHashes.GetSurePresentItem(i & indexMask); } ++_capacityBitShift; _packedHashesAndIndexes = newHashesAndIndexes; From d994700d0716f51cfd87ac2636b83153400f377c Mon Sep 17 00:00:00 2001 From: dadhi Date: Sun, 8 Jun 2025 11:56:29 +0200 Subject: [PATCH 26/32] nice capacity boundaries --- src/FastExpressionCompiler/ImTools.cs | 37 +++++++++------------------ 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 36c03d79..22161c81 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -841,8 +841,9 @@ public interface Use { } public static class SmallMap { internal const byte MinFreeCapacityShift = 3; // e.g. 
for the capacity 16: 16 >> 3 => 2, 12.5% of the free hash slots (it does not mean the entries free slot) - internal const byte MinHashesCapacityBitShift = 4; // 1 << 4 == 16 - internal const int IndexMask = (1 << MinHashesCapacityBitShift) - 1; // 0b00000000000000000000000000001111 + internal const byte MinHashesCapacityBitShift = 3; // 1 << 3 == 8 + internal const byte DefaultHashesCapacityBitShift = 4; // 1 << 4 == 16, means the default capacity is 16 int hashes + internal const int IndexMask = (1 << DefaultHashesCapacityBitShift) - 1; // 0b00000000000000000000000000001111 /// Upper hash bits spent on storing the probes, e.g. 5 bits mean 31 probes max. public const byte ProbeBits = 5; internal const byte NotShiftedProbeCountMask = (1 << ProbeBits) - 1; // 0b00000000000000000000000000011111 @@ -895,20 +896,6 @@ public struct Entry : IEntry /// Binary representation of the `int` public static string ToB(int x) => System.Convert.ToString(x, 2).PadLeft(32, '0'); - // [MethodImpl((MethodImplOptions)256)] - // #if NET7_0_OR_GREATER - // internal static ref int NextHashRef(ref int start, int distance) => ref Unsafe.Add(ref start, distance); - // #else - // internal static ref int NextHashRef(ref int[] start, int distance) => ref start[distance]; - // #endif - - // [MethodImpl((MethodImplOptions)256)] - // #if NET7_0_OR_GREATER - // internal static int NextHash(ref int start, int distance) => Unsafe.Add(ref start, distance); - // #else - // internal static int NextHash(ref int[] start, int distance) => start[distance]; - // #endif - /// Abstraction to configure your own entries data structure. 
Check the derived types for the examples public interface IEntries where TEntry : struct, IEntry @@ -1098,12 +1085,12 @@ public struct SmallMap.Stack' is never assigned to, and will always have its default value internal TStackHashes _stackHashes; internal TStackEntries StackEntries; #pragma warning restore CS0649 +#pragma warning restore IDE0044 /// Capacity bits public int CapacityBitShift => _capacityBitShift; @@ -1120,12 +1107,12 @@ public struct SmallMapCapacity calculates as `1 leftShift capacityBitShift` public SmallMap(byte capacityBitShift) { - _capacityBitShift = capacityBitShift; + // Keep the capacity at least 8 for SIMD Vector256, etc., etc, if you need less space use Stack for that + _capacityBitShift = capacityBitShift < MinHashesCapacityBitShift ? MinHashesCapacityBitShift : capacityBitShift; - // the overflow tail to the hashes is the size of log2N where N==capacityBitShift, + // The overflow tail to the hashes is the size of log2N where N==capacityBitShift, // it is probably fine to have the check for the overflow of capacity because it will be mis-predicted only once at the end of loop (it even rarely for the lookup) _packedHashesAndIndexes = new int[1 << capacityBitShift]; - _entries = default; _entries.Init(capacityBitShift); } @@ -1270,8 +1257,8 @@ public ref TEntry AddOrGetEntryRef(K key, out bool found) // So the values on the stack are guarantied to be stable from the beginning of the map creation, // because they are not copied when the Entries need to Resize (depending on the TEntries implementation). 
- _capacityBitShift = MinHashesCapacityBitShift; - _packedHashesAndIndexes = new int[1 << MinHashesCapacityBitShift]; + _capacityBitShift = DefaultHashesCapacityBitShift; + _packedHashesAndIndexes = new int[1 << DefaultHashesCapacityBitShift]; for (var i = 0; i < StackEntries.Capacity; ++i) AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(GetSurePresentEntryRef(i).Key), i); @@ -1350,8 +1337,8 @@ public ref TEntry AddSureAbsentDefaultEntryAndGetRef(K key) return ref newEntry; } - _capacityBitShift = MinHashesCapacityBitShift; - _packedHashesAndIndexes = new int[1 << MinHashesCapacityBitShift]; + _capacityBitShift = DefaultHashesCapacityBitShift; + _packedHashesAndIndexes = new int[1 << DefaultHashesCapacityBitShift]; for (var i = 0; i < StackEntries.Capacity; ++i) AddJustHashAndEntryIndexWithoutResizing(default(TEq).GetHashCode(GetSurePresentEntryRef(i).Key), i); From 3f9a130433d37a77b6fd7eb7c369986b8b949964 Mon Sep 17 00:00:00 2001 From: dadhi Date: Sun, 8 Jun 2025 18:18:53 +0200 Subject: [PATCH 27/32] @wip tryng the padding --- src/FastExpressionCompiler/ImTools.cs | 81 ++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 22161c81..f4ddbda2 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -1270,6 +1270,43 @@ public ref TEntry AddOrGetEntryRef(K key, out bool found) return ref _entries.AddKeyAndGetEntryRef(key, 0); // add the new key to the entries with the 0 index in the entries } + /* + Insertion step by step: + + 1. Initially the map is empty. Its capacity mask is 7: + + Index: 0 1 2 3 4 5 6 7 + Hash: [0] [0] [0] [0] [0] [0] [0] [0] + + 2. Insert that key A with the hash 13, which is 0b0011_0101. 13 & 7 Mask = 5, so the index is 5. + + Index: 0 1 2 3 4 5 6 7 + Hash: [0] [0] [0] [0] [0] [13] [0] [0] + Probe: 1A + + 3. Insert that key B with the hash 5, which is 0b0000_1011. 
5 & 7 Mask = 5, so the index is again 5. + + Index: 0 1 2 3 4 5 6 7 + Hash: [0] [0] [0] [0] [0] [13] [5] [0] + Probe 1A 2B + + 4. Insert that key C with the hash 7, which is 0b0010_0101. 7 & 7 Mask = 7, so the index is 7. + + Index: 0 1 2 3 4 5 6 7 + Hash: [0] [0] [0] [0] [0] [13] [5] [7] + Probe: 1A 2B 1C + + 5. Insert that key D with the hash 21, which is 0b0101_0101. 21 & 7 Mask = 5, so the index is again again 5. + + Index: 0 1 2 3 4 5 6 7 + Hash: [7] [0] [0] [0] [0] [13] [5] [21] + Probe: 2C 1A 2B 3D + // todo: @perf @wip just an idea + 5 (with padding): + Index: 0 1 2 3 4 5 6 7 | 8 9 10 11 + Hash: [7] [0] [0] [0] [0] [13] [5] [21]| [7] [0] [0] [0] + Probe: 2C 1A 2B 3D | 2C + */ [UnscopedRef] private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) { @@ -1405,13 +1442,53 @@ internal ref TEntry TryGetRefInEntries(K key, out bool found) return ref RefTools.GetNullRef(); } - /// Lookups for the stored entry by key. Returns the ref to the found entry or the null ref + // todo: @wip + [UnscopedRef] + [MethodImpl((MethodImplOptions)256)] + internal ref TEntry TryGetRefInEntries2(K key, out bool found) + { + var hash = default(TEq).GetHashCode(key); + + var indexMask = (1 << _capacityBitShift) - 1; + var hashMiddleMask = HashAndIndexMask & ~indexMask; + var hashMiddle = hash & hashMiddleMask; + var hashIndex = hash & indexMask; + +#if NET7_0_OR_GREATER + ref var hashesAndIndexes = ref MemoryMarshal.GetArrayDataReference(_packedHashesAndIndexes); +#else + var hashesAndIndexes = _packedHashesAndIndexes; +#endif + + var h = hashesAndIndexes.GetSurePresentItem(hashIndex); + + // 1. Skip over hashes with the bigger and equal probes. The hashes with bigger probes overlapping from the earlier ideal positions + var probes = 1; + + while ((h >>> ProbeCountShift) >= probes) + { + // 2. 
For the equal probes check for equality the hash middle part, then check the entry + if (((h >>> ProbeCountShift) == probes) & ((h & hashMiddleMask) == hashMiddle)) + { + ref var e = ref GetSurePresentEntryRef(h & indexMask); + if (found = default(TEq).Equals(e.Key, key)) + return ref e; + } + + h = hashesAndIndexes.GetSurePresentItem(++hashIndex & indexMask); + ++probes; + } + + found = false; + return ref RefTools.GetNullRef(); + } + + /// Lookup for the stored entry by key. Returns the ref to the found entry or the null ref [UnscopedRef] public ref TEntry TryGetEntryRef(K key, out bool found) { if (_count > StackEntries.Capacity) return ref TryGetRefInEntries(key, out found); - return ref StackEntries.TryGetEntryRef(ref _stackHashes, _count, key, out found, default(TEq), default(TStackCap), default(Use)); } From d1c6c1988db5e9bb11f32aab0a2946d2ba014c7f Mon Sep 17 00:00:00 2001 From: dadhi Date: Mon, 9 Jun 2025 13:44:20 +0200 Subject: [PATCH 28/32] cleanup --- src/FastExpressionCompiler/ImTools.cs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index f4ddbda2..79307d71 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -165,11 +165,10 @@ public static ref T GetSurePresentItemRef(this ref T source, int index) where /// Get the item without bounds check [MethodImpl((MethodImplOptions)256)] - internal static T GetSurePresentItem( #if NET7_0_OR_GREATER - this ref T source, int index) where T : struct => Unsafe.Add(ref source, index); + internal static T GetSurePresentItem(this ref T source, int index) where T : struct => Unsafe.Add(ref source, index); #else - this T[] source, int index) => source[index]; + internal static T GetSurePresentItem(this T[] source, int index) => source[index]; #endif // todo: @perf add the not null variant @@ -261,10 +260,8 @@ public interface IStack /// Creates a span over the stack 
items public Span AsSpan(); #endif - } -// todo: @wip /// Base marker for collection or container holding some number of items public interface ISize { @@ -1301,11 +1298,6 @@ Probe 1A 2B Index: 0 1 2 3 4 5 6 7 Hash: [7] [0] [0] [0] [0] [13] [5] [21] Probe: 2C 1A 2B 3D - // todo: @perf @wip just an idea - 5 (with padding): - Index: 0 1 2 3 4 5 6 7 | 8 9 10 11 - Hash: [7] [0] [0] [0] [0] [13] [5] [21]| [7] [0] [0] [0] - Probe: 2C 1A 2B 3D | 2C */ [UnscopedRef] private ref TEntry AddSureAbsentDefaultAndGetRefInEntries(K key) @@ -1442,7 +1434,7 @@ internal ref TEntry TryGetRefInEntries(K key, out bool found) return ref RefTools.GetNullRef(); } - // todo: @wip + // todo: @wip @remove [UnscopedRef] [MethodImpl((MethodImplOptions)256)] internal ref TEntry TryGetRefInEntries2(K key, out bool found) From 2ff603c5508dd5beb2c11fd73e036139d093a1b4 Mon Sep 17 00:00:00 2001 From: dadhi Date: Mon, 9 Jun 2025 16:58:23 +0200 Subject: [PATCH 29/32] fixe for the count --- src/FastExpressionCompiler/ImTools.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 79307d71..d4989b46 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -982,6 +982,8 @@ public static ref TEntry TryGetEntryRef.Count; + if (i >= count) + break; } found = false; From 610063cf7556f1b748785d4652ddfdcea6841f08 Mon Sep 17 00:00:00 2001 From: dadhi Date: Tue, 10 Jun 2025 17:25:58 +0200 Subject: [PATCH 30/32] adding Size0 --- src/FastExpressionCompiler/ImTools.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index d4989b46..864f2ab1 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -277,6 +277,13 @@ public interface ISize8Plus : ISize4Plus { } /// Marker for collection or container holding 16 or more items public interface ISize16Plus : ISize8Plus { } 
+/// Marker for collection or container holding 0 items +public struct Size0 : ISize +{ + /// + public int Size => 0; +} + /// Marker for collection or container holding 4 items public struct Size2 : ISize2Plus { From 0f231e98f761d0b9f617312396fa97e3ff427c85 Mon Sep 17 00:00:00 2001 From: dadhi Date: Tue, 10 Jun 2025 17:31:06 +0200 Subject: [PATCH 31/32] tbd --- src/FastExpressionCompiler/ILReader.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/FastExpressionCompiler/ILReader.cs b/src/FastExpressionCompiler/ILReader.cs index 0f93fc48..e31495c2 100644 --- a/src/FastExpressionCompiler/ILReader.cs +++ b/src/FastExpressionCompiler/ILReader.cs @@ -328,6 +328,7 @@ internal ILInstruction(int offset, OpCode opCode) } } +//todo: @wip APL/DOA like modeling of IL instructions // internal struct BaseIL // { // public OperandType OperandType; @@ -351,7 +352,6 @@ internal ILInstruction(int offset, OpCode opCode) // public int ExtraOpItemIndex; // } -//todo: @wip ///Data-oriented structure SOA of IL instructions. 
// internal struct ILs // { From de17e430cadeb8991eed8a7c5570849e08713970 Mon Sep 17 00:00:00 2001 From: dadhi Date: Tue, 10 Jun 2025 17:40:05 +0200 Subject: [PATCH 32/32] the fix for the small TryGetRef --- src/FastExpressionCompiler/ImTools.cs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/FastExpressionCompiler/ImTools.cs b/src/FastExpressionCompiler/ImTools.cs index 864f2ab1..97964e8a 100644 --- a/src/FastExpressionCompiler/ImTools.cs +++ b/src/FastExpressionCompiler/ImTools.cs @@ -96,10 +96,8 @@ public static class SmallList internal const int DefaultInitialCapacity = 4; [MethodImpl(MethodImplOptions.NoInlining)] - internal static ref T ThrowIndexOutOfBounds(int index, int count) - { + internal static ref T ThrowIndexOutOfBounds(int index, int count) => throw new IndexOutOfRangeException($"Index {index} is out of range of count {count} for SmallList<{typeof(T)},..>."); - } [MethodImpl((MethodImplOptions)256)] internal static void Expand(ref T[] items) @@ -998,7 +996,7 @@ public static ref TEntry TryGetEntryRef