Skip to content

Optimize ImmutableHashSet<T>.SetEquals to avoid unnecessary allocations#126309

Open
aw0lid wants to merge 1 commit into dotnet:main from
aw0lid:fix-immutablehashset-setequals-allocs
Open

Optimize ImmutableHashSet<T>.SetEquals to avoid unnecessary allocations#126309
aw0lid wants to merge 1 commit into dotnet:main from
aw0lid:fix-immutablehashset-setequals-allocs

Conversation

@aw0lid
Copy link
Copy Markdown

@aw0lid aw0lid commented Mar 30, 2026

Fixes #90986, Part of #127279

Summary

ImmutableHashSet<T>.SetEquals always creates a new intermediate HashSet<T> for the other collection, leading to avoidable allocations and GC pressure, especially for large datasets.

Optimization Logic

  • O(1) Pre-Scan: Immediately returns false if other is an ICollection with a smaller Count, avoiding any overhead.
  • Fast-Path Pattern Matching: Detects ImmutableHashSet<T> and HashSet<T> to bypass intermediate allocations.
  • Comparer Guard: Validates EqualityComparer compatibility before triggering fast paths to ensure logical consistency.
  • Short-Circuit Validation: Re-validates Count within specialized paths for an immediate exit before O(n) enumeration.
  • Reverse-Lookup Strategy: An architectural shift where the ImmutableHashSet (the source) iterates over its own elements and queries the other collection when that collection is a HashSet<T>. This leverages the O(1) lookup of HashSet<T> instead of the O(log N) lookup of the immutable tree.
  • Zero-Allocation Execution: Direct iteration over compatible collections, eliminating the costly new HashSet<T>(other) fallback.
  • Deferred fallback: Reserves the expensive allocation solely for general IEnumerable types.
Click to expand Benchmark Source Code
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Order;
using BenchmarkDotNet.Running;
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;

namespace ImmutableHashSetBenchmarks
{
    /// <summary>
    /// Benchmarks <c>ImmutableHashSet&lt;int&gt;.SetEquals</c> against a range of argument shapes:
    /// other sets (immutable and BCL), a BCL set built with a different comparer instance, and
    /// plain lists, arrays and lazy sequences, in matching, mismatching and differently sized variants.
    /// </summary>
    /// <remarks>
    /// Every benchmark calls <c>SetEquals</c> on the same source set, which holds the integers
    /// <c>0 .. Size - 1</c>. All inputs are built once in <see cref="Setup"/>.
    /// </remarks>
    [MemoryDiagnoser]
    [Orderer(SummaryOrderPolicy.FastestToSlowest)]
    [RankColumn]
    public class ImmutableHashSetSetEqualsBenchmark_Int
    {
        // The set every benchmark calls SetEquals on: 0 .. Size - 1.
        private ImmutableHashSet<int> _sourceSet = null!;

        // Same elements as the source, held in different container types.
        private ImmutableHashSet<int> _immutableHashSetEqual = null!;
        private HashSet<int> _bclHashSetEqual = null!;
        private List<int> _listEqual = null!;
        private int[] _arrayEqual = null!;
        private IEnumerable<int> _linqSelectEqual = null!;

        // Same count as the source, but the last element is replaced by Size + 1000.
        private ImmutableHashSet<int> _immutableHashSetLastDiff = null!;
        private HashSet<int> _bclHashSetLastDiff = null!;
        private List<int> _listLastDiff = null!;
        private IEnumerable<int> _lazyEnumerableLastDiff = null!;

        // Lists that contain duplicates.
        private List<int> _listWithDuplicates = null!;      // the value 1 repeated Size times
        private List<int> _listWithDuplicatesMatch = null!; // every source element, twice

        // Same elements as the source, but built with a ReverseComparer<int> instance.
        private HashSet<int> _bclHashSetDiffComparer = null!;

        // Half as many elements as the source: 0 .. Size / 2 - 1.
        private ImmutableHashSet<int> _immutableHashSetSmaller = null!;
        private HashSet<int> _bclHashSetSmaller = null!;

        // One element fewer than the source: 0 .. Size - 2.
        private List<int> _listSmaller = null!;
        private int[] _smallerArray = null!;
        private HashSet<int> _smallerHashSetDiffComparer = null!;

        // One element more than the source: the source elements plus -1.
        private ImmutableHashSet<int> _immutableLarger = null!;

        /// <summary>Number of elements in the source set.</summary>
        [Params(100000)]
        public int Size { get; set; }

        /// <summary>
        /// Builds the source set and every comparison input once per benchmark run.
        /// </summary>
        [GlobalSetup]
        public void Setup()
        {
            List<int> elements = Enumerable.Range(0, Size).ToList();
            List<int> elementsWithLastDiff = Enumerable.Range(0, Size - 1).Concat(new[] { Size + 1000 }).ToList();
            List<int> halfOfElements = Enumerable.Range(0, Size / 2).ToList();
            List<int> allButLast = Enumerable.Range(0, Size - 1).ToList();

            _sourceSet = ImmutableHashSet.CreateRange(elements);

            // Equal content.
            _immutableHashSetEqual = ImmutableHashSet.CreateRange(elements);
            _bclHashSetEqual = new HashSet<int>(elements);
            _listEqual = elements;
            _arrayEqual = elements.ToArray();
            _linqSelectEqual = elements.Select(x => x); // Lazy LINQ enumerable

            // Same count, last element different.
            _immutableHashSetLastDiff = ImmutableHashSet.CreateRange(elementsWithLastDiff);
            _bclHashSetLastDiff = new HashSet<int>(elementsWithLastDiff);
            _listLastDiff = elementsWithLastDiff;
            _lazyEnumerableLastDiff = elementsWithLastDiff.Select(x => x);

            // Duplicates.
            _listWithDuplicates = Enumerable.Repeat(1, Size).ToList();
            _listWithDuplicatesMatch = elements.Concat(elements).ToList(); // Matches source but with duplicates

            // Different comparer instance.
            _bclHashSetDiffComparer = new HashSet<int>(elements, new ReverseComparer<int>());

            // Count mismatches.
            _immutableHashSetSmaller = ImmutableHashSet.CreateRange(halfOfElements);
            _bclHashSetSmaller = new HashSet<int>(halfOfElements);
            _listSmaller = allButLast;
            _smallerArray = allButLast.ToArray();
            _smallerHashSetDiffComparer = new HashSet<int>(allButLast, new ReverseComparer<int>());
            _immutableLarger = ImmutableHashSet.CreateRange(elements.Concat(new[] { -1 }));
        }

        #region Fast Path: Same Type and Comparer (Optimized)

        [Benchmark(Description = "ImmutableHashSet (Match - Same Comparer)")]
        public bool Case_ImmutableHashSet_Match() => _sourceSet.SetEquals(_immutableHashSetEqual);

        [Benchmark(Description = "BCL HashSet (Match - Same Comparer)")]
        public bool Case_BclHashSet_Match() => _sourceSet.SetEquals(_bclHashSetEqual);

        [Benchmark(Description = "ImmutableHashSet (Mismatch - Same Count)")]
        public bool Case_ImmutableHashSet_LastDiff() => _sourceSet.SetEquals(_immutableHashSetLastDiff);

        // Description carries no "Case NN:" prefix so that it matches every other benchmark label.
        [Benchmark(Description = "BCL HashSet (Mismatch - Same Count)")]
        public bool Case_BclHashSet_LastDiff() => _sourceSet.SetEquals(_bclHashSetLastDiff);

        #endregion

        #region Early Exit: Count Mismatch

        [Benchmark(Description = "ImmutableHashSet (Smaller Count)")]
        public bool Case_ImmutableHashSet_SmallerCount() => _sourceSet.SetEquals(_immutableHashSetSmaller);

        [Benchmark(Description = "BCL HashSet (Smaller Count)")]
        public bool Case_BclHashSet_SmallerCount() => _sourceSet.SetEquals(_bclHashSetSmaller);

        [Benchmark(Description = "Array (Smaller Count)")]
        public bool Case_SmallerCollection_EarlyExit() => _sourceSet.SetEquals(_smallerArray);

        // The argument is a set whose count differs from the source, so this case is grouped
        // with the other count-mismatch benchmarks rather than with the non-set fallbacks.
        [Benchmark(Description = "ImmutableHashSet (Larger Count)")]
        public bool Case_LargerCount() => _sourceSet.SetEquals(_immutableLarger);

        #endregion

        #region Fallback Path: Different Comparer

        [Benchmark(Description = "HashSet (Different Comparer)")]
        public bool Case_HashSet_DifferentComparer() => _sourceSet.SetEquals(_bclHashSetDiffComparer);

        [Benchmark(Description = "HashSet (Smaller Count - Different Comparer)")]
        public bool Case_HashSet_SmallerCount_DiffComparer() => _sourceSet.SetEquals(_smallerHashSetDiffComparer);

        #endregion

        #region Fallback Path: Non-Set Collections (IEnumerable/ICollection)

        [Benchmark(Description = "List (Match - Fallback)")]
        public bool Case_List_Match() => _sourceSet.SetEquals(_listEqual);

        [Benchmark(Description = "LINQ (Mismatch - Lazy IEnumerable)")]
        public bool Case_LazyEnumerable_LastDiff() => _sourceSet.SetEquals(_lazyEnumerableLastDiff);

        [Benchmark(Description = "LINQ (Match - Lazy IEnumerable)")]
        public bool Case_LazyEnumerable_Match() => _sourceSet.SetEquals(_linqSelectEqual);

        [Benchmark(Description = "List (Last Diff - Fallback)")]
        public bool Case_List_LastDiff() => _sourceSet.SetEquals(_listLastDiff);

        [Benchmark(Description = "Array (Match - Fallback)")]
        public bool Case_Array_Match() => _sourceSet.SetEquals(_arrayEqual);

        #endregion

        #region Handling Duplicates (Fallback Path)

        [Benchmark(Description = "List with Duplicates (Mismatch)")]
        public bool Case_List_Duplicates_Mismatch() => _sourceSet.SetEquals(_listWithDuplicates);

        [Benchmark(Description = "List with Duplicates (Match)")]
        public bool Case_List_Duplicates_Match() => _sourceSet.SetEquals(_listWithDuplicatesMatch);

        #endregion
    }

    /// <summary>
    /// An <see cref="IEqualityComparer{T}"/> that decides equality through
    /// <see cref="IComparable{T}.CompareTo"/> and hashes with the element's own
    /// <see cref="object.GetHashCode"/>.
    /// </summary>
    /// <remarks>
    /// Despite its name, nothing is reversed. The benchmarks pass instances of this type to
    /// <see cref="HashSet{T}"/> so that the set is built with a comparer other than the default one.
    /// </remarks>
    /// <typeparam name="T">Element type; must be comparable to itself.</typeparam>
    public class ReverseComparer<T> : IEqualityComparer<T> where T : IComparable<T>
    {
        /// <summary>
        /// Returns <see langword="true"/> when both arguments are null, or when neither is null
        /// and <c>x.CompareTo(y)</c> is zero.
        /// </summary>
        public bool Equals(T? x, T? y)
        {
            if (x is null)
            {
                // A null left-hand side is only equal to a null right-hand side.
                return y is null;
            }

            return y is not null && x.CompareTo(y) == 0;
        }

        /// <summary>
        /// Returns the element's own hash code, or 0 for a null element.
        /// </summary>
        public int GetHashCode(T? obj)
        {
            return obj is null ? 0 : obj.GetHashCode();
        }
    }

    /// <summary>Console entry point for the benchmark project.</summary>
    public class Program
    {
        /// <summary>
        /// Runs every benchmark declared on <see cref="ImmutableHashSetSetEqualsBenchmark_Int"/>.
        /// </summary>
        /// <param name="args">Command-line arguments; they are not forwarded to the runner.</param>
        public static void Main(string[] args)
            => BenchmarkRunner.Run<ImmutableHashSetSetEqualsBenchmark_Int>();
    }
}
Click to expand Benchmark Results

Benchmark Results (Before Optimization)

Method Size Mean Error StdDev Rank Gen0 Gen1 Gen2 Allocated
'BCL HashSet (Smaller Count)' 100000 313.8 us 6.01 us 6.43 us 1 15.6250 15.6250 15.6250 818.33 KB
'Array (Smaller Count)' 100000 647.9 us 11.20 us 11.50 us 2 26.3672 26.3672 26.3672 1697.7 KB
'List with Duplicates (Mismatch)' 100000 954.1 us 18.77 us 41.60 us 3 31.2500 31.2500 31.2500 1697.77 KB
' HashSet (Smaller Count - Different Comparer)' 100000 1,449.3 us 28.65 us 74.46 us 4 41.0156 41.0156 41.0156 1697.8 KB
' ImmutableHashSet (Smaller Count)' 100000 4,733.2 us 74.18 us 69.39 us 5 23.4375 23.4375 23.4375 818.58 KB
' BCL HashSet (Match - Same Comparer)' 100000 7,084.0 us 65.02 us 57.64 us 6 54.6875 54.6875 54.6875 1697.9 KB
'Array (Match - Fallback)' 100000 7,821.7 us 30.71 us 27.23 us 7 46.8750 46.8750 46.8750 1697.86 KB
'List (Match - Fallback)' 100000 8,428.4 us 30.82 us 28.83 us 8 46.8750 46.8750 46.8750 1697.9 KB
'BCL HashSet (Mismatch - Same Count)' 100000 8,636.3 us 52.37 us 46.42 us 8 46.8750 46.8750 46.8750 1697.86 KB
'List (Last Diff - Fallback)' 100000 9,172.5 us 35.85 us 33.54 us 9 46.8750 46.8750 46.8750 1697.9 KB
'List with Duplicates (Match)' 100000 9,310.2 us 128.11 us 119.83 us 9 109.3750 109.3750 109.3750 3521.42 KB
' ImmutableHashSet (Larger Count)' 100000 9,477.3 us 141.55 us 125.48 us 9 46.8750 46.8750 46.8750 1697.89 KB
' HashSet (Different Comparer)' 100000 9,839.2 us 99.14 us 87.88 us 9 46.8750 46.8750 46.8750 1697.79 KB
'LINQ (Mismatch - Lazy IEnumerable)' 100000 11,274.4 us 63.77 us 56.53 us 10 296.8750 156.2500 156.2500 4717.23 KB
'LINQ (Match - Lazy IEnumerable)' 100000 11,341.5 us 69.37 us 61.49 us 10 296.8750 156.2500 156.2500 4717.23 KB
'ImmutableHashSet (Mismatch - Same Count)' 100000 17,015.5 us 170.03 us 150.73 us 11 31.2500 31.2500 31.2500 1697.88 KB
'ImmutableHashSet (Match - Same Comparer)' 100000 17,410.2 us 334.48 us 312.87 us 11 31.2500 31.2500 31.2500 1697.87 KB

Benchmark Results (After Optimization)

Method Size Mean Error StdDev Rank Gen0 Gen1 Gen2 Allocated
'Array (Smaller Count)' 100000 2.562 ns 0.0995 ns 0.1065 ns 1 - - - -
'BCL HashSet (Smaller Count)' 100000 2.636 ns 0.1022 ns 0.2244 ns 1 - - - -
'HashSet (Smaller Count - Different Comparer)' 100000 2.831 ns 0.0718 ns 0.0637 ns 1 - - - -
'ImmutableHashSet (Smaller Count)' 100000 2.896 ns 0.0870 ns 0.0772 ns 1 - - - -
'ImmutableHashSet (Larger Count)' 100000 3.202 ns 0.1087 ns 0.1594 ns 2 - - - -
'List with Duplicates (Mismatch)' 100000 895,443.977 ns 28,964.9382 ns 84,032.5440 ns 3 30.2734 30.2734 30.2734 1738479 B
'HashSet (Different Comparer)' 100000 5,120,325.720 ns 98,885.3882 ns 128,578.9473 ns 4 54.6875 54.6875 54.6875 1738688 B
'Case 04: BCL HashSet (Mismatch - Same Count)' 100000 6,416,323.272 ns 114,829.9280 ns 107,411.9860 ns 5 - - - -
'LINQ (Match - Lazy IEnumerable)' 100000 6,451,354.978 ns 142,492.3002 ns 415,656.7580 ns 5 281.2500 140.6250 140.6250 4830563 B
'BCL HashSet (Match - Same Comparer)' 100000 6,503,620.516 ns 96,707.7643 ns 85,728.9141 ns 5 - - - -
'LINQ (Mismatch - Lazy IEnumerable)' 100000 6,794,260.035 ns 162,094.6232 ns 470,265.9277 ns 5 281.2500 140.6250 140.6250 4830428 B
'Array (Match - Fallback)' 100000 6,945,468.034 ns 138,312.3473 ns 135,841.2191 ns 5 46.8750 46.8750 46.8750 1738610 B
'List (Match - Fallback)' 100000 7,564,243.026 ns 108,767.2646 ns 101,740.9668 ns 6 46.8750 46.8750 46.8750 1738651 B
'List (Last Diff - Fallback)' 100000 7,589,115.282 ns 112,952.1936 ns 105,655.5521 ns 6 46.8750 46.8750 46.8750 1738651 B
'List with Duplicates (Match)' 100000 8,200,727.556 ns 117,489.9765 ns 98,109.4516 ns 7 109.3750 109.3750 109.3750 3605934 B
'ImmutableHashSet (Match - Same Comparer)' 100000 14,234,561.197 ns 282,021.0558 ns 263,802.6707 ns 8 - - - -
'ImmutableHashSet (Mismatch - Same Count)' 100000 14,354,113.286 ns 146,975.3012 ns 122,731.0331 ns 8 - - - -

Performance Analysis Summary (100,000 Elements)

Case / Method Before (ns) After (ns) Speedup Ratio Memory Improvement
ImmutableHashSet (Larger) 9,477,300 3.202 ~2,959,806x Zero Alloc
ImmutableHashSet (Smaller) 4,733,200 2.896 ~1,634,392x Zero Alloc
HashSet (Smaller - Diff Comp) 1,449,300 2.831 ~511,939x Zero Alloc
Array (Smaller) 647,900 2.562 ~252,888x Zero Alloc
BCL HashSet (Smaller) 313,800 2.636 ~119,044x Zero Alloc
HashSet (Diff Comparer) 9,839,200 5,120,325 1.92x Stable (~1.7 MB)
LINQ (Match/Mismatch) 11,341,500 6,451,354 1.76x Stable (~4.8 MB)
BCL HashSet (Mismatch) 8,636,300 6,416,323 1.35x Zero Alloc
ImmutableHashSet (Match) 17,410,200 14,234,561 1.22x Zero Alloc
ImmutableHashSet (Mismatch) 17,015,500 14,354,113 1.19x Zero Alloc
List (Last Diff - Fallback) 9,172,500 7,589,115 1.21x Stable (~1.7 MB)
List (Match - Fallback) 8,428,400 7,564,243 1.11x Stable (~1.7 MB)
BCL HashSet (Match) 7,084,000 6,503,620 1.09x Zero Alloc
List (Duplicates - Match) 9,310,200 8,200,727 1.14x Stable (~3.6 MB)
Array (Match - Fallback) 7,821,700 6,945,468 1.13x Stable (~1.7 MB)
List (Duplicates - Mismatch) 954,100 895,443 1.07x Stable (~1.7 MB)

@dotnet-policy-service dotnet-policy-service Bot added the community-contribution Indicates that the PR has been added by a community member label Mar 30, 2026
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from 9910d86 to ff6af74 Compare April 3, 2026 14:44
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from ff6af74 to 5f2749e Compare April 3, 2026 19:27
@aw0lid aw0lid requested a review from stephentoub April 4, 2026 11:46
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch 3 times, most recently from 3c685c8 to 45c2c14 Compare April 8, 2026 23:05
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from 45c2c14 to 6a3ebf6 Compare April 12, 2026 20:53
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from 13cc045 to 1ab929a Compare April 12, 2026 21:23
@aw0lid
Copy link
Copy Markdown
Author

aw0lid commented Apr 13, 2026

Gentle ping in case this fell through the cracks
Happy to address any feedback or make adjustments if needed.

@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from 1ab929a to 6a2294d Compare April 13, 2026 18:43
@tannergooding
Copy link
Copy Markdown
Member

@dotnet/area-system-collections for secondary review

@aw0lid
Copy link
Copy Markdown
Author

aw0lid commented Apr 23, 2026

Gentle ping in case this fell through the cracks.
Ready for any review.


if (other is HashSet<T> otherAsHashSet)
{
if (otherAsHashSet.Comparer == origin.EqualityComparer)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HashSet<T> likewise has optimizations that kick in if the same equality comparer is used. There, it calls the Equals(object?) method of the equality comparer, so that it can detect that the comparers will give the same results even if they aren't exactly the same instance:

/// <summary>
/// Checks if equality comparers are equal. This is used for algorithms that can
/// speed up if it knows the other item has unique elements. I.e. if they're using
/// different equality comparers, then uniqueness assumption between sets break.
/// </summary>
internal static bool EqualityComparersAreEqual(HashSet<T> set1, HashSet<T> set2) => set1.Comparer.Equals(set2.Comparer);
/// <summary>
/// Checks if effective equality comparers are equal. This is used for algorithms that
/// require that both collections use identical hashing implementations for their entries.
/// </summary>
internal static bool EffectiveEqualityComparersAreEqual(HashSet<T> set1, HashSet<T> set2) => set1.EffectiveComparer.Equals(set2.EffectiveComparer);

Copy link
Copy Markdown
Author

@aw0lid aw0lid Apr 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have actually experimented with using origin.EqualityComparer.Equals(otherAsHashSet.Comparer) instead of direct reference equality == to cover cases of different comparer instances, but the results showed clear performance regressions in the Fast Paths.

Method Time Regression Slowdown
BCL HashSet (Smaller Count) 2.859 ns 7.764 ns +171.5%
HashSet (Diff Comparer - Small) 2.917 ns 7.740 ns +165.3%
ImmutableHashSet (Larger Count) 3.740 ns 4.914 ns +31.4%
ImmutableHashSet (Smaller Count) 3.983 ns 5.001 ns +25.5%

In my personal opinion, the Best Practice is to rely on EqualityComparer<T>.Default (which is a Singleton) or to unify comparer references when dealing with large datasets.

Therefore, I believe we should not sacrifice raw performance in these paths (a delay reaching 170%) just to cover cases resulting from the user not following optimal performance practices. Especially since Correctness is still fully guaranteed via the Fallback Path, but with a time penalty paid only by those who do not adhere to the Best Practice.

Copy link
Copy Markdown
Member

@eiriktsarpalis eiriktsarpalis left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is adding a whole lot of runtime type checks. Is there tangible evidence (e.g. in the form of microbenchmarks) showing improvement here (both when other is a set but more importantly when it is not)?.

@aw0lid
Copy link
Copy Markdown
Author

aw0lid commented Apr 24, 2026

This change is adding a whole lot of runtime type checks. Is there tangible evidence (e.g. in the form of microbenchmarks) showing improvement here (both when other is a set but more importantly when it is not)?.

As the benchmark results indicate, there is no performance regression even in the fallback paths. This demonstrates that the added runtime type checks do not impact performance, while providing massive gains in the optimized paths

@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from 13d12a2 to edeeb71 Compare April 26, 2026 14:06
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from edeeb71 to d769373 Compare April 26, 2026 14:12
@eiriktsarpalis
Copy link
Copy Markdown
Member

As the benchmark results indicate, there is no performance regression even in the fallback paths. This demonstrates that the added runtime type checks do not impact performance, while providing massive gains in the optimized paths

What do the numbers show when comparing small (0-10 elements) or collections that are not equal?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

area-System.Collections community-contribution Indicates that the PR has been added by a community member

Projects

None yet

Development

Successfully merging this pull request may close these issues.

ImmutableHashSet<T>.SetEquals always creates a new HashSet<T>

6 participants