From 80cdc9c3f2052c185c09ba0ed813e9b7952cb9b8 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 24 Mar 2026 17:28:36 +0100 Subject: [PATCH 1/5] Redo GC heap size fix in heavily pinning scenarios This change puts back the recently reverted change to fix a regression of GC heap size with regions when there is heavy pinning. The original fix had a bug that resulted in a hang due to an infinite loop in allocate_in_condemned_generations. This change also fixes that issue. The issue was caused by a plug with size 0x3FFFD0 (that was with 4MB regions). The problem was caused by the fact that an attempt to relocate that plug into a new region could never succeed in that code, as at that point, the new region always has padding added at the front and that padding reduces the size so that such a large plug cannot fit into even a completely empty region. The fix is to prevent adding the padding in case the plug is so large that it would not fit in with the padding, as the padding is only necessary for short plugs. --- src/coreclr/gc/allocation.cpp | 75 ++++++++++----- src/coreclr/gc/gc.cpp | 10 +- src/coreclr/gc/gcpriv.h | 19 +++- src/coreclr/gc/plan_phase.cpp | 168 +++++++++++++++++++++++----------- 4 files changed, 189 insertions(+), 83 deletions(-) diff --git a/src/coreclr/gc/allocation.cpp b/src/coreclr/gc/allocation.cpp index 1578c5227d727f..6fccdbf27a9f63 100644 --- a/src/coreclr/gc/allocation.cpp +++ b/src/coreclr/gc/allocation.cpp @@ -5473,6 +5473,55 @@ heap_segment* gc_heap::get_next_alloc_seg (generation* gen) #endif //USE_REGIONS } +bool gc_heap::decide_on_gen1_pin_promotion (float pin_frag_ratio, float pin_surv_ratio) +{ + return ((pin_frag_ratio > 0.15) && (pin_surv_ratio > 0.30)); +} + +// Add the size of the pinned plug to the higher generation's pinned allocations. 
+void gc_heap::attribute_pin_higher_gen_alloc ( +#ifdef USE_REGIONS + heap_segment* seg, int to_gen_number, +#endif + uint8_t* plug, size_t len) +{ + //find out which gen this pinned plug came from + int frgn = object_gennum (plug); + if ((frgn != (int)max_generation) && settings.promotion) + { + generation_pinned_allocation_sweep_size (generation_of (frgn + 1)) += len; + +#ifdef USE_REGIONS + // With regions it's a bit more complicated since we only set the plan_gen_num + // of a region after we've planned it. This means if the pinning plug is in the + // the same seg we are planning, we haven't set its plan_gen_num yet. So we + // need to check for that first. + int togn = (in_range_for_segment (plug, seg) ? to_gen_number : object_gennum_plan (plug)); +#else + int togn = object_gennum_plan (plug); +#endif //USE_REGIONS + if (frgn < togn) + { + generation_pinned_allocation_compact_size (generation_of (togn)) += len; + } + } +} + +#ifdef USE_REGIONS +void gc_heap::attribute_pin_higher_gen_alloc (int frgn, int togn, size_t len) +{ + if ((frgn != (int)max_generation) && settings.promotion) + { + generation_pinned_allocation_sweep_size (generation_of (frgn + 1)) += len; + + if (frgn < togn) + { + generation_pinned_allocation_compact_size (generation_of (togn)) += len; + } + } +} +#endif //USE_REGIONS + uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen, size_t size, int from_gen_number, @@ -5560,28 +5609,12 @@ uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen, generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); set_allocator_next_pin (gen); - - //Add the size of the pinned plug to the right pinned allocations - //find out which gen this pinned plug came from - int frgn = object_gennum (plug); - if ((frgn != (int)max_generation) && settings.promotion) - { - generation_pinned_allocation_sweep_size (generation_of (frgn + 1)) += 
len; - + attribute_pin_higher_gen_alloc ( #ifdef USE_REGIONS - // With regions it's a bit more complicated since we only set the plan_gen_num - // of a region after we've planned it. This means if the pinning plug is in the - // the same seg we are planning, we haven't set its plan_gen_num yet. So we - // need to check for that first. - int togn = (in_range_for_segment (plug, seg) ? to_gen_number : object_gennum_plan (plug)); -#else - int togn = object_gennum_plan (plug); -#endif //USE_REGIONS - if (frgn < togn) - { - generation_pinned_allocation_compact_size (generation_of (togn)) += len; - } - } + seg, to_gen_number, +#endif + plug, len); + goto retry; } diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 5ef3e47096c877..ca84b2c8599de5 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -112,16 +112,14 @@ BOOL bgc_heap_walk_for_etw_p = FALSE; #define num_partial_refs 32 #endif //SERVER_GC +#define demotion_plug_len_th (6*1024*1024) + #ifdef USE_REGIONS -// If the pinned survived is 1+% of the region size, we don't demote. -#define demotion_pinned_ratio_th (1) // If the survived / region_size is 90+%, we don't compact this region. #define sip_surv_ratio_th (90) // If the survived due to cards from old generations / region_size is 90+%, // we don't compact this region, also we immediately promote it to gen2. 
#define sip_old_card_surv_ratio_th (90) -#else -#define demotion_plug_len_th (6*1024*1024) #endif //USE_REGIONS #ifdef HOST_64BIT @@ -2316,6 +2314,8 @@ BOOL gc_heap::last_gc_before_oom = FALSE; BOOL gc_heap::sufficient_gen0_space_p = FALSE; +BOOL gc_heap::decide_promote_gen1_pins_p = TRUE; + #ifdef BACKGROUND_GC uint8_t* gc_heap::background_saved_lowest_address = 0; uint8_t* gc_heap::background_saved_highest_address = 0; @@ -2367,8 +2367,6 @@ uint8_t* gc_heap::demotion_low; uint8_t* gc_heap::demotion_high; -BOOL gc_heap::demote_gen1_p = TRUE; - uint8_t* gc_heap::last_gen1_pin_end; #endif //!USE_REGIONS diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index 8ec1f84c72fe5b..e0d3721cd9c070 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -1694,7 +1694,10 @@ class gc_heap PER_HEAP_METHOD void set_region_sweep_in_plan (heap_segment* region); PER_HEAP_METHOD void clear_region_sweep_in_plan (heap_segment* region); PER_HEAP_METHOD void clear_region_demoted (heap_segment* region); - PER_HEAP_METHOD void decide_on_demotion_pin_surv (heap_segment* region, int* no_pinned_surv_region_count); + PER_HEAP_METHOD void decide_on_demotion_pin_surv (heap_segment* region, + int* no_pinned_surv_region_count, + bool promote_gen1_pins_p, + bool large_pins_p); PER_HEAP_METHOD void skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num); PER_HEAP_METHOD void process_last_np_surv_region (generation* consing_gen, int current_plan_gen_num, @@ -2625,6 +2628,15 @@ class gc_heap #ifndef USE_REGIONS PER_HEAP_METHOD generation* ensure_ephemeral_heap_segment (generation* consing_gen); #endif //!USE_REGIONS + + PER_HEAP_ISOLATED_METHOD bool decide_on_gen1_pin_promotion (float pin_frag_ratio, float pin_surv_ratio); + + PER_HEAP_METHOD void attribute_pin_higher_gen_alloc ( +#ifdef USE_REGIONS + heap_segment* seg, int to_gen_number, +#endif + uint8_t* plug, size_t len); + PER_HEAP_METHOD uint8_t* allocate_in_condemned_generations (generation* gen, size_t 
size, int from_gen_number, @@ -2646,6 +2658,8 @@ class gc_heap PER_HEAP_METHOD size_t get_promoted_bytes(); #ifdef USE_REGIONS + PER_HEAP_METHOD void attribute_pin_higher_gen_alloc (int frgn, int togn, size_t len); + PER_HEAP_ISOLATED_METHOD void sync_promoted_bytes(); PER_HEAP_ISOLATED_METHOD void set_heap_for_contained_basic_regions (heap_segment* region, gc_heap* hp); @@ -3529,6 +3543,8 @@ class gc_heap // Set during a GC and checked by allocator after that GC PER_HEAP_FIELD_SINGLE_GC BOOL sufficient_gen0_space_p; + PER_HEAP_FIELD_SINGLE_GC BOOL decide_promote_gen1_pins_p; + PER_HEAP_FIELD_SINGLE_GC bool no_gc_oom_p; PER_HEAP_FIELD_SINGLE_GC heap_segment* saved_loh_segment_no_gc; @@ -3668,7 +3684,6 @@ class gc_heap PER_HEAP_FIELD_SINGLE_GC uint8_t* demotion_low; PER_HEAP_FIELD_SINGLE_GC uint8_t* demotion_high; - PER_HEAP_FIELD_SINGLE_GC BOOL demote_gen1_p; PER_HEAP_FIELD_SINGLE_GC uint8_t* last_gen1_pin_end; PER_HEAP_FIELD_SINGLE_GC BOOL ephemeral_promotion; diff --git a/src/coreclr/gc/plan_phase.cpp b/src/coreclr/gc/plan_phase.cpp index 517bd8e4f2fe89..32f2dc4d832808 100644 --- a/src/coreclr/gc/plan_phase.cpp +++ b/src/coreclr/gc/plan_phase.cpp @@ -1801,8 +1801,8 @@ void gc_heap::advance_pins_for_demotion (generation* gen) size_t total_space_to_skip = last_gen1_pin_end - generation_allocation_pointer (gen); float pin_frag_ratio = (float)gen1_pins_left / (float)total_space_to_skip; float pin_surv_ratio = (float)gen1_pins_left / (float)(dd_survived_size (dynamic_data_of (max_generation - 1))); - if ((pin_frag_ratio > 0.15) && (pin_surv_ratio > 0.30)) - { + bool actual_promote_gen1_pins_p = decide_on_gen1_pin_promotion (pin_frag_ratio, pin_surv_ratio); + if (actual_promote_gen1_pins_p) { while (!pinned_plug_que_empty_p() && (pinned_plug (oldest_pin()) < original_youngest_start)) { @@ -1815,19 +1815,7 @@ void gc_heap::advance_pins_for_demotion (generation* gen) generation_allocation_pointer (gen) = plug + len; generation_allocation_limit (gen) = 
heap_segment_plan_allocated (seg); set_allocator_next_pin (gen); - - //Add the size of the pinned plug to the right pinned allocations - //find out which gen this pinned plug came from - int frgn = object_gennum (plug); - if ((frgn != (int)max_generation) && settings.promotion) - { - int togn = object_gennum_plan (plug); - generation_pinned_allocation_sweep_size ((generation_of (frgn +1))) += len; - if (frgn < togn) - { - generation_pinned_allocation_compact_size (generation_of (togn)) += len; - } - } + attribute_pin_higher_gen_alloc (plug, len); dprintf (2, ("skipping gap %zu, pin %p (%zd)", pinned_len (pinned_plug_of (entry)), plug, len)); @@ -1956,7 +1944,7 @@ void gc_heap::process_ephemeral_boundaries (uint8_t* x, if (active_new_gen_number == (max_generation - 1)) { maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); - if (!demote_gen1_p) + if (decide_promote_gen1_pins_p) advance_pins_for_demotion (consing_gen); } @@ -2499,6 +2487,7 @@ void gc_heap::record_interesting_data_point (interesting_data_point idp) void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num) { heap_segment* alloc_region = generation_allocation_segment (consing_gen); + size_t skipped_pins_len = 0; while (!pinned_plug_que_empty_p()) { uint8_t* oldest_plug = pinned_plug (oldest_pin()); @@ -2510,6 +2499,7 @@ void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_n uint8_t* plug = pinned_plug (m); size_t len = pinned_len (m); + skipped_pins_len += len; set_new_pin_info (m, generation_allocation_pointer (consing_gen)); dprintf (REGIONS_LOG, ("pin %p b: %zx->%zx", plug, brick_of (plug), (size_t)(brick_table[brick_of (plug)]))); @@ -2528,37 +2518,44 @@ void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_n (heap_segment_swept_in_plan (alloc_region) ? "SIP" : "non SIP"), (heap_segment_swept_in_plan (alloc_region) ? 
heap_segment_plan_gen_num (alloc_region) : plan_gen_num))); + + attribute_pin_higher_gen_alloc (heap_segment_gen_num (alloc_region), plan_gen_num, skipped_pins_len); + set_region_plan_gen_num_sip (alloc_region, plan_gen_num); heap_segment_plan_allocated (alloc_region) = generation_allocation_pointer (consing_gen); } -void gc_heap::decide_on_demotion_pin_surv (heap_segment* region, int* no_pinned_surv_region_count) +void gc_heap::decide_on_demotion_pin_surv (heap_segment* region, int* no_pinned_surv_region_count, bool promote_gen1_pins_p, bool large_pins_p) { + int gen_num = heap_segment_gen_num (region); int new_gen_num = 0; int pinned_surv = heap_segment_pinned_survived (region); + int promote_pins_p = large_pins_p; if (pinned_surv == 0) { (*no_pinned_surv_region_count)++; - dprintf (REGIONS_LOG, ("region %Ix will be empty", heap_segment_mem (region))); + dprintf (REGIONS_LOG, ("h%d gen%d region %Ix will be empty", heap_number, heap_segment_gen_num (region), heap_segment_mem (region))); } - - // If this region doesn't have much pinned surv left, we demote it; otherwise the region - // will be promoted like normal. - size_t basic_region_size = (size_t)1 << min_segment_size_shr; - int pinned_ratio = (int)(((double)pinned_surv * 100.0) / (double)basic_region_size); - dprintf (REGIONS_LOG, ("h%d g%d region %Ix(%Ix) ps: %d (%d) (%s)", heap_number, - heap_segment_gen_num (region), (size_t)region, heap_segment_mem (region), pinned_surv, pinned_ratio, - ((pinned_ratio >= demotion_pinned_ratio_th) ? 
"ND" : "D"))); - - if (pinned_ratio >= demotion_pinned_ratio_th) + else { - if (settings.promotion) + if (!promote_pins_p && (gen_num == (max_generation - 1)) && promote_gen1_pins_p) + { + promote_pins_p = true; + } + + if (promote_pins_p) { new_gen_num = get_plan_gen_num (heap_segment_gen_num (region)); } + + attribute_pin_higher_gen_alloc (gen_num, new_gen_num, pinned_surv); } + dprintf (REGIONS_LOG, ("h%d gen%d region pinned surv %d %s -> g%d", + heap_number, gen_num, pinned_surv, (promote_pins_p ? "PROMOTE" : "DEMOTE"), new_gen_num)); + + set_region_plan_gen_num (region, new_gen_num); } @@ -2710,6 +2707,9 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c // Instead of checking for this condition we just set the alloc region to 0 so it's easier to check // later. + // + // set generation_allocation_segment to 0, we know we don't have pins so we will not be going through the while loop below + // generation_allocation_segment (consing_gen) = 0; generation_allocation_pointer (consing_gen) = 0; generation_allocation_limit (consing_gen) = 0; @@ -2720,13 +2720,12 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c // What has been planned doesn't change at this point. So at this point we know exactly which generation still doesn't // have any regions planned and this method is responsible to attempt to plan at least one region in each of those gens. // So we look at each of the remaining regions (that are non SIP, since SIP regions have already been planned) and decide - // which generation it should be planned in. We used the following rules to decide - + // which generation it should be planned in. // - // + if the pinned surv of a region is >= demotion_pinned_ratio_th (this will be dynamically tuned based on memory load), - // it will be promoted to its normal planned generation unconditionally. 
+ // + if we are in a gen1 GC due to cards, we will decide if we need to promote based on the same criteria as segments. And + // we never demote large pins to gen0. // - // + if the pinned surv is < demotion_pinned_ratio_th, we will always demote it to gen0. We will record how many regions - // have no survival at all - those will be empty and can be used to plan any non gen0 generation if needed. + // + we will record how many regions have no survival at all - those will be empty and can be used to plan any non gen0 generation if needed. // // Note! We could actually promote a region with non zero pinned survivors to whichever generation we'd like (eg, we could // promote a gen0 region to gen2). However it means we'd need to set cards on those objects because we will not have a chance @@ -2743,7 +2742,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c // + if we don't have enough in regions that will be empty, we'll need to ask for new regions and if we can't, we fall back // to the special sweep mode. // - dprintf (REGIONS_LOG, ("h%d regions in g2: %d, g1: %d, g0: %d, before processing remaining regions", + dprintf (REGIONS_LOG, ("h%d planned regions in g2: %d, g1: %d, g0: %d, before processing remaining regions", heap_number, planned_regions_per_gen[2], planned_regions_per_gen[1], planned_regions_per_gen[0])); dprintf (REGIONS_LOG, ("h%d g2: surv %Id(p: %Id, %.2f%%), g1: surv %Id(p: %Id, %.2f%%), g0: surv %Id(p: %Id, %.2f%%)", @@ -2757,11 +2756,69 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c int to_be_empty_regions = 0; + // If decide_promote_gen1_pins_p is true, We need to see if we should promote what's left in gen1 pins. We either promote + // or demote all that's left. As a future performance improvement, we could sort these regions by the amount of + // pinned survival and only promote the ones with excessive amounts of survival. 
+ // + // First go through the remaining gen1 regions to see if we should demote the remaining pins + heap_segment* current_region = generation_allocation_segment (consing_gen); + bool actual_promote_gen1_pins_p = false; + + if (decide_promote_gen1_pins_p) + { + size_t gen1_pins_left = 0; + size_t total_space_to_skip = 0; + + while (current_region) + { + int gen_num = heap_segment_gen_num (current_region); + if (gen_num != 0) + { + assert (gen_num == (max_generation - 1)); + + if (!heap_segment_swept_in_plan (current_region)) + { + gen1_pins_left += heap_segment_pinned_survived (current_region); + total_space_to_skip += get_region_size (current_region); + } + } + else + { + break; + } + + current_region = heap_segment_next (current_region); + } + + float pin_frag_ratio = 0.0; + float pin_surv_ratio = 0.0; + + if (total_space_to_skip) + { + size_t gen1_surv = dd_survived_size (dynamic_data_of (max_generation - 1)); + if (gen1_surv) + { + pin_frag_ratio = (float)gen1_pins_left / (float)total_space_to_skip; + pin_surv_ratio = (float)gen1_pins_left / (float)gen1_surv; + actual_promote_gen1_pins_p = decide_on_gen1_pin_promotion (pin_frag_ratio, pin_surv_ratio); + } + } + +#ifdef SIMPLE_DPRINTF + dprintf (REGIONS_LOG, ("h%d ad_p_d: PL: %zd, SL: %zd, pfr: %.3f, psr: %.3f, prmoote gen1 %d. 
gen1_pins_left %Id, total surv %Id (p:%Id), total_space %Id", + heap_number, gen1_pins_left, total_space_to_skip, pin_frag_ratio, pin_surv_ratio, actual_promote_gen1_pins_p, gen1_pins_left, + dd_survived_size (dynamic_data_of (max_generation - 1)), dd_pinned_survived_size (dynamic_data_of (max_generation - 1)), total_space_to_skip)); +#endif + } + + maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); + + bool large_pins_p = false; + while (!pinned_plug_que_empty_p()) { uint8_t* oldest_plug = pinned_plug (oldest_pin()); - // detect pinned block in segments without pins heap_segment* nseg = heap_segment_rw (generation_allocation_segment (consing_gen)); dprintf (3, ("h%d oldest pin: %p, consing alloc %p, ptr %p, limit %p", heap_number, oldest_plug, heap_segment_mem (nseg), @@ -2771,12 +2828,10 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c while ((oldest_plug < generation_allocation_pointer (consing_gen)) || (oldest_plug >= heap_segment_allocated (nseg))) { - assert ((oldest_plug < heap_segment_mem (nseg)) || - (oldest_plug > heap_segment_reserved (nseg))); - assert (generation_allocation_pointer (consing_gen)>= - heap_segment_mem (nseg)); - assert (generation_allocation_pointer (consing_gen)<= - heap_segment_committed (nseg)); + assert ((oldest_plug < heap_segment_mem (nseg)) || (oldest_plug > heap_segment_reserved (nseg))); + assert (generation_allocation_pointer (consing_gen)>= heap_segment_mem (nseg)); + assert (generation_allocation_pointer (consing_gen)<= heap_segment_committed (nseg)); + assert (!heap_segment_swept_in_plan (nseg)); dprintf (3, ("h%d PRR: in loop, seg %p pa %p -> alloc ptr %p, plan gen %d->%d", heap_number, heap_segment_mem (nseg), @@ -2785,10 +2840,8 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c heap_segment_plan_gen_num (nseg), current_plan_gen_num)); - assert (!heap_segment_swept_in_plan (nseg)); - 
heap_segment_plan_allocated (nseg) = generation_allocation_pointer (consing_gen); - decide_on_demotion_pin_surv (nseg, &to_be_empty_regions); + decide_on_demotion_pin_surv (nseg, &to_be_empty_regions, actual_promote_gen1_pins_p, large_pins_p); heap_segment* next_seg = heap_segment_next_non_sip (nseg); @@ -2801,6 +2854,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c assert (next_seg != 0); nseg = next_seg; + large_pins_p = false; generation_allocation_segment (consing_gen) = nseg; generation_allocation_pointer (consing_gen) = heap_segment_mem (nseg); @@ -2810,6 +2864,11 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c uint8_t* plug = pinned_plug (m); size_t len = pinned_len (m); + if (!large_pins_p) + { + large_pins_p = (len >= demotion_plug_len_th); + } + set_new_pin_info (m, generation_allocation_pointer (consing_gen)); size_t free_size = pinned_len (m); update_planned_gen0_free_space (free_size, plug); @@ -2822,7 +2881,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c generation_allocation_pointer (consing_gen); } - heap_segment* current_region = generation_allocation_segment (consing_gen); + current_region = generation_allocation_segment (consing_gen); if (special_sweep_p) { @@ -2841,7 +2900,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c if (current_region) { - decide_on_demotion_pin_surv (current_region, &to_be_empty_regions); + decide_on_demotion_pin_surv (current_region, &to_be_empty_regions, actual_promote_gen1_pins_p, large_pins_p); if (!heap_segment_swept_in_plan (current_region)) { @@ -3552,6 +3611,12 @@ void gc_heap::plan_phase (int condemned_gen_number) dprintf(3,( " From %zx to %zx", (size_t)x, (size_t)end)); + // Normally we always demote pins left after plan allocation, but if we are doing a gen1 only because of cards, it means + // we need to decide if we will promote these pins from gen1. 
+ decide_promote_gen1_pins_p = (settings.promotion && + (settings.condemned_generation == (max_generation - 1)) && + gen_to_condemn_reasons.is_only_condition(gen_low_card_p)); + #ifdef USE_REGIONS if (should_sweep_in_plan (seg1)) { @@ -3561,11 +3626,6 @@ void gc_heap::plan_phase (int condemned_gen_number) #else demotion_low = MAX_PTR; demotion_high = heap_segment_allocated (ephemeral_heap_segment); - // If we are doing a gen1 only because of cards, it means we should not demote any pinned plugs - // from gen1. They should get promoted to gen2. - demote_gen1_p = !(settings.promotion && - (settings.condemned_generation == (max_generation - 1)) && - gen_to_condemn_reasons.is_only_condition(gen_low_card_p)); total_ephemeral_size = 0; #endif //!USE_REGIONS @@ -4046,7 +4106,7 @@ void gc_heap::plan_phase (int condemned_gen_number) dd_artificial_pinned_survived_size (dd_active_old) += artificial_pinned_size; #ifndef USE_REGIONS - if (!demote_gen1_p && (active_old_gen_number == (max_generation - 1))) + if (decide_promote_gen1_pins_p && (active_old_gen_number == (max_generation - 1))) { last_gen1_pin_end = plug_end; } @@ -4143,7 +4203,7 @@ void gc_heap::plan_phase (int condemned_gen_number) if (active_new_gen_number == (max_generation - 1)) { maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); - if (!demote_gen1_p) + if (decide_promote_gen1_pins_p) advance_pins_for_demotion (consing_gen); } From 3719fce75780626a08262cd7f3309216fc2a30f5 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 24 Mar 2026 17:52:33 +0100 Subject: [PATCH 2/5] Fix the bug --- src/coreclr/gc/allocation.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/coreclr/gc/allocation.cpp b/src/coreclr/gc/allocation.cpp index 6fccdbf27a9f63..0d47584a88e1b6 100644 --- a/src/coreclr/gc/allocation.cpp +++ b/src/coreclr/gc/allocation.cpp @@ -5554,6 +5554,16 @@ uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen, 
#ifdef SHORT_PLUGS int pad_in_front = ((old_loc != 0) && (to_gen_number != max_generation)) ? USE_PADDING_FRONT : 0; + + // A near-region-sized plug can't fit with front padding even in an empty region, so skip the padding. + // This is safe because front padding only exists to protect short plugs (shorter than sizeof(plug_and_gap)) + // from being overwritten by the plug_and_gap header during compaction — a plug this large is in no such danger. + if ((pad_in_front & USE_PADDING_FRONT) && + (size + Align (min_obj_size) > + ((size_t)1 << min_segment_size_shr) - sizeof (aligned_plug_and_gap))) + { + pad_in_front = 0; + } #else //SHORT_PLUGS int pad_in_front = 0; #endif //SHORT_PLUGS From 81d423be84a08b0fd1dd4dd1ff71975c88cdbe50 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 24 Mar 2026 19:17:52 +0100 Subject: [PATCH 3/5] Regression test --- .../Github/Runtime_126043/Runtime_126043.cs | 121 ++++++++++++++++++ .../Runtime_126043/Runtime_126043.csproj | 26 ++++ 2 files changed, 147 insertions(+) create mode 100644 src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.cs create mode 100644 src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.csproj diff --git a/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.cs b/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.cs new file mode 100644 index 00000000000000..d5ae37e57e1076 --- /dev/null +++ b/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.cs @@ -0,0 +1,121 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#pragma warning disable CA1825 // Avoid zero-length array allocations (needed for exact region packing) + +// Repro for Server GC hang: infinite loop in gc_heap::allocate_in_condemned_generations +// +// Requires: +// - Server GC (DOTNET_gcServer=1, DOTNET_GCHeapCount=1) +// - 64-bit process +// +// Mechanism: +// 1. 
Allocate byte[8208] objects (8,224 bytes each = exactly one alloc quantum) +// 2. Compacting gen2 GCs pack objects contiguously (eliminate quantum gaps) +// 3. Pin all objects; gen2 compact -> pinned_surv ~= 4.19MB < 6MB -> demote to gen0 +// 4. Free pins -> one big non-pinned plug = 510 * 8224 = 4,194,240 bytes +// 5. Gen1 compact: plug + 48B SHORT_PLUGS padding = 4,194,288 > 4,194,264 -> HANG + +using System; +using System.Runtime; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using TestLibrary; +using Xunit; + +public class Runtime_126043 +{ + // byte[8200] = 8,224 bytes on heap (24B header + 8200B data). + // 510 per 4MB region = 4,194,240 bytes + 24B gap = 4,194,264 (usable). + // To trigger the bug, we need plug > 4,194,240 (usable - front_pad). + // Interleave one byte[0] (24 bytes) per 510 byte[8200] to fill the gap: + // 510 * 8,224 + 1 * 24 = 4,194,264 = full region. plug = 4,194,264. + // plug + 24B front_pad = 4,194,288 > 4,194,264 -> HANG! + // + // 11,220 objects = 22 groups of (510+1), ensures LOH arrays (>85KB). 
+ private const int ArrayDataLength = 8200; + private const int ObjectsPerGroup = 511; // 510 large + 1 small per region + private const int GroupCount = 22; + private const int ObjectCount = ObjectsPerGroup * GroupCount; // 11,242 + + [Fact] + public static void TestEntryPoint() + { + Console.WriteLine($"Server GC: {GCSettings.IsServerGC} 64-bit: {Environment.Is64BitProcess}"); + if (!GCSettings.IsServerGC || !Environment.Is64BitProcess) + { + throw new Exception("ERROR: Requires server GC and 64-bit process."); + } + + Console.WriteLine($"Allocating {ObjectCount} x byte[{ArrayDataLength}] objects (~{ObjectCount * 8224 / 1024 / 1024}MB)..."); + RunHangScenario(); + + Console.WriteLine("Completed without hang."); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void RunHangScenario() + { + // Measure actual object size on heap + long before = GC.GetAllocatedBytesForCurrentThread(); + byte[] probe = new byte[ArrayDataLength]; + long after = GC.GetAllocatedBytesForCurrentThread(); + Console.WriteLine($" Actual heap size of byte[{ArrayDataLength}]: {after - before} bytes"); + GC.KeepAlive(probe); + + // Phase 1: Clean slate. + GC.Collect(2, GCCollectionMode.Forced, blocking: true, compacting: true); + + // Phase 2: Allocate objects. byte[8208] = 8,224 bytes = one full quantum. + byte[][] live = AllocateObjects(ObjectCount); + Console.WriteLine($" Allocated, gen={GC.GetGeneration(live[0])}"); + + // Phase 3: Promote to gen2 and compact (removes quantum gaps). + for (int i = 0; i < 3; i++) + GC.Collect(2, GCCollectionMode.Forced, blocking: true, compacting: true); + Console.WriteLine($" After compaction: gen={GC.GetGeneration(live[0])}"); + + // Phase 4: Pin ALL objects. + GCHandle[] pins = new GCHandle[ObjectCount]; + for (int i = 0; i < ObjectCount; i++) + pins[i] = GCHandle.Alloc(live[i], GCHandleType.Pinned); + Console.WriteLine($" Pinned {ObjectCount} objects"); + + // Phase 5: Burn through demotion delay (hex 14 = 20 GC cycles). 
+ for (int i = 0; i < 60; i++) + GC.Collect(0, GCCollectionMode.Forced, blocking: true); + + // Phase 6: Gen2 compact with demotion. + Console.WriteLine(" Phase 6: gen2 compact (demotion)..."); + Console.Out.Flush(); + GC.Collect(2, GCCollectionMode.Forced, blocking: true, compacting: true); + Console.WriteLine($" After demotion: gen={GC.GetGeneration(live[0])}"); + + // Phase 7: Free ALL pins. + for (int i = 0; i < pins.Length; i++) + pins[i].Free(); + Console.WriteLine(" All pins freed."); + + // Phase 8: Gen1 compact. Non-pinned plug 4,194,264 + 24B front_pad > 4,194,264 -> HANG. + Console.WriteLine(" Phase 8: gen1 compact (may HANG)..."); + Console.Out.Flush(); + GC.Collect(1, GCCollectionMode.Forced, blocking: true, compacting: true); + + Console.WriteLine(" Survived gen1 GC!"); + GC.KeepAlive(live); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static byte[][] AllocateObjects(int count) + { + byte[][] arr = new byte[count][]; + for (int i = 0; i < count; i++) + { + // Every 511th object is byte[0] (24 bytes) to fill the 24-byte gap + // at the end of each 4MB region. Other objects are byte[8200] (8,224 bytes). + arr[i] = ((i + 1) % ObjectsPerGroup == 0) ? 
new byte[0] : new byte[ArrayDataLength]; + } + + return arr; + } +} diff --git a/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.csproj b/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.csproj new file mode 100644 index 00000000000000..7bb305595eae2b --- /dev/null +++ b/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.csproj @@ -0,0 +1,26 @@ + + + + true + true + True + 1 + + + + + + + + + + + From ead14dd661737ed62c5d59c8c96325eacfb7be68 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 24 Mar 2026 19:24:41 +0100 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Jan Vorlicek --- src/coreclr/gc/plan_phase.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/gc/plan_phase.cpp b/src/coreclr/gc/plan_phase.cpp index 32f2dc4d832808..eee724dad0892d 100644 --- a/src/coreclr/gc/plan_phase.cpp +++ b/src/coreclr/gc/plan_phase.cpp @@ -1802,7 +1802,8 @@ void gc_heap::advance_pins_for_demotion (generation* gen) float pin_frag_ratio = (float)gen1_pins_left / (float)total_space_to_skip; float pin_surv_ratio = (float)gen1_pins_left / (float)(dd_survived_size (dynamic_data_of (max_generation - 1))); bool actual_promote_gen1_pins_p = decide_on_gen1_pin_promotion (pin_frag_ratio, pin_surv_ratio); - if (actual_promote_gen1_pins_p) { + if (actual_promote_gen1_pins_p) + { while (!pinned_plug_que_empty_p() && (pinned_plug (oldest_pin()) < original_youngest_start)) { @@ -2530,7 +2531,7 @@ void gc_heap::decide_on_demotion_pin_surv (heap_segment* region, int* no_pinned_ int gen_num = heap_segment_gen_num (region); int new_gen_num = 0; int pinned_surv = heap_segment_pinned_survived (region); - int promote_pins_p = large_pins_p; + bool promote_pins_p = large_pins_p; if (pinned_surv == 0) { @@ -2709,7 +2710,6 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c // later. 
// // set generation_allocation_segment to 0, we know we don't have pins so we will not be going through the while loop below - // generation_allocation_segment (consing_gen) = 0; generation_allocation_pointer (consing_gen) = 0; generation_allocation_limit (consing_gen) = 0; @@ -2805,7 +2805,7 @@ void gc_heap::process_remaining_regions (int current_plan_gen_num, generation* c } #ifdef SIMPLE_DPRINTF - dprintf (REGIONS_LOG, ("h%d ad_p_d: PL: %zd, SL: %zd, pfr: %.3f, psr: %.3f, prmoote gen1 %d. gen1_pins_left %Id, total surv %Id (p:%Id), total_space %Id", + dprintf (REGIONS_LOG, ("h%d ad_p_d: PL: %zd, SL: %zd, pfr: %.3f, psr: %.3f, promote gen1 %d. gen1_pins_left %Id, total surv %Id (p:%Id), total_space %Id", heap_number, gen1_pins_left, total_space_to_skip, pin_frag_ratio, pin_surv_ratio, actual_promote_gen1_pins_p, gen1_pins_left, dd_survived_size (dynamic_data_of (max_generation - 1)), dd_pinned_survived_size (dynamic_data_of (max_generation - 1)), total_space_to_skip)); #endif From 78fa1266ba66bf9e2c68c04794f6c09a35540164 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Thu, 26 Mar 2026 10:24:27 +0100 Subject: [PATCH 5/5] PR feedback * Comments * Add explicit GC region size setting to the test --- src/coreclr/gc/allocation.cpp | 2 +- .../Github/Runtime_126043/Runtime_126043.cs | 13 +++++++------ .../Github/Runtime_126043/Runtime_126043.csproj | 2 ++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/coreclr/gc/allocation.cpp b/src/coreclr/gc/allocation.cpp index 0d47584a88e1b6..947f2330de5a57 100644 --- a/src/coreclr/gc/allocation.cpp +++ b/src/coreclr/gc/allocation.cpp @@ -5493,7 +5493,7 @@ void gc_heap::attribute_pin_higher_gen_alloc ( #ifdef USE_REGIONS // With regions it's a bit more complicated since we only set the plan_gen_num - // of a region after we've planned it. This means if the pinning plug is in the + // of a region after we've planned it. 
This means if the pinning plug is in // the same seg we are planning, we haven't set its plan_gen_num yet. So we // need to check for that first. int togn = (in_range_for_segment (plug, seg) ? to_gen_number : object_gennum_plan (plug)); diff --git a/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.cs b/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.cs index d5ae37e57e1076..6a78d2fcc1d881 100644 --- a/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.cs +++ b/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.cs @@ -10,11 +10,12 @@ // - 64-bit process // // Mechanism: -// 1. Allocate byte[8208] objects (8,224 bytes each = exactly one alloc quantum) +// 1. Allocate byte[8200] objects (8,224 bytes each) interleaved with byte[0] (24 bytes) +// to fill each 4MB region exactly: 510 * 8,224 + 1 * 24 = 4,194,264 bytes // 2. Compacting gen2 GCs pack objects contiguously (eliminate quantum gaps) // 3. Pin all objects; gen2 compact -> pinned_surv ~= 4.19MB < 6MB -> demote to gen0 -// 4. Free pins -> one big non-pinned plug = 510 * 8224 = 4,194,240 bytes -// 5. Gen1 compact: plug + 48B SHORT_PLUGS padding = 4,194,288 > 4,194,264 -> HANG +// 4. Free pins -> one big non-pinned plug = 4,194,264 bytes (full region) +// 5. Gen1 compact: plug + 24B SHORT_PLUGS front padding = 4,194,288 > 4,194,264 -> HANG using System; using System.Runtime; @@ -32,7 +33,7 @@ public class Runtime_126043 // 510 * 8,224 + 1 * 24 = 4,194,264 = full region. plug = 4,194,264. // plug + 24B front_pad = 4,194,288 > 4,194,264 -> HANG! // - // 11,220 objects = 22 groups of (510+1), ensures LOH arrays (>85KB). + // 11,242 objects = 22 groups of (510+1), ensures LOH arrays (>85KB). private const int ArrayDataLength = 8200; private const int ObjectsPerGroup = 511; // 510 large + 1 small per region private const int GroupCount = 22; @@ -66,7 +67,7 @@ private static void RunHangScenario() // Phase 1: Clean slate. 
GC.Collect(2, GCCollectionMode.Forced, blocking: true, compacting: true); - // Phase 2: Allocate objects. byte[8208] = 8,224 bytes = one full quantum. + // Phase 2: Allocate objects. byte[8200] = 8,224 bytes on heap, interleaved with byte[0]. byte[][] live = AllocateObjects(ObjectCount); Console.WriteLine($" Allocated, gen={GC.GetGeneration(live[0])}"); @@ -81,7 +82,7 @@ private static void RunHangScenario() pins[i] = GCHandle.Alloc(live[i], GCHandleType.Pinned); Console.WriteLine($" Pinned {ObjectCount} objects"); - // Phase 5: Burn through demotion delay (hex 14 = 20 GC cycles). + // Phase 5: Burn through GC cycles so demotion kicks in. for (int i = 0; i < 60; i++) GC.Collect(0, GCCollectionMode.Forced, blocking: true); diff --git a/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.csproj b/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.csproj index 7bb305595eae2b..d7912e15f3cf18 100644 --- a/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.csproj +++ b/src/tests/GC/Regressions/Github/Runtime_126043/Runtime_126043.csproj @@ -10,11 +10,13 @@ $(CLRTestBatchPreCommands) set DOTNET_gcServer=1 set DOTNET_GCHeapCount=1 + set DOTNET_GCRegionSize=400000 ]]>