diff --git a/src/coreclr/src/gc/gc.cpp b/src/coreclr/src/gc/gc.cpp
index abbabb66af40ff..44020b3b74a155 100644
--- a/src/coreclr/src/gc/gc.cpp
+++ b/src/coreclr/src/gc/gc.cpp
@@ -2409,7 +2409,10 @@ static static_data static_data_table[latency_level_last - latency_level_first +
         // gen2
         {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100},
         // loh
-        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}
+        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
+        // poh
+        // TODO: tuning https://github.com/dotnet/runtime/issues/13739
+        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
     },

     // latency_level_balanced
@@ -2427,6 +2430,9 @@ static static_data static_data_table[latency_level_last - latency_level_first +
         // gen2
         {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100},
         // loh
+        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
+        // poh
+        // TODO: tuning https://github.com/dotnet/runtime/issues/13739
         {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}
     },
 };
@@ -2758,6 +2764,8 @@ size_t gc_heap::bgc_overflow_count = 0;
 size_t gc_heap::bgc_begin_loh_size = 0;
 size_t gc_heap::end_loh_size = 0;
+size_t gc_heap::bgc_begin_poh_size = 0;
+size_t gc_heap::end_poh_size = 0;

 #ifdef BGC_SERVO_TUNING
 uint64_t gc_heap::loh_a_no_bgc = 0;
@@ -2773,6 +2781,8 @@ uint32_t gc_heap::bgc_alloc_spin_uoh = 0;

 size_t gc_heap::bgc_loh_size_increased = 0;

+size_t gc_heap::bgc_poh_size_increased = 0;
+
 size_t gc_heap::background_soh_alloc_count = 0;
 size_t gc_heap::background_uoh_alloc_count = 0;
@@ -2957,6 +2967,7 @@ BOOL gc_heap::heap_analyze_enabled = FALSE;

 alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST-1];
 alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST-1];
+alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST-1];

 dynamic_data gc_heap::dynamic_data_table [total_generation_count];
 gc_history_per_heap gc_heap::gc_data_per_heap;
@@ -3002,6 +3013,7 @@ CFinalize* gc_heap::finalize_queue = 0;
 VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh;
 VOLATILE(bool) gc_heap::card_mark_done_soh;
 VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh;
+VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh;
 VOLATILE(bool) gc_heap::card_mark_done_uoh;

 #endif // FEATURE_CARD_MARKING_STEALING
@@ -4332,13 +4344,16 @@ typedef struct
     imemory_data *initial_memory;
     imemory_data *initial_normal_heap; // points into initial_memory_array
     imemory_data *initial_large_heap;  // points into initial_memory_array
+    imemory_data *initial_pinned_heap; // points into initial_memory_array

     size_t block_size_normal;
     size_t block_size_large;
+    size_t block_size_pinned;

     int block_count;                // # of blocks in each
     int current_block_normal;
     int current_block_large;
+    int current_block_pinned;

     enum
     {
@@ -4355,6 +4370,7 @@ typedef struct
         {
             case 0: return block_size_normal;
             case 1: return block_size_large;
+            case 2: return block_size_pinned;
             default: __UNREACHABLE();
         }
     };
@@ -4367,6 +4383,7 @@ typedef struct
             case soh_gen1:
             case soh_gen2: return initial_normal_heap[h_number].memory_base;
             case loh_generation: return initial_large_heap[h_number].memory_base;
+            case poh_generation: return initial_pinned_heap[h_number].memory_base;
             default: __UNREACHABLE();
         }
     };
@@ -4379,6 +4396,7 @@ typedef struct
             case soh_gen1:
             case soh_gen2: return block_size_normal;
             case loh_generation: return block_size_large;
+            case poh_generation: return block_size_pinned;
             default: __UNREACHABLE();
         }
     };
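The `initial_memory_details` bookkeeping above stays one flat `imemory_data` array, now carved into three per-generation runs instead of two. A minimal sketch of the resulting indexing, assuming the run order established by the pointer arithmetic in `reserve_initial_memory` below (the helper itself is hypothetical, for illustration only):

```cpp
#include <cstddef>

// Hypothetical helper: for n heaps the flat array is laid out as
// [0..n) soh blocks, [n..2n) loh blocks, [2n..3n) poh blocks.
size_t initial_block_index (int heap_number, int run /* 0=soh, 1=loh, 2=poh */, int n_heaps)
{
    return (size_t)run * n_heaps + heap_number;
}
```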
@@ -4387,15 +4405,15 @@ typedef struct

 initial_memory_details memory_details;

-BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heaps, bool use_large_pages_p)
+BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p)
 {
     BOOL reserve_success = FALSE;
     // should only be called once
     assert (memory_details.initial_memory == 0);

-    // soh + loh segments * num_heaps
-    memory_details.initial_memory = new (nothrow) imemory_data[num_heaps * (total_generation_count - ephemeral_generation_count)];
+    // soh + loh + poh segments * num_heaps
+    memory_details.initial_memory = new (nothrow) imemory_data[num_heaps * (total_generation_count - ephemeral_generation_count)];
     if (memory_details.initial_memory == 0)
     {
         dprintf (2, ("failed to reserve %Id bytes for imemory_data", num_heaps * (total_generation_count - ephemeral_generation_count) * sizeof (imemory_data)));
@@ -4404,13 +4422,16 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap

     memory_details.initial_normal_heap = memory_details.initial_memory;
     memory_details.initial_large_heap = memory_details.initial_normal_heap + num_heaps;
+    memory_details.initial_pinned_heap = memory_details.initial_large_heap + num_heaps;

     memory_details.block_size_normal = normal_size;
     memory_details.block_size_large = large_size;
+    memory_details.block_size_pinned = pinned_size;

     memory_details.block_count = num_heaps;

     memory_details.current_block_normal = 0;
     memory_details.current_block_large = 0;
+    memory_details.current_block_pinned = 0;

     g_gc_lowest_address = MAX_PTR;
     g_gc_highest_address = 0;
@@ -4422,13 +4443,13 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
         return FALSE;
     }

-    if (((size_t)MAX_PTR / memory_details.block_count) < (normal_size + large_size))
+    if (((size_t)MAX_PTR / memory_details.block_count) < (normal_size + large_size + pinned_size))
     {
         dprintf (2, ("(0x%Ix + 0x%Ix)*0x%Ix overflow", normal_size, large_size, memory_details.block_count));
         return FALSE;
     }

-    size_t requestedMemory = memory_details.block_count * (normal_size + large_size);
+    size_t requestedMemory = memory_details.block_count * (normal_size + large_size + pinned_size);

     uint8_t* allatonce_block = (uint8_t*)virtual_alloc (requestedMemory, use_large_pages_p);
     if (allatonce_block)
@@ -4443,27 +4464,32 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
                 (i * normal_size);
             memory_details.initial_large_heap[i].memory_base = allatonce_block +
                 (memory_details.block_count * normal_size) + (i * large_size);
+            memory_details.initial_pinned_heap[i].memory_base = allatonce_block +
+                (memory_details.block_count * (normal_size + large_size)) + (i * pinned_size);

             reserve_success = TRUE;
         }
     }
     else
     {
-        // try to allocate 2 blocks
+        // try to allocate 3 blocks
         uint8_t* b1 = (uint8_t*)virtual_alloc (memory_details.block_count * normal_size, use_large_pages_p);
         uint8_t* b2 = (uint8_t*)virtual_alloc (memory_details.block_count * large_size, use_large_pages_p);
+        uint8_t* b3 = (uint8_t*)virtual_alloc (memory_details.block_count * pinned_size, use_large_pages_p);

-        if (b1 && b2)
+        if (b1 && b2 && b3)
         {
             memory_details.allocation_pattern = initial_memory_details::EACH_GENERATION;
-            g_gc_lowest_address = min (b1, b2);
+            g_gc_lowest_address = min (b1, min(b2, b3));
             g_gc_highest_address = max (b1 + memory_details.block_count * normal_size,
-                b2 + memory_details.block_count * large_size);
+                max (b2 + memory_details.block_count * large_size,
+                    b3 + memory_details.block_count * pinned_size));

             for (int i = 0; i < memory_details.block_count; i++)
             {
                 memory_details.initial_normal_heap[i].memory_base = b1 + (i * normal_size);
                 memory_details.initial_large_heap[i].memory_base = b2 + (i * large_size);
+                memory_details.initial_pinned_heap[i].memory_base = b3 + (i * pinned_size);
             }

             reserve_success = TRUE;
@@ -4476,6 +4502,8 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
                 virtual_free (b1, memory_details.block_count * normal_size);
             if (b2)
                 virtual_free (b2, memory_details.block_count * large_size);
+            if (b3)
+                virtual_free (b3, memory_details.block_count * pinned_size);
         }

         if ((b2 == NULL) && (memory_details.block_count > 1))
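`reserve_initial_memory` keeps its three-tier fallback, with each tier now sized for the extra poh block. A sketch of the ladder, assuming only what the enum names and the code above show (the helper is hypothetical):

```cpp
enum reservation_pattern { ALL_AT_ONCE, EACH_GENERATION, EACH_BLOCK };

// Hypothetical summary of the fallback order; the real code sets
// memory_details.allocation_pattern as each attempt succeeds.
reservation_pattern plan_reservation (bool one_region_ok, bool three_regions_ok)
{
    if (one_region_ok)
        return ALL_AT_ONCE;      // one region: block_count * (soh + loh + poh) bytes
    if (three_regions_ok)
        return EACH_GENERATION;  // three regions: b1 (soh), b2 (loh), b3 (poh)
    return EACH_BLOCK;           // last resort: one block per heap per generation
}
```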
@@ -4533,12 +4561,15 @@ void destroy_initial_memory()

             virtual_free (memory_details.initial_large_heap[0].memory_base,
                 memory_details.block_count*memory_details.block_size_large);
+
+            virtual_free (memory_details.initial_pinned_heap[0].memory_base,
+                memory_details.block_count*memory_details.block_size_pinned);
         }
         else
         {
             assert (memory_details.allocation_pattern == initial_memory_details::EACH_BLOCK);
             imemory_data *current_block = memory_details.initial_memory;
-            for(int i = 0; i < (memory_details.block_count*2); i++, current_block++)
+            for (int i = 0; i < (memory_details.block_count*(total_generation_count - ephemeral_generation_count)); i++, current_block++)
             {
                 size_t block_size = memory_details.block_size (i);
                 if (current_block->memory_base != NULL)
@@ -4552,6 +4583,7 @@ void destroy_initial_memory()
         memory_details.initial_memory = NULL;
         memory_details.initial_normal_heap = NULL;
         memory_details.initial_large_heap = NULL;
+        memory_details.initial_pinned_heap = NULL;
     }
 }
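The EACH_BLOCK teardown loop now derives its bound rather than hard-coding 2. A quick check of the arithmetic, using the enum values from gc.h later in this diff (illustrative only):

```cpp
// total_generation_count = poh_generation + 1 = 5 and
// ephemeral_generation_count = max_generation = 2, so the loop frees
// block_count * 3 blocks: one soh, one loh and one poh block per heap.
static_assert (total_generation_count - ephemeral_generation_count == 3,
               "three initial blocks per heap");
```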
@@ -5012,7 +5044,9 @@ heap_segment* gc_heap::get_segment_for_uoh (int gen_number, size_t size
 #ifdef MULTIPLE_HEAPS
         heap_segment_heap (res) = hp;
 #endif //MULTIPLE_HEAPS
-        res->flags |= heap_segment_flags_loh;
+        res->flags |= gen_number == poh_generation ?
+            heap_segment_flags_poh :
+            heap_segment_flags_loh;

         FIRE_EVENT(GCCreateSegment_V1,
                    heap_segment_mem(res),
                    (size_t)(heap_segment_reserved (res) - heap_segment_mem(res)),
                    gc_etw_segment_large_object_heap);
@@ -6420,7 +6454,7 @@ void gc_heap::fix_uoh_allocation_area (BOOL for_gc_p)
 {
     UNREFERENCED_PARAMETER(for_gc_p);

-    for(int i = uoh_start_generation; i < total_generation_count; i++)
+    for (int i = uoh_start_generation; i < total_generation_count; i++)
     {
 #ifdef _DEBUG
         alloc_context* acontext =
@@ -8201,7 +8235,7 @@ void gc_heap::copy_brick_card_range (uint8_t* la, uint32_t* old_card_table,

     // We don't need to go through all the card tables here because
     // we only need to copy from the GC version of the mark array - when we
-    // mark (even in allocate_large_object) we always use that mark array.
+    // mark (even in allocate_uoh_object) we always use that mark array.
     if ((card_table_highest_address (old_ct) >= start) &&
         (card_table_lowest_address (old_ct) <= end))
     {
@@ -10374,7 +10408,8 @@ size_t gc_heap::get_segment_size_hard_limit (uint32_t* num_heaps, bool should_ad
 }

 HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
-                                size_t loh_segment_size
+                                size_t loh_segment_size,
+                                size_t poh_segment_size
 #ifdef MULTIPLE_HEAPS
                                 ,int number_of_heaps
 #endif //MULTIPLE_HEAPS
@@ -10494,7 +10529,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
 #endif //BACKGROUND_GC

     reserved_memory = 0;
-    size_t initial_heap_size = soh_segment_size + loh_segment_size;
+    size_t initial_heap_size = soh_segment_size + loh_segment_size + poh_segment_size;
 #ifdef MULTIPLE_HEAPS
     reserved_memory_limit = initial_heap_size * number_of_heaps;
 #else //MULTIPLE_HEAPS
@@ -10507,7 +10542,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
         check_commit_cs.Initialize();
     }

-    if (!reserve_initial_memory (soh_segment_size, loh_segment_size, number_of_heaps, use_large_pages_p))
+    if (!reserve_initial_memory (soh_segment_size, loh_segment_size, poh_segment_size, number_of_heaps, use_large_pages_p))
         return E_OUTOFMEMORY;

 #ifdef CARD_BUNDLE
@@ -11173,7 +11208,7 @@ gc_heap::init_gc_heap (int h_number)

 #endif //!SEG_MAPPING_TABLE

-    // Create segments for the large generation
+    // Create segments for the large and pinned generations
     heap_segment* lseg = make_initial_segment(loh_generation, h_number);
     if (!lseg)
         return 0;
@@ -11184,19 +11219,36 @@ gc_heap::init_gc_heap (int h_number)
         (size_t)(heap_segment_reserved (lseg) - heap_segment_mem(lseg)),
         gc_etw_segment_large_object_heap);

+    heap_segment* pseg = make_initial_segment(poh_generation, h_number);
+    if (!pseg)
+        return 0;
+
+    pseg->flags |= heap_segment_flags_poh;
+
+    FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(pseg),
+        (size_t)(heap_segment_reserved (pseg) - heap_segment_mem(pseg)),
+        gc_etw_segment_pinned_object_heap);
+
 #ifdef SEG_MAPPING_TABLE
     seg_mapping_table_add_segment (lseg, __this);
+    seg_mapping_table_add_segment (pseg, __this);
 #else //SEG_MAPPING_TABLE
     seg_table->insert ((uint8_t*)lseg, sdelta);
+    seg_table->insert ((uint8_t*)pseg, sdelta);
 #endif //SEG_MAPPING_TABLE

     make_generation (loh_generation, lseg, heap_segment_mem (lseg), 0);
+    make_generation (poh_generation, pseg, heap_segment_mem (pseg), 0);

     heap_segment_allocated (lseg) = heap_segment_mem (lseg) + Align (min_obj_size, get_alignment_constant (FALSE));
     heap_segment_used (lseg) = heap_segment_allocated (lseg) - plug_skew;

+    heap_segment_allocated (pseg) = heap_segment_mem (pseg) + Align (min_obj_size, get_alignment_constant (FALSE));
+    heap_segment_used (pseg) = heap_segment_allocated (pseg) - plug_skew;
+
     generation_of (max_generation)->free_list_allocator = allocator(NUM_GEN2_ALIST, BASE_GEN2_ALIST, gen2_alloc_list);
     generation_of (loh_generation)->free_list_allocator = allocator(NUM_LOH_ALIST, BASE_LOH_ALIST, loh_alloc_list);
+    generation_of (poh_generation)->free_list_allocator = allocator(NUM_POH_ALIST, BASE_POH_ALIST, poh_alloc_list);

     for (int gen_num = 0; gen_num < total_generation_count; gen_num++)
     {
@@ -11206,10 +11258,12 @@ gc_heap::init_gc_heap (int h_number)

 #ifdef MULTIPLE_HEAPS
     heap_segment_heap (lseg) = this;
+    heap_segment_heap (pseg) = this;

     //initialize the alloc context heap
     generation_alloc_context (generation_of (soh_gen0))->set_alloc_heap(vm_heap);
     generation_alloc_context (generation_of (loh_generation))->set_alloc_heap(vm_heap);
+    generation_alloc_context (generation_of (poh_generation))->set_alloc_heap(vm_heap);

 #endif //MULTIPLE_HEAPS
@@ -11379,6 +11433,7 @@ gc_heap::init_gc_heap (int h_number)
     background_uoh_alloc_count = 0;
     bgc_overflow_count = 0;
     end_loh_size = dd_min_size (dynamic_data_of (loh_generation));
+    end_poh_size = dd_min_size (dynamic_data_of (poh_generation));
 #endif //BACKGROUND_GC

 #ifdef GC_CONFIG_DRIVEN
@@ -13438,6 +13493,15 @@ int gc_heap::bgc_loh_allocate_spin()
     return bgc_allocate_spin(min_gc_size, bgc_begin_size, bgc_size_increased, end_size);
 }

+int gc_heap::bgc_poh_allocate_spin()
+{
+    size_t min_gc_size = dd_min_size (dynamic_data_of (poh_generation));
+    size_t bgc_begin_size = bgc_begin_poh_size;
+    size_t bgc_size_increased = bgc_poh_size_increased;
+    size_t end_size = end_poh_size;
+
+    return bgc_allocate_spin(min_gc_size, bgc_begin_size, bgc_size_increased, end_size);
+}
 #endif //BACKGROUND_GC

 size_t gc_heap::get_uoh_seg_size (size_t size)
@@ -13693,7 +13757,9 @@ allocation_state gc_heap::allocate_uoh (int gen_number,
                 }
 #endif //BGC_SERVO_TUNING

-                int spin_for_allocation = bgc_loh_allocate_spin();
+                int spin_for_allocation = (gen_number == loh_generation) ?
+                    bgc_loh_allocate_spin() :
+                    bgc_poh_allocate_spin();

                 if (spin_for_allocation >= 0)
                 {
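`bgc_poh_allocate_spin` is an exact structural copy of `bgc_loh_allocate_spin` over the poh counters, and `allocate_uoh` picks between them by generation. A possible further consolidation, sketched only to make the symmetry explicit (not part of this change; every identifier below comes from the diff):

```cpp
// Hypothetical merged helper covering both UOH generations.
int bgc_uoh_allocate_spin (int gen_number)
{
    bool is_loh = (gen_number == loh_generation);

    size_t min_gc_size        = dd_min_size (dynamic_data_of (gen_number));
    size_t bgc_begin_size     = is_loh ? bgc_begin_loh_size     : bgc_begin_poh_size;
    size_t bgc_size_increased = is_loh ? bgc_loh_size_increased : bgc_poh_size_increased;
    size_t end_size           = is_loh ? end_loh_size           : end_poh_size;

    return bgc_allocate_spin (min_gc_size, bgc_begin_size, bgc_size_increased, end_size);
}
```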
@@ -16408,7 +16474,8 @@ int gc_heap::generation_to_condemn (int n_initial,
             for (int i = 0; i < n_heaps; i++)
             {
                 if (((g_heaps[i]->current_generation_size (max_generation)) > bgc_min_per_heap) ||
-                    ((g_heaps[i]->current_generation_size (loh_generation)) > bgc_min_per_heap))
+                    ((g_heaps[i]->current_generation_size (loh_generation)) > bgc_min_per_heap) ||
+                    ((g_heaps[i]->current_generation_size (poh_generation)) > bgc_min_per_heap))
                 {
                     bgc_heap_too_small = FALSE;
                     break;
@@ -16416,7 +16483,8 @@ int gc_heap::generation_to_condemn (int n_initial,
             }
 #else //MULTIPLE_HEAPS
             if ((current_generation_size (max_generation) > bgc_min_per_heap) ||
-                (current_generation_size (loh_generation) > bgc_min_per_heap))
+                (current_generation_size (loh_generation) > bgc_min_per_heap) ||
+                (current_generation_size (poh_generation) > bgc_min_per_heap))
             {
                 bgc_heap_too_small = FALSE;
             }
@@ -17647,6 +17715,7 @@ void gc_heap::update_collection_counts ()
         if (i == max_generation)
         {
             dd_collection_count (dynamic_data_of (loh_generation))++;
+            dd_collection_count (dynamic_data_of (poh_generation))++;
         }

         dd_gc_clock (dd) = dd_gc_clock (dd0);
@@ -21146,9 +21215,12 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
 #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
     {
         dprintf (3, ("Marking cross generation pointers for uoh objects on heap %d", heap_number));
-        for(int i = uoh_start_generation; i < total_generation_count; i++)
+        for (int i = uoh_start_generation; i < total_generation_count; i++)
         {
-            mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
+#ifndef ALLOW_REFERENCES_IN_POH
+            if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
         }

 #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
@@ -21172,9 +21244,12 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
             if (!hp->card_mark_done_uoh)
             {
                 dprintf(3, ("Marking cross generation pointers for large objects on heap %d", hp->heap_number));
-                for(int i = uoh_start_generation; i < total_generation_count; i++)
+                for (int i = uoh_start_generation; i < total_generation_count; i++)
                 {
-                    hp->mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
+#ifndef ALLOW_REFERENCES_IN_POH
+                    if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                        hp->mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
                 }

                 hp->card_mark_done_uoh = true;
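The `#ifndef ALLOW_REFERENCES_IN_POH` gates above read more easily once expanded. With the define present (as gcpriv.h sets it in this diff) the condition vanishes and poh cards are scanned just like loh cards; without it, poh objects can hold no references, so their cards never need visiting:

```cpp
// Expansion with ALLOW_REFERENCES_IN_POH defined (the default in this diff):
for (int i = uoh_start_generation; i < total_generation_count; i++)
    mark_through_cards_for_uoh_objects (mark_object_fn, i, FALSE THIS_ARG);

// Expansion without it: poh holds no references, so its cards are skipped.
for (int i = uoh_start_generation; i < total_generation_count; i++)
    if (i != poh_generation)
        mark_through_cards_for_uoh_objects (mark_object_fn, i, FALSE THIS_ARG);
```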
@@ -23921,6 +23996,8 @@ void gc_heap::plan_phase (int condemned_gen_number)
             GCToEEInterface::DiagWalkLOHSurvivors(__this);
             sweep_uoh_objects (loh_generation);
         }
+
+        sweep_uoh_objects (poh_generation);
     }
     else
     {
@@ -25894,8 +25971,14 @@ void gc_heap::relocate_phase (int condemned_gen_number,
     if (!card_mark_done_uoh)
 #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
     {
-        dprintf (3, ("Relocating cross generation pointers for large objects on heap %d", heap_number));
-        mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, loh_generation, TRUE THIS_ARG);
+        dprintf (3, ("Relocating cross generation pointers for uoh objects on heap %d", heap_number));
+        for (int i = uoh_start_generation; i < total_generation_count; i++)
+        {
+#ifndef ALLOW_REFERENCES_IN_POH
+            if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, i, TRUE THIS_ARG);
+        }

 #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
         card_mark_done_uoh = true;
@@ -25915,6 +25998,10 @@ void gc_heap::relocate_phase (int condemned_gen_number,
         {
             relocate_in_uoh_objects (loh_generation);
         }
+
+#ifdef ALLOW_REFERENCES_IN_POH
+        relocate_in_uoh_objects (poh_generation);
+#endif
     }

 #ifndef FEATURE_CARD_MARKING_STEALING
     // moved this code *before* we scan the older generations via mark_through_cards_xxx
@@ -25958,8 +26045,14 @@ void gc_heap::relocate_phase (int condemned_gen_number,

             if (!hp->card_mark_done_uoh)
             {
-                dprintf(3, ("Relocating cross generation pointers for large objects on heap %d", hp->heap_number));
-                hp->mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, loh_generation, TRUE THIS_ARG);
+                dprintf(3, ("Relocating cross generation pointers for uoh objects on heap %d", hp->heap_number));
+                for (int i = uoh_start_generation; i < total_generation_count; i++)
+                {
+#ifndef ALLOW_REFERENCES_IN_POH
+                    if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                        hp->mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, i, TRUE THIS_ARG);
+                }

                 hp->card_mark_done_uoh = true;
             }
         }
@@ -27166,10 +27259,13 @@ void gc_heap::background_mark_phase ()

     size_t total_soh_size = generation_sizes (generation_of (max_generation));
     size_t total_loh_size = generation_size (loh_generation);
+    size_t total_poh_size = generation_size (poh_generation);
     bgc_begin_loh_size = total_loh_size;
+    bgc_begin_poh_size = total_poh_size;
     bgc_loh_size_increased = 0;
+    bgc_poh_size_increased = 0;

-    dprintf (GTC_LOG, ("BM: h%d: loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));
+    dprintf (GTC_LOG, ("BM: h%d: loh: %Id, soh: %Id, poh: %Id", heap_number, total_loh_size, total_soh_size, total_poh_size));

     {
         //concurrent_print_time_delta ("copying stack roots");
@@ -27446,8 +27542,9 @@ void gc_heap::background_mark_phase ()

         total_soh_size = generation_sizes (generation_of (max_generation));
         total_loh_size = generation_size (loh_generation);
+        total_poh_size = generation_size (poh_generation);

-        dprintf (GTC_LOG, ("FM: h%d: loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));
+        dprintf (GTC_LOG, ("FM: h%d: loh: %Id, soh: %Id, poh: %Id", heap_number, total_loh_size, total_soh_size, total_poh_size));

         dprintf (2, ("nonconcurrent marking stack roots"));
         GCScan::GcScanRoots(background_promote,
@@ -27591,8 +27688,9 @@ void gc_heap::background_mark_phase ()

     gen0_bricks_cleared = FALSE;

-    dprintf (2, ("end of bgc mark: loh: %d, soh: %d",
+    dprintf (2, ("end of bgc mark: loh: %d, poh: %d, soh: %d",
                  generation_size (loh_generation),
+                 generation_size (poh_generation),
                  generation_sizes (generation_of (max_generation))));

     for (int gen_idx = max_generation; gen_idx < total_generation_count; gen_idx++)
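One step worth spelling out: relocation visits poh even though pinned objects never move. `relocate_in_uoh_objects (poh_generation)` rewrites the reference *fields* of poh objects whose targets in the compacted generations moved; the objects' own addresses stay fixed. A self-contained illustration (the type and callback are invented for the example):

```cpp
// Illustrative only: a pinned object is address-stable, but its fields may
// point into gen0/gen1/gen2, which a compacting GC moves.
struct pinned_obj
{
    void* field0;
    void* field1;
};

void relocate_fields (pinned_obj* o, void* (*new_address) (void*))
{
    o->field0 = new_address (o->field0); // the object itself stays put,
    o->field1 = new_address (o->field1); // only its fields are updated
}
```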
dprintf (2, ("end of bgc mark: loh: %d, poh: %d, soh: %d", generation_size (loh_generation), + generation_size (poh_generation), generation_sizes (generation_of (max_generation)))); for (int gen_idx = max_generation; gen_idx < total_generation_count; gen_idx++) @@ -27903,7 +28001,7 @@ void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p) bool reset_watch_state = !!concurrent_p; bool is_runtime_suspended = !concurrent_p; BOOL small_object_segments = TRUE; - for(int i = max_generation; i < total_generation_count; i++) + for (int i = max_generation; i < total_generation_count; i++) { heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i))); PREFIX_ASSUME(seg != NULL); @@ -32798,6 +32896,10 @@ void gc_heap::compute_new_dynamic_data (int gen_number) #ifdef BACKGROUND_GC if (i == loh_generation) end_loh_size = total_gen_size; + + if (i == poh_generation) + end_poh_size = total_gen_size; + #endif //BACKGROUND_GC //update counter dd_promoted_size (dd) = out; @@ -33370,7 +33472,7 @@ BOOL gc_heap::ephemeral_gen_fit_p (gc_tuning_point tp) } } -CObjectHeader* gc_heap::allocate_large_object (size_t jsize, uint32_t flags, int64_t& alloc_bytes) +CObjectHeader* gc_heap::allocate_uoh_object (size_t jsize, uint32_t flags, int gen_number, int64_t& alloc_bytes) { //create a new alloc context because gen3context is shared. alloc_context acontext; @@ -33393,17 +33495,19 @@ CObjectHeader* gc_heap::allocate_large_object (size_t jsize, uint32_t flags, int size_t size = AlignQword (jsize); int align_const = get_alignment_constant (FALSE); -#ifdef FEATURE_LOH_COMPACTION - size_t pad = Align (loh_padding_obj_size, align_const); -#else size_t pad = 0; +#ifdef FEATURE_LOH_COMPACTION + if (gen_number == loh_generation) + { + pad = Align (loh_padding_obj_size, align_const); + } #endif //FEATURE_LOH_COMPACTION assert (size >= Align (min_obj_size, align_const)); #ifdef _MSC_VER #pragma inline_depth(0) #endif //_MSC_VER - if (! allocate_more_space (&acontext, (size + pad), flags, loh_generation)) + if (! 
@@ -33447,8 +33551,6 @@ CObjectHeader* gc_heap::allocate_large_object (size_t jsize, uint32_t flags, int
         mark_array_clear_marked (result);
     }
 #ifdef BACKGROUND_GC
-    //the object has to cover one full mark uint32_t
-    assert (size >= mark_word_size);
     if (current_c_gc_state != c_gc_state_free)
     {
         dprintf (3, ("Concurrent allocation of a large object %Ix",
@@ -34295,8 +34397,9 @@ void gc_heap::background_sweep()

     size_t total_soh_size = generation_sizes (generation_of (max_generation));
     size_t total_loh_size = generation_size (loh_generation);
+    size_t total_poh_size = generation_size (poh_generation);

-    dprintf (GTC_LOG, ("loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));
+    dprintf (GTC_LOG, ("h%d: S: poh: %Id, loh: %Id, soh: %Id", heap_number, total_poh_size, total_loh_size, total_soh_size));

     dprintf (GTC_LOG, ("end of bgc sweep: gen2 FL: %Id, FO: %Id",
                 generation_free_list_space (generation_of (max_generation)),
@@ -34307,6 +34410,11 @@ void gc_heap::background_sweep()
                 generation_free_list_space (generation_of (loh_generation)),
                 generation_free_obj_space (generation_of (loh_generation))));

+    dprintf (GTC_LOG, ("h%d: end of bgc sweep: poh FL: %Id, FO: %Id",
+                heap_number,
+                generation_free_list_space (generation_of (poh_generation)),
+                generation_free_obj_space (generation_of (poh_generation))));
+
     FIRE_EVENT(BGC2ndConEnd);
     concurrent_print_time_delta ("background sweep");
@@ -34556,7 +34664,9 @@ void gc_heap::mark_through_cards_for_uoh_objects (card_fn fn,
     size_t total_cards_cleared = 0;

 #ifdef FEATURE_CARD_MARKING_STEALING
-    VOLATILE(uint32_t)* chunk_index = (VOLATILE(uint32_t)*) & card_mark_chunk_index_loh;
+    VOLATILE(uint32_t)* chunk_index = (VOLATILE(uint32_t)*) &(gen_num == loh_generation ?
+        card_mark_chunk_index_loh :
+        card_mark_chunk_index_poh);
     card_marking_enumerator card_mark_enumerator(seg, low, chunk_index);
     card_word_end = 0;
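With card-marking stealing, each UOH generation now gets its own shared chunk cursor (`card_mark_chunk_index_loh` vs. `card_mark_chunk_index_poh`), because loh and poh are scanned as separate passes over different segment chains; sharing one cursor would let threads skip chunks of the second pass. The stealing idea in miniature (standard atomics, not the GC's actual types):

```cpp
#include <atomic>
#include <cstdint>

std::atomic<uint32_t> chunk_cursor{0}; // one cursor per generation being scanned

// Each scanning thread claims the next unprocessed chunk, so every chunk in a
// range is scanned exactly once no matter how many threads participate.
bool claim_next_chunk (uint32_t total_chunks, uint32_t* claimed)
{
    uint32_t i = chunk_cursor.fetch_add (1, std::memory_order_relaxed);
    if (i >= total_chunks)
        return false; // nothing left to steal
    *claimed = i;
    return true;
}
```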
@@ -35451,7 +35561,7 @@ void gc_heap::verify_partial ()
     BOOL bad_ref_p = FALSE;
     BOOL free_ref_p = FALSE;

-    for(int i = max_generation; i < total_generation_count; i++)
+    for (int i = max_generation; i < total_generation_count; i++)
     {
         generation* gen = generation_of (i);
         int align_const = get_alignment_constant (i == max_generation);
@@ -36350,9 +36460,11 @@ HRESULT GCHeap::Initialize()
 #ifdef MULTIPLE_HEAPS
     gc_heap::n_heaps = nhp;

-    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, nhp);
+    // TODO: tuning https://github.com/dotnet/runtime/issues/13739
+    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/, nhp);
 #else
-    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/);
+    // TODO: tuning https://github.com/dotnet/runtime/issues/13739
+    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/);
 #endif //MULTIPLE_HEAPS

     if (hr != S_OK)
@@ -36411,7 +36523,7 @@ HRESULT GCHeap::Initialize()
 #ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way
 #if defined (STRESS_HEAP) && !defined (MULTIPLE_HEAPS)
     if (GCStress<cfg_any>::IsEnabled())  {
-        for(int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++)
+        for (int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++)
         {
             m_StressObjs[i] = CreateGlobalHandle(0);
         }
@@ -37157,7 +37269,7 @@ GCHeap::AllocAlign8Common(void* _hp, alloc_context* acontext, size_t size, uint3

             alloc_context* acontext = generation_alloc_context (hp->generation_of (loh_generation));

-            newAlloc = (Object*) hp->allocate_large_object (size, flags, acontext->alloc_bytes_uoh);
+            newAlloc = (Object*) hp->allocate_uoh_object (size, flags, loh_generation, acontext->alloc_bytes_uoh);
             ASSERT(((size_t)newAlloc & 7) == 0);
         }
@@ -37202,7 +37314,7 @@ GCHeap::AllocLHeap( size_t size, uint32_t flags REQD_ALIGN_DCL)
 #endif //MULTIPLE_HEAPS

     alloc_context* acontext = generation_alloc_context (hp->generation_of (loh_generation));

-    newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, acontext->alloc_bytes_uoh);
+    newAlloc = (Object*) hp->allocate_uoh_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, loh_generation, acontext->alloc_bytes_uoh);
 #ifdef FEATURE_STRUCTALIGN
     newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size);
@@ -37258,7 +37370,7 @@ GCHeap::Alloc(gc_alloc_context* context, size_t size, uint32_t flags REQD_ALIGN_
     }
     else
     {
-        newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, acontext->alloc_bytes_uoh);
+        newAlloc = (Object*) hp->allocate_uoh_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, loh_generation, acontext->alloc_bytes_uoh);
 #ifdef FEATURE_STRUCTALIGN
         newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size);
 #endif // FEATURE_STRUCTALIGN
diff --git a/src/coreclr/src/gc/gc.h b/src/coreclr/src/gc/gc.h
index 2ac171028da1aa..51bb27007e4f4e 100644
--- a/src/coreclr/src/gc/gc.h
+++ b/src/coreclr/src/gc/gc.h
@@ -89,7 +89,8 @@ enum gc_etw_segment_type
 {
     gc_etw_segment_small_object_heap = 0,
     gc_etw_segment_large_object_heap = 1,
-    gc_etw_segment_read_only_heap = 2
+    gc_etw_segment_read_only_heap = 2,
+    gc_etw_segment_pinned_object_heap = 3
 };

 // Types of allocations, emitted by the GCAllocationTick ETW event.
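Note that every public entry point in this diff still passes `loh_generation`; the rename to `allocate_uoh_object` only opens the door. A poh allocation path (hypothetical here; wired up by later changes) would differ only in the generation argument and the alloc context it charges:

```cpp
// Hypothetical poh entry point, by analogy with the loh call sites above:
alloc_context* acontext = generation_alloc_context (hp->generation_of (poh_generation));
Object* newAlloc = (Object*) hp->allocate_uoh_object (size, flags, poh_generation,
                                                      acontext->alloc_bytes_uoh);
```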
@@ -119,13 +120,16 @@ enum gc_generation_num
     // large object heap, technically not a generation, but it is convenient to represent it as such
     loh_generation = 3,

+    // pinned heap, a separate generation for the same reasons as loh
+    poh_generation = 4,
+
     uoh_start_generation = loh_generation,

     // number of ephemeral generations
     ephemeral_generation_count = max_generation,

     // number of all generations
-    total_generation_count = loh_generation + 1
+    total_generation_count = poh_generation + 1
 };
diff --git a/src/coreclr/src/gc/gcpriv.h b/src/coreclr/src/gc/gcpriv.h
index 952eb83c695a83..4801a19f1d487f 100644
--- a/src/coreclr/src/gc/gcpriv.h
+++ b/src/coreclr/src/gc/gcpriv.h
@@ -106,6 +106,8 @@ inline void FATAL_GC_ERROR()

 #define CARD_BUNDLE         //enable card bundle feature.(requires WRITE_WATCH)

+#define ALLOW_REFERENCES_IN_POH  //Allow POH objects to contain references.
+
 // If this is defined we use a map for segments in order to find the heap for
 // a segment fast. But it does use more memory as we have to cover the whole
 // heap range and for each entry we allocate a struct of 5 ptr-size words
@@ -1227,7 +1229,8 @@ class gc_heap

     static
     HRESULT initialize_gc (size_t soh_segment_size,
-                           size_t loh_segment_size
+                           size_t loh_segment_size,
+                           size_t poh_segment_size
 #ifdef MULTIPLE_HEAPS
                            , int number_of_heaps
 #endif //MULTIPLE_HEAPS
@@ -1269,13 +1272,13 @@ class gc_heap
     void gc_thread_stub (void* arg);
 #endif //MULTIPLE_HEAPS

-    // For LOH allocations we only update the alloc_bytes_uoh in allocation
+    // For UOH allocations we only update the alloc_bytes_uoh in allocation
     // context - we don't actually use the ptr/limit from it so I am
     // making this explicit by not passing in the alloc_context.
-    // Note: This is an instance method, but the heap instance is only used for
+    // Note: These are instance methods, but the heap instance is only used for
     // lowest_address and highest_address, which are currently the same accross all heaps.
     PER_HEAP
-    CObjectHeader* allocate_large_object (size_t size, uint32_t flags, int64_t& alloc_bytes);
+    CObjectHeader* allocate_uoh_object (size_t size, uint32_t flags, int gen_num, int64_t& alloc_bytes);

 #ifdef FEATURE_STRUCTALIGN
     PER_HEAP
@@ -1541,6 +1544,9 @@ class gc_heap

     PER_HEAP
     BOOL bgc_loh_allocate_spin();
+
+    PER_HEAP
+    BOOL bgc_poh_allocate_spin();
 #endif //BACKGROUND_GC

 #define max_saved_spinlock_info 48
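The `gc_generation_num` enum above is what makes the loop rewrites in gc.cpp work. Spelled out with the values this diff establishes (soh_gen0 = 0, soh_gen1 = 1, soh_gen2 = max_generation = 2, loh_generation = 3, poh_generation = 4, total_generation_count = 5):

```cpp
// UOH-only walks now visit loh and then poh:
for (int i = uoh_start_generation; i < total_generation_count; i++)
{
    // i = 3 (loh_generation), i = 4 (poh_generation)
}

// Walks over all non-ephemeral generations pick up poh automatically:
for (int i = max_generation; i < total_generation_count; i++)
{
    // i = 2 (gen2), i = 3 (loh), i = 4 (poh)
}
```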
@@ -3724,7 +3730,11 @@ class gc_heap
     PER_HEAP
     size_t bgc_begin_loh_size;
     PER_HEAP
+    size_t bgc_begin_poh_size;
+    PER_HEAP
     size_t end_loh_size;
+    PER_HEAP
+    size_t end_poh_size;

 #ifdef BGC_SERVO_TUNING
     PER_HEAP
@@ -3758,6 +3768,8 @@ class gc_heap
     // in free list doesn't increase the heap size.
     PER_HEAP
     size_t bgc_loh_size_increased;
+    PER_HEAP
+    size_t bgc_poh_size_increased;

     PER_HEAP
     size_t background_soh_alloc_count;
@@ -3895,6 +3907,7 @@ class gc_heap

 #define youngest_generation (generation_of (0))
 #define large_object_generation (generation_of (loh_generation))
+#define pinned_object_generation (generation_of (poh_generation))

 // The more_space_lock and gc_lock is used for 3 purposes:
 //
@@ -3982,6 +3995,16 @@ class gc_heap
     PER_HEAP
     alloc_list gen2_alloc_list[NUM_GEN2_ALIST-1];

+// TODO: tuning https://github.com/dotnet/runtime/issues/13739
+#define NUM_POH_ALIST (12)
+#ifdef BIT64
+#define BASE_POH_ALIST (1*256)
+#else
+#define BASE_POH_ALIST (1*128)
+#endif // BIT64
+    PER_HEAP
+    alloc_list poh_alloc_list[NUM_POH_ALIST-1];
+
 //------------------------------------------

     PER_HEAP
@@ -4271,6 +4294,9 @@ class gc_heap
     PER_HEAP
     VOLATILE(uint32_t) card_mark_chunk_index_loh;

+    PER_HEAP
+    VOLATILE(uint32_t) card_mark_chunk_index_poh;
+
     PER_HEAP
     VOLATILE(bool) card_mark_done_uoh;
@@ -4282,6 +4308,7 @@ class gc_heap
         card_mark_done_soh = false;

         card_mark_chunk_index_loh = ~0;
+        card_mark_chunk_index_poh = ~0;
         card_mark_done_uoh = false;
     }
@@ -4772,6 +4799,7 @@ struct loh_padding_obj
 #define heap_segment_flags_ma_pcommitted 128
 #define heap_segment_flags_uoh_delete   256

+#define heap_segment_flags_poh          512
 #endif //BACKGROUND_GC

 //need to be careful to keep enough pad items to fit a relocation node
@@ -4860,7 +4888,7 @@ BOOL heap_segment_unmappable_p (heap_segment* inst)
 inline
 BOOL heap_segment_uoh_p (heap_segment * inst)
 {
-    return !!(inst->flags & heap_segment_flags_loh);
+    return !!(inst->flags & (heap_segment_flags_loh | heap_segment_flags_poh));
 }

 #ifdef BACKGROUND_GC
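The poh free list mirrors the gen2/loh bucketed lists: NUM_POH_ALIST buckets whose boundaries start at BASE_POH_ALIST and double, assuming the same doubling scheme the existing `allocator` uses for the NUM_GEN2_ALIST/NUM_LOH_ALIST lists. A sketch of bucket selection under that assumption, with the 64-bit constants inlined (illustrative, not the GC's actual lookup):

```cpp
#include <cstddef>

// Sizes < 256 go to bucket 0, < 512 to bucket 1, ..., everything else
// lands in the last bucket.
int poh_bucket_for (size_t size)
{
    const int num_buckets = 12;  // NUM_POH_ALIST
    size_t    limit       = 256; // BASE_POH_ALIST on 64-bit
    int       bucket      = 0;

    while ((bucket < num_buckets - 1) && (size >= limit))
    {
        limit <<= 1;
        bucket++;
    }
    return bucket;
}
```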