From c0ca29922db9a4cd1ea6245238c59aa2f212e196 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 22 Dec 2025 11:24:08 -0500 Subject: [PATCH] Implement Ruby heap This heap emulates the growth characteristics of the Ruby default GC's heap. By default, the heap grows by 40%, requires at least 20% empty after a GC, and allows at most 65% empty before it shrinks the heap. This is all configurable via the same environment variables the default GC uses (`RUBY_GC_HEAP_FREE_SLOTS_GOAL_RATIO`, `RUBY_GC_HEAP_FREE_SLOTS_MIN_RATIO`, `RUBY_GC_HEAP_FREE_SLOTS_MAX_RATIO`, respectively). The Ruby heap can be enabled via the `MMTK_HEAP_MODE=ruby` environment variable. Compared to the dynamic heap in MMTk (which uses the MemBalancer algorithm), the Ruby heap allows the heap to grow more generously, which uses a bit more memory but offers significant performance gains because it runs GC much less frequently. We can see in the benchmarks below that this Ruby heap gives faster performance than the dynamic heap in every benchmark, and is over 2x faster in many of them. We see that memory is often around 10-20% higher with certain outliers that use significantly more memory like hexapdf and erubi-rails. We can also see that this brings MMTk's Ruby heap much closer in performance to the default GC. 
Ruby heap benchmark results: -------------- -------------- ---------- --------- bench ruby heap (ms) stddev (%) RSS (MiB) activerecord 233.6 10.7 85.9 chunky-png 457.1 1.1 79.3 erubi-rails 1148.0 3.8 133.3 hexapdf 1570.5 2.4 403.0 liquid-c 42.8 5.3 43.4 liquid-compile 41.3 7.6 52.6 liquid-render 102.8 3.8 55.3 lobsters 651.9 8.0 426.3 mail 106.4 1.8 67.2 psych-load 1552.1 0.8 43.4 railsbench 1707.2 6.0 145.6 rubocop 127.2 15.3 148.8 ruby-lsp 136.6 11.7 113.7 sequel 47.2 5.9 44.4 shipit 1197.5 3.6 301.0 -------------- -------------- ---------- --------- Dynamic heap benchmark results: -------------- ----------------- ---------- --------- bench dynamic heap (ms) stddev (%) RSS (MiB) activerecord 845.3 3.1 76.7 chunky-png 525.9 0.4 38.9 erubi-rails 2694.9 3.4 115.8 hexapdf 2344.8 5.6 164.9 liquid-c 73.7 5.0 40.5 liquid-compile 107.1 6.8 40.3 liquid-render 147.2 1.7 39.5 lobsters 697.6 4.5 342.0 mail 224.6 2.1 64.0 psych-load 4326.7 0.6 37.4 railsbench 3218.0 5.5 124.7 rubocop 203.6 6.1 110.9 ruby-lsp 350.7 3.2 79.0 sequel 121.8 2.5 39.6 shipit 1510.1 3.1 220.8 -------------- ----------------- ---------- --------- Default GC benchmark results: -------------- --------------- ---------- --------- bench default GC (ms) stddev (%) RSS (MiB) activerecord 148.4 0.6 67.9 chunky-png 440.2 0.7 57.0 erubi-rails 722.7 0.3 97.8 hexapdf 1466.2 1.7 254.3 liquid-c 32.5 3.6 42.3 liquid-compile 31.2 1.9 35.4 liquid-render 88.3 0.7 30.8 lobsters 633.6 7.0 305.4 mail 76.6 1.6 53.2 psych-load 1166.2 1.3 29.1 railsbench 1262.9 2.3 114.7 rubocop 105.6 0.8 95.4 ruby-lsp 101.6 1.4 75.4 sequel 27.4 1.2 33.1 shipit 1083.1 1.5 163.4 -------------- --------------- ---------- --------- --- gc/mmtk/src/api.rs | 64 +++++++++++++++- gc/mmtk/src/collection.rs | 6 ++ gc/mmtk/src/heap/mod.rs | 4 + gc/mmtk/src/heap/ruby_heap_trigger.rs | 104 ++++++++++++++++++++++++++ gc/mmtk/src/lib.rs | 1 + test/mmtk/test_configuration.rb | 2 +- 6 files changed, 176 insertions(+), 5 deletions(-) create mode 100644 
gc/mmtk/src/heap/mod.rs create mode 100644 gc/mmtk/src/heap/ruby_heap_trigger.rs diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs index b99cbdc..d735a81 100644 --- a/gc/mmtk/src/api.rs +++ b/gc/mmtk/src/api.rs @@ -4,6 +4,7 @@ use mmtk::util::alloc::BumpPointer; use mmtk::util::alloc::ImmixAllocator; +use mmtk::util::conversions; use mmtk::util::options::PlanSelector; use std::str::FromStr; use std::sync::atomic::Ordering; @@ -13,6 +14,8 @@ use crate::abi::RubyBindingOptions; use crate::abi::RubyUpcalls; use crate::binding; use crate::binding::RubyBinding; +use crate::heap::RubyHeapTriggerConfig; +use crate::heap::RUBY_HEAP_TRIGGER_CONFIG; use crate::mmtk; use crate::utils::default_heap_max; use crate::utils::parse_capacity; @@ -79,6 +82,29 @@ fn mmtk_builder_default_parse_heap_max() -> usize { parse_env_var_with("MMTK_HEAP_MAX", parse_capacity).unwrap_or_else(default_heap_max) } +fn parse_float_env_var(key: &str, default: f64, min: f64, max: f64) -> f64 { + parse_env_var_with(key, |s| { + let mut float = f64::from_str(s).unwrap_or(default); + + if float <= min { + eprintln!( + "{key} has value {float} which must be greater than {min}, using default instead" + ); + float = default; + } + + if float >= max { + eprintln!( + "{key} has value {float} which must be less than {max}, using default instead" + ); + float = default; + } + + Some(float) + }) + .unwrap_or(default) +} + fn mmtk_builder_default_parse_heap_mode(heap_min: usize, heap_max: usize) -> GCTriggerSelector { let make_fixed = || GCTriggerSelector::FixedHeapSize(heap_max); let make_dynamic = || GCTriggerSelector::DynamicHeapSize(heap_min, heap_max); @@ -86,6 +112,25 @@ fn mmtk_builder_default_parse_heap_mode(heap_min: usize, heap_max: usize) -> GCT parse_env_var_with("MMTK_HEAP_MODE", |s| match s { "fixed" => Some(make_fixed()), "dynamic" => Some(make_dynamic()), + "ruby" => { + let min_ratio = parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MIN_RATIO", 0.2, 0.0, 1.0); + let goal_ratio = + 
parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_GOAL_RATIO", 0.4, min_ratio, 1.0); + let max_ratio = + parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MAX_RATIO", 0.65, goal_ratio, 1.0); + + crate::heap::RUBY_HEAP_TRIGGER_CONFIG + .set(RubyHeapTriggerConfig { + min_heap_pages: conversions::bytes_to_pages_up(heap_min), + max_heap_pages: conversions::bytes_to_pages_up(heap_max), + heap_pages_min_ratio: min_ratio, + heap_pages_goal_ratio: goal_ratio, + heap_pages_max_ratio: max_ratio, + }) + .unwrap_or_else(|_| panic!("RUBY_HEAP_TRIGGER_CONFIG is already set")); + + Some(GCTriggerSelector::Delegated) + } _ => None, }) .unwrap_or_else(make_dynamic) @@ -146,7 +191,7 @@ pub unsafe extern "C" fn mmtk_init_binding( crate::set_panic_hook(); - let builder = unsafe { Box::from_raw(builder) }; + let builder: Box = unsafe { Box::from_raw(builder) }; let binding_options = RubyBindingOptions { ractor_check_mode: false, suffix_size: 0, @@ -388,11 +433,12 @@ pub extern "C" fn mmtk_plan() -> *const u8 { pub extern "C" fn mmtk_heap_mode() -> *const u8 { static FIXED_HEAP: &[u8] = b"fixed\0"; static DYNAMIC_HEAP: &[u8] = b"dynamic\0"; + static RUBY_HEAP: &[u8] = b"ruby\0"; match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(_) => FIXED_HEAP.as_ptr(), GCTriggerSelector::DynamicHeapSize(_, _) => DYNAMIC_HEAP.as_ptr(), - _ => panic!("Unknown heap mode"), + GCTriggerSelector::Delegated => RUBY_HEAP.as_ptr(), } } @@ -401,7 +447,12 @@ pub extern "C" fn mmtk_heap_min() -> usize { match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(_) => 0, GCTriggerSelector::DynamicHeapSize(min_size, _) => min_size, - _ => panic!("Unknown heap mode"), + GCTriggerSelector::Delegated => conversions::pages_to_bytes( + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("RUBY_HEAP_TRIGGER_CONFIG not set") + .min_heap_pages, + ), } } @@ -410,7 +461,12 @@ pub extern "C" fn mmtk_heap_max() -> usize { match 
*crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(max_size) => max_size, GCTriggerSelector::DynamicHeapSize(_, max_size) => max_size, - _ => panic!("Unknown heap mode"), + GCTriggerSelector::Delegated => conversions::pages_to_bytes( + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("RUBY_HEAP_TRIGGER_CONFIG not set") + .max_heap_pages, + ), } } diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs index 41c508a..824747b 100644 --- a/gc/mmtk/src/collection.rs +++ b/gc/mmtk/src/collection.rs @@ -1,9 +1,11 @@ use crate::abi::GCThreadTLS; use crate::api::RubyMutator; +use crate::heap::RubyHeapTrigger; use crate::{mmtk, upcalls, Ruby}; use mmtk::memory_manager; use mmtk::scheduler::*; +use mmtk::util::heap::GCTriggerPolicy; use mmtk::util::{VMMutatorThread, VMThread, VMWorkerThread}; use mmtk::vm::{Collection, GCThreadContext}; use std::sync::atomic::Ordering; @@ -67,6 +69,10 @@ impl Collection for VMCollection { fn vm_live_bytes() -> usize { (upcalls().vm_live_bytes)() } + + fn create_gc_trigger() -> Box> { + Box::new(RubyHeapTrigger::default()) + } } impl VMCollection { diff --git a/gc/mmtk/src/heap/mod.rs b/gc/mmtk/src/heap/mod.rs new file mode 100644 index 0000000..6af7c1b --- /dev/null +++ b/gc/mmtk/src/heap/mod.rs @@ -0,0 +1,4 @@ +mod ruby_heap_trigger; +pub use ruby_heap_trigger::RubyHeapTrigger; +pub use ruby_heap_trigger::RubyHeapTriggerConfig; +pub use ruby_heap_trigger::RUBY_HEAP_TRIGGER_CONFIG; diff --git a/gc/mmtk/src/heap/ruby_heap_trigger.rs b/gc/mmtk/src/heap/ruby_heap_trigger.rs new file mode 100644 index 0000000..9215e2e --- /dev/null +++ b/gc/mmtk/src/heap/ruby_heap_trigger.rs @@ -0,0 +1,104 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; + +use mmtk::util::heap::GCTriggerPolicy; +use mmtk::util::heap::SpaceStats; +use mmtk::Plan; +use mmtk::MMTK; +use once_cell::sync::OnceCell; + +use crate::Ruby; + +pub static RUBY_HEAP_TRIGGER_CONFIG: OnceCell = OnceCell::new(); + +pub struct 
RubyHeapTriggerConfig { + /// Min heap size + pub min_heap_pages: usize, + /// Max heap size + pub max_heap_pages: usize, + /// Minimum ratio of empty space after a GC before the heap will grow + pub heap_pages_min_ratio: f64, + /// Ratio the heap will grow by + pub heap_pages_goal_ratio: f64, + /// Maximum ratio of empty space after a GC before the heap will shrink + pub heap_pages_max_ratio: f64, +} + +pub struct RubyHeapTrigger { + /// Target number of heap pages + target_heap_pages: AtomicUsize, +} + +impl GCTriggerPolicy for RubyHeapTrigger { + fn is_gc_required( + &self, + space_full: bool, + space: Option>, + plan: &dyn Plan, + ) -> bool { + // Let the plan decide + plan.collection_required(space_full, space) + } + + fn on_gc_end(&self, mmtk: &'static MMTK) { + if let Some(plan) = mmtk.get_plan().generational() { + if plan.is_current_gc_nursery() { + // Nursery GC + } else { + // Full GC + } + + panic!("TODO: support for generational GC not implemented") + } else { + let used_pages = mmtk.get_plan().get_used_pages(); + + let target_min = + (used_pages as f64 * (1.0 + Self::get_config().heap_pages_min_ratio)) as usize; + let target_max = + (used_pages as f64 * (1.0 + Self::get_config().heap_pages_max_ratio)) as usize; + let new_target = + (((used_pages as f64) * (1.0 + Self::get_config().heap_pages_goal_ratio)) as usize) + .clamp( + Self::get_config().min_heap_pages, + Self::get_config().max_heap_pages, + ); + + if used_pages < target_min || used_pages > target_max { + self.target_heap_pages.store(new_target, Ordering::Relaxed); + } + } + } + + fn is_heap_full(&self, plan: &dyn Plan) -> bool { + plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_current_heap_size_in_pages(&self) -> usize { + self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_max_heap_size_in_pages(&self) -> usize { + Self::get_config().max_heap_pages + } + + fn can_heap_size_grow(&self) -> bool { + 
self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages + } +} + +impl Default for RubyHeapTrigger { + fn default() -> Self { + let min_heap_pages = Self::get_config().min_heap_pages; + + Self { + target_heap_pages: AtomicUsize::new(min_heap_pages), + } + } +} + +impl RubyHeapTrigger { + fn get_config<'b>() -> &'b RubyHeapTriggerConfig { + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("Attempt to use RUBY_HEAP_TRIGGER_CONFIG before it is initialized") + } +} diff --git a/gc/mmtk/src/lib.rs b/gc/mmtk/src/lib.rs index 4bcafb5..8647793 100644 --- a/gc/mmtk/src/lib.rs +++ b/gc/mmtk/src/lib.rs @@ -25,6 +25,7 @@ pub mod active_plan; pub mod api; pub mod binding; pub mod collection; +pub mod heap; pub mod object_model; pub mod reference_glue; pub mod scanning; diff --git a/test/mmtk/test_configuration.rb b/test/mmtk/test_configuration.rb index 427cd9a..d44abc4 100644 --- a/test/mmtk/test_configuration.rb +++ b/test/mmtk/test_configuration.rb @@ -22,7 +22,7 @@ def test_MMTK_THREADS end end - %w(fixed dynamic).each do |heap| + %w(fixed dynamic ruby).each do |heap| define_method(:"test_MMTK_HEAP_MODE_#{heap}") do assert_separately([{ "MMTK_HEAP_MODE" => heap }], <<~RUBY) assert_equal("#{heap}", GC.config[:mmtk_heap_mode])