diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs index 1511e25523559..1cbf1a36283f5 100644 --- a/compiler/rustc_middle/src/mir/mono.rs +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -335,10 +335,6 @@ impl<'tcx> CodegenUnit<'tcx> { .expect("create_size_estimate must be called before getting a size_estimate") } - pub fn modify_size_estimate(&mut self, delta: usize) { - *self.size_estimate.as_mut().unwrap() += delta; - } - pub fn contains_item(&self, item: &MonoItem<'tcx>) -> bool { self.items().contains_key(item) } diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index a74ba8e4a4be9..155aed4490f10 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -166,19 +166,9 @@ where placed }; - // Merge until we have at most `max_cgu_count` codegen units. - // `merge_codegen_units` is responsible for updating the CGU size - // estimates. - { - let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus"); - merge_codegen_units(cx, &mut codegen_units); - debug_dump(tcx, "MERGE", &codegen_units, unique_inlined_stats); - } - - // In the next step, we use the inlining map to determine which additional - // monomorphizations have to go into each codegen unit. These additional - // monomorphizations can be drop-glue, functions from external crates, and - // local functions the definition of which is marked with `#[inline]`. + // Use the usage map to put additional mono items in each codegen unit: + // drop-glue, functions from external crates, and local functions the + // definition of which is marked with `#[inline]`. 
{ let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_place_inline_items"); place_inlined_mono_items(cx, &mut codegen_units); @@ -190,8 +180,17 @@ where debug_dump(tcx, "INLINE", &codegen_units, unique_inlined_stats); } - // Next we try to make as many symbols "internal" as possible, so LLVM has - // more freedom to optimize. + // Merge until we have at most `max_cgu_count` codegen units. + // `merge_codegen_units` is responsible for updating the CGU size + // estimates. + { + let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus"); + merge_codegen_units(cx, &mut codegen_units); + debug_dump(tcx, "MERGE", &codegen_units, unique_inlined_stats); + } + + // Make as many symbols "internal" as possible, so LLVM has more freedom to + // optimize. if !tcx.sess.link_dead_code() { let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_internalize_symbols"); internalize_symbols(cx, &mut codegen_units, internalization_candidates); @@ -314,35 +313,76 @@ fn merge_codegen_units<'tcx>( // worse generated code. So we don't allow CGUs smaller than this (unless // there is just one CGU, of course). Note that CGU sizes of 100,000+ are // common in larger programs, so this isn't all that large. - const NON_INCR_MIN_CGU_SIZE: usize = 1000; + const NON_INCR_MIN_CGU_SIZE: usize = 2000; // Repeatedly merge the two smallest codegen units as long as: // - we have more CGUs than the upper limit, or // - (Non-incremental builds only) the user didn't specify a CGU count, and // there are multiple CGUs, and some are below the minimum size. + // - njn: update this comment // // The "didn't specify a CGU count" condition is because when an explicit // count is requested we observe it as closely as possible. For example, // the `compiler_builtins` crate sets `codegen-units = 10000` and it's // critical they aren't merged. Also, some tests use explicit small values // and likewise won't work if small CGUs are merged. 
- while codegen_units.len() > cx.tcx.sess.codegen_units().as_usize() - || (cx.tcx.sess.opts.incremental.is_none() - && matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_)) - && codegen_units.len() > 1 - && codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE)) - { + //eprintln!("-----"); + loop { + // njn: where to put this? // Sort small cgus to the back. codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate())); + //eprintln!("cgus: {:?}", codegen_units.iter().map(|cgu| cgu.size_estimate()).collect::<Vec<_>>()); + + let merge1 = codegen_units.len() > cx.tcx.sess.codegen_units().as_usize(); + + let merge2 = cx.tcx.sess.opts.incremental.is_none() + && matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_)) + && codegen_units.len() >= 2 + && codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE); + + // njn: addition is an imperfect measure, could be overlap + let merge3 = cx.tcx.sess.opts.incremental.is_none() + && matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_)) + && codegen_units.len() >= 3 + && { + // eprintln!( + // "sz: {} >= {} + {}?", + // codegen_units[0].size_estimate(), + // codegen_units[codegen_units.len() - 2].size_estimate(), + // codegen_units[codegen_units.len() - 1].size_estimate()); + + (codegen_units[0].size_estimate() as f64 * 0.8) + >= (codegen_units[codegen_units.len() - 2].size_estimate() + + codegen_units[codegen_units.len() - 1].size_estimate()) + as f64 + }; + + if !(merge1 || merge2 || merge3) { + break; + } + let mut smallest = codegen_units.pop().unwrap(); let second_smallest = codegen_units.last_mut().unwrap(); - // Move the mono-items from `smallest` to `second_smallest` - second_smallest.modify_size_estimate(smallest.size_estimate()); - for (k, v) in smallest.items_mut().drain() { - second_smallest.items_mut().insert(k, v); - } + // let sm_size = smallest.size_estimate(); + // let sec_sm_size = second_smallest.size_estimate(); + + // Move the items 
from `smallest` to `second_smallest`. Some of them + // may be duplicate inlined items, in which case the destination CGU is + // unaffected. Recalculate size estimates afterwards. + second_smallest.items_mut().extend(smallest.items_mut().drain()); + second_smallest.create_size_estimate(cx.tcx); + + // eprintln!( + // "merge: {} {} {}: {} + {} -> {}", + // merge1, + // merge2, + // merge3, + // sec_sm_size, + // sm_size, + // second_smallest.size_estimate() + // ); // Record that `second_smallest` now contains all the stuff that was // in `smallest` before.