From 59ddb9377af6a3d72e26a569689c93d2cc6acbe2 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 1 Nov 2025 19:27:23 +0900 Subject: [PATCH 1/6] feat: dynamically adjust merge batch size based on file descriptor limits - Add `effective_merge_batch_size()` function to calculate batch size considering fd soft limit, with minimums and safety margins. - Generalize fd limit handling from Linux-only to Unix systems using `fd_soft_limit()`. - Update merge logic to use dynamic batch size instead of fixed `settings.merge_batch_size` to prevent fd exhaustion. --- src/uu/sort/src/merge.rs | 37 ++++++++++++++++++++++++++++++------- src/uu/sort/src/sort.rs | 35 ++++++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 18 deletions(-) diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index 1e538c6d931..87b74d77077 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -30,7 +30,7 @@ use uucore::error::{FromIo, UResult}; use crate::{ GlobalSettings, Output, SortError, chunks::{self, Chunk, RecycledChunk}, - compare_by, open, + compare_by, fd_soft_limit, open, tmp_dir::TmpDirWrapper, }; @@ -62,6 +62,26 @@ fn replace_output_file_in_input_files( Ok(()) } +fn effective_merge_batch_size(settings: &GlobalSettings) -> usize { + const MIN_BATCH_SIZE: usize = 2; + const RESERVED_STDIO: usize = 3; + const RESERVED_OUTPUT: usize = 1; + const SAFETY_MARGIN: usize = 1; + let mut batch_size = settings.merge_batch_size.max(MIN_BATCH_SIZE); + + if let Some(limit) = fd_soft_limit() { + let reserved = RESERVED_STDIO + RESERVED_OUTPUT + SAFETY_MARGIN; + let available_inputs = limit.saturating_sub(reserved); + if available_inputs >= MIN_BATCH_SIZE { + batch_size = batch_size.min(available_inputs); + } else { + batch_size = MIN_BATCH_SIZE; + } + } + + batch_size +} + /// Merge pre-sorted `Box`s. /// /// If `settings.merge_batch_size` is greater than the length of `files`, intermediate files will be used. @@ -94,18 +114,21 @@ pub fn merge_with_file_limit< output: Output, tmp_dir: &mut TmpDirWrapper, ) -> UResult<()> { - if files.len() <= settings.merge_batch_size { + let batch_size = effective_merge_batch_size(settings); + debug_assert!(batch_size >= 2); + + if files.len() <= batch_size { let merger = merge_without_limit(files, settings); merger?.write_all(settings, output) } else { let mut temporary_files = vec![]; - let mut batch = vec![]; + let mut batch = Vec::with_capacity(batch_size); for file in files { batch.push(file); - if batch.len() >= settings.merge_batch_size { - assert_eq!(batch.len(), settings.merge_batch_size); + if batch.len() >= batch_size { + assert_eq!(batch.len(), batch_size); let merger = merge_without_limit(batch.into_iter(), settings)?; - batch = vec![]; + batch = Vec::with_capacity(batch_size); let mut tmp_file = Tmp::create(tmp_dir.next_file()?, settings.compress_prog.as_deref())?; @@ -115,7 +138,7 @@ pub fn merge_with_file_limit< } // Merge any remaining files that didn't get merged in a full batch above. if !batch.is_empty() { - assert!(batch.len() < settings.merge_batch_size); + assert!(batch.len() < batch_size); let merger = merge_without_limit(batch.into_iter(), settings)?; let mut tmp_file = diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 22c96a436ed..683fa21c31c 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -25,8 +25,8 @@ use clap::{Arg, ArgAction, Command}; use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; -#[cfg(target_os = "linux")] -use nix::libc::{RLIMIT_NOFILE, getrlimit, rlimit}; +#[cfg(unix)] +use nix::libc; use numeric_str_cmp::{NumInfo, NumInfoParseSettings, human_numeric_str_cmp, numeric_str_cmp}; use rand::{Rng, rng}; use rayon::prelude::*; @@ -1072,18 +1072,31 @@ fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { ) } -#[cfg(target_os = "linux")] -fn get_rlimit() -> UResult { - let mut limit = rlimit { +#[cfg(unix)] +pub(crate) fn fd_soft_limit() -> Option { + let mut limit = libc::rlimit { rlim_cur: 0, rlim_max: 0, }; - match unsafe { getrlimit(RLIMIT_NOFILE, &raw mut limit) } { - 0 => Ok(limit.rlim_cur as usize), - _ => Err(UUsageError::new(2, translate!("sort-failed-fetch-rlimit"))), + + let result = unsafe { libc::getrlimit(libc::RLIMIT_NOFILE, &raw mut limit) }; + if result == 0 { + let current = limit.rlim_cur; + if current == libc::RLIM_INFINITY { + None + } else { + usize::try_from(current).ok() + } + } else { + None } } +#[cfg(not(unix))] +pub(crate) fn fd_soft_limit() -> Option { + None +} + const STDIN_FILE: &str = "-"; #[cfg(target_os = "linux")] const LINUX_BATCH_DIVISOR: usize = 4; @@ -1096,12 +1109,12 @@ fn default_merge_batch_size() -> usize { #[cfg(target_os = "linux")] { // Adjust merge batch size dynamically based on available file descriptors. - match get_rlimit() { - Ok(limit) => { + match fd_soft_limit() { + Some(limit) => { let usable_limit = limit.saturating_div(LINUX_BATCH_DIVISOR); usable_limit.clamp(LINUX_BATCH_MIN, LINUX_BATCH_MAX) } - Err(_) => 64, + None => 64, } } From ba5f67353f2feb010141bd414999cc38eedc57b3 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 1 Nov 2025 19:33:13 +0900 Subject: [PATCH 2/6] fix(sort): update rlimit fetching to use fd_soft_limit with error handling Replace direct call to get_rlimit()? with fd_soft_limit(), adding a check for None value to return a usage error if rlimit cannot be fetched. This improves robustness on Linux by ensuring proper error handling when retrieving the file descriptor soft limit. --- src/uu/sort/src/sort.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 683fa21c31c..9c1e32eeadd 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1292,7 +1292,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { translate!( "sort-maximum-batch-size-rlimit", - "rlimit" => get_rlimit()? + "rlimit" => { + let Some(rlimit) = fd_soft_limit() else { + return Err(UUsageError::new( + 2, + translate!("sort-failed-fetch-rlimit"), + )); + }; + rlimit + } ) } #[cfg(not(target_os = "linux"))] From dc309e6965eee30a965469d3e7fdc60b87ee76b1 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 29 Nov 2025 21:10:23 +0900 Subject: [PATCH 3/6] refactor(sort): restrict nix::libc and fd_soft_limit to Linux Update conditional compilation attributes from #[cfg(unix)] to #[cfg(target_os = "linux")] for the nix::libc import and fd_soft_limit function implementations, ensuring these features are only enabled on Linux systems to improve portability and avoid issues on other Unix-like platforms. --- src/uu/sort/src/sort.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index ae3d820d6f5..b155483fbcd 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -25,7 +25,7 @@ use clap::{Arg, ArgAction, Command}; use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; -#[cfg(unix)] +#[cfg(target_os = "linux")] use nix::libc; use numeric_str_cmp::{NumInfo, NumInfoParseSettings, human_numeric_str_cmp, numeric_str_cmp}; use rand::{Rng, rng}; @@ -1072,7 +1072,7 @@ fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { ) } -#[cfg(unix)] +#[cfg(target_os = "linux")] pub(crate) fn fd_soft_limit() -> Option { let mut limit = libc::rlimit { rlim_cur: 0, @@ -1092,7 +1092,7 @@ pub(crate) fn fd_soft_limit() -> Option { } } -#[cfg(not(unix))] +#[cfg(not(target_os = "linux"))] pub(crate) fn fd_soft_limit() -> Option { None } From bf6d291589d6579ad2daadccf0e51efef183f352 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 20 Dec 2025 10:18:20 +0900 Subject: [PATCH 4/6] refactor: improve thread management and replace unsafe libc calls Replace unsafe libc::getrlimit calls in fd_soft_limit with safe nix crate usage. Update Rayon thread configuration to use ThreadPoolBuilder instead of environment variables for better control. Add documentation comment to effective_merge_batch_size function for clarity. --- src/uu/sort/src/merge.rs | 2 ++ src/uu/sort/src/sort.rs | 32 ++++++++++++++------------------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index 0948dbce902..502dcda82a6 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -62,6 +62,8 @@ fn replace_output_file_in_input_files( Ok(()) } +/// Determine the effective merge batch size, enforcing a minimum and respecting the +/// file-descriptor soft limit after reserving stdio/output and a safety margin. fn effective_merge_batch_size(settings: &GlobalSettings) -> usize { const MIN_BATCH_SIZE: usize = 2; const RESERVED_STDIO: usize = 3; diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index b155483fbcd..7a01b26cd00 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -26,7 +26,7 @@ use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; #[cfg(target_os = "linux")] -use nix::libc; +use nix::sys::resource::{getrlimit, Resource, RLIM_INFINITY}; use numeric_str_cmp::{NumInfo, NumInfoParseSettings, human_numeric_str_cmp, numeric_str_cmp}; use rand::{Rng, rng}; use rayon::prelude::*; @@ -1074,21 +1074,11 @@ fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { #[cfg(target_os = "linux")] pub(crate) fn fd_soft_limit() -> Option { - let mut limit = libc::rlimit { - rlim_cur: 0, - rlim_max: 0, - }; - - let result = unsafe { libc::getrlimit(libc::RLIMIT_NOFILE, &raw mut limit) }; - if result == 0 { - let current = limit.rlim_cur; - if current == libc::RLIM_INFINITY { - None - } else { - usize::try_from(current).ok() - } - } else { + let (soft, _hard) = getrlimit(Resource::RLIMIT_NOFILE).ok()?; + if soft == RLIM_INFINITY { None + } else { + usize::try_from(soft).ok() } } @@ -1239,9 +1229,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { settings.threads = matches .get_one::(options::PARALLEL) .map_or_else(|| "0".to_string(), String::from); - unsafe { - env::set_var("RAYON_NUM_THREADS", &settings.threads); - } + let num_threads = match settings.threads.parse::() { + Ok(0) | Err(_) => std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(1), + Ok(n) => n, + }; + let _ = rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .build_global(); } if let Some(size_str) = matches.get_one::(options::BUF_SIZE) { From 38749c69865a9892679c540d7b3a602c7a9144ee Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 20 Dec 2025 10:26:13 +0900 Subject: [PATCH 5/6] refactor(linux): improve error handling in fd_soft_limit function Extract the rlimit fetching logic into a separate `get_rlimit` function that returns `UResult` and properly handles errors with `UUsageError`, instead of silently returning `None` on failure or infinity. This provides better error reporting for resource limit issues on Linux platforms. --- src/uu/sort/src/sort.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 7a01b26cd00..eaae00596c5 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -25,8 +25,6 @@ use clap::{Arg, ArgAction, Command}; use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; -#[cfg(target_os = "linux")] -use nix::sys::resource::{getrlimit, Resource, RLIM_INFINITY}; use numeric_str_cmp::{NumInfo, NumInfoParseSettings, human_numeric_str_cmp, numeric_str_cmp}; use rand::{Rng, rng}; use rayon::prelude::*; @@ -1073,13 +1071,21 @@ fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { } #[cfg(target_os = "linux")] -pub(crate) fn fd_soft_limit() -> Option { - let (soft, _hard) = getrlimit(Resource::RLIMIT_NOFILE).ok()?; - if soft == RLIM_INFINITY { - None - } else { - usize::try_from(soft).ok() +fn get_rlimit() -> UResult { + use nix::sys::resource::{getrlimit, Resource, RLIM_INFINITY}; + + let (rlim_cur, _rlim_max) = getrlimit(Resource::RLIMIT_NOFILE) + .map_err(|_| UUsageError::new(2, translate!("sort-failed-fetch-rlimit")))?; + if rlim_cur == RLIM_INFINITY { + return Err(UUsageError::new(2, translate!("sort-failed-fetch-rlimit"))); } + usize::try_from(rlim_cur) + .map_err(|_| UUsageError::new(2, translate!("sort-failed-fetch-rlimit"))) +} + +#[cfg(target_os = "linux")] +pub(crate) fn fd_soft_limit() -> Option { + get_rlimit().ok() } #[cfg(not(target_os = "linux"))] From 10aea61ac852d2af5c7128bee412f7ec6c00b103 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 20 Dec 2025 10:27:42 +0900 Subject: [PATCH 6/6] refactor(sort): reorder imports in get_rlimit for consistency Reordered the nix::sys::resource imports to group constants first (RLIM_INFINITY), then types (Resource), and finally functions (getrlimit), improving code readability and adhering to import style guidelines. --- src/uu/sort/src/sort.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index bbfe579e6f4..6122089e2f3 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1073,7 +1073,7 @@ fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { #[cfg(target_os = "linux")] fn get_rlimit() -> UResult { - use nix::sys::resource::{getrlimit, Resource, RLIM_INFINITY}; + use nix::sys::resource::{RLIM_INFINITY, Resource, getrlimit}; let (rlim_cur, _rlim_max) = getrlimit(Resource::RLIMIT_NOFILE) .map_err(|_| UUsageError::new(2, translate!("sort-failed-fetch-rlimit")))?;