From e55e0c9f364771ff72c84750a40d5e682fcf0a66 Mon Sep 17 00:00:00 2001 From: mattsu Date: Mon, 29 Dec 2025 10:00:19 +0900 Subject: [PATCH 1/7] feat(sort): add warning messages for obsolescent keys and options Added localized warning messages in en-US.ftl for various sort command issues, including obsolescent key formats, ignored options, and locale-related warnings. Implemented LegacyKeyWarning struct and GlobalOptionFlags in sort.rs to detect deprecated key syntax (e.g., +field -field) and suggest modern -k replacements, improving user guidance and compatibility. --- src/uu/sort/locales/en-US.ftl | 16 ++ src/uu/sort/src/sort.rs | 347 +++++++++++++++++++++++++++++++++- 2 files changed, 354 insertions(+), 9 deletions(-) diff --git a/src/uu/sort/locales/en-US.ftl b/src/uu/sort/locales/en-US.ftl index a5c5d01b69b..f571c56313e 100644 --- a/src/uu/sort/locales/en-US.ftl +++ b/src/uu/sort/locales/en-US.ftl @@ -51,6 +51,22 @@ sort-error-write-failed = write failed: {$output} sort-failed-to-delete-temporary-directory = failed to delete temporary directory: {$error} sort-failed-to-set-up-signal-handler = failed to set up signal handler: {$error} +# Warning messages +sort-warning-failed-to-set-locale = failed to set locale +sort-warning-simple-byte-comparison = text ordering performed using simple byte comparison +sort-warning-key-zero-width = key {$key} has zero width and will be ignored +sort-warning-key-numeric-spans-fields = key {$key} is numeric and spans multiple fields +sort-warning-leading-blanks-significant = leading blanks are significant in key {$key}; consider also specifying 'b' +sort-warning-numbers-use-decimal-point = numbers use '.' as a decimal point in this locale +sort-warning-options-ignored = options '-{$options}' are ignored +sort-warning-option-ignored = option '-{$option}' is ignored +sort-warning-option-reverse-last-resort = option '-r' only applies to last-resort comparison +sort-warning-obsolescent-key = obsolescent key '{$key}' used; consider '-k {$replacement}' instead +sort-warning-separator-grouping = field separator '{$sep}' is treated as a group separator in numbers +sort-warning-separator-decimal = field separator '{$sep}' is treated as a decimal point in numbers +sort-warning-separator-minus = field separator '{$sep}' is treated as a minus sign in numbers +sort-warning-separator-plus = field separator '{$sep}' is treated as a plus sign in numbers + # Help messages sort-help-help = Print help information. sort-help-version = Print version information. diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 071163c5aee..8bf02d29a3d 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -21,7 +21,7 @@ mod tmp_dir; use bigdecimal::BigDecimal; use chunks::LineData; use clap::builder::ValueParser; -use clap::{Arg, ArgAction, Command}; +use clap::{Arg, ArgAction, ArgMatches, Command}; use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; @@ -1119,6 +1119,83 @@ struct LegacyKeyPart { opts: String, } +#[derive(Debug, Clone)] +struct LegacyKeyWarning { + key_index: usize, + from_field: usize, + to_field: Option, + to_char: Option, +} + +impl LegacyKeyWarning { + fn legacy_key_display(&self) -> String { + match self.to_field { + Some(to) => format!("+{} -{}", self.from_field, to), + None => format!("+{}", self.from_field), + } + } + + fn replacement_key_display(&self) -> String { + let start_field = self.from_field.saturating_add(1); + match self.to_field { + Some(to_field) => { + let end_field = match self.to_char { + Some(0) | None => to_field.max(1), + Some(_) => to_field.saturating_add(1), + }; + format!("{start_field},{end_field}") + } + None => start_field.to_string(), + } + } +} + +#[derive(Default)] +struct GlobalOptionFlags { + keys_specified: bool, + ignore_leading_blanks: bool, + dictionary_order: bool, + ignore_case: bool, + ignore_non_printing: bool, + reverse: bool, + stable: bool, + unique: bool, + mode_numeric: bool, + mode_general: bool, + mode_human: bool, + mode_month: bool, + mode_random: bool, + mode_version: bool, +} + +impl GlobalOptionFlags { + fn from_matches(matches: &ArgMatches) -> Self { + let sort_value = matches + .get_one::(options::modes::SORT) + .map(|s| s.as_str()); + Self { + keys_specified: matches.contains_id(options::KEY), + ignore_leading_blanks: matches.get_flag(options::IGNORE_LEADING_BLANKS), + dictionary_order: matches.get_flag(options::DICTIONARY_ORDER), + ignore_case: matches.get_flag(options::IGNORE_CASE), + ignore_non_printing: matches.get_flag(options::IGNORE_NONPRINTING), + reverse: matches.get_flag(options::REVERSE), + stable: matches.get_flag(options::STABLE), + unique: matches.get_flag(options::UNIQUE), + mode_human: matches.get_flag(options::modes::HUMAN_NUMERIC) + || sort_value == Some("human-numeric"), + mode_month: matches.get_flag(options::modes::MONTH) || sort_value == Some("month"), + mode_general: matches.get_flag(options::modes::GENERAL_NUMERIC) + || sort_value == Some("general-numeric"), + mode_numeric: matches.get_flag(options::modes::NUMERIC) + || sort_value == Some("numeric"), + mode_version: matches.get_flag(options::modes::VERSION) + || sort_value == Some("version"), + mode_random: matches.get_flag(options::modes::RANDOM) || sort_value == Some("random"), + } + } +} + fn parse_usize_or_max(num: &str) -> Option { match num.parse::() { Ok(v) => Some(v), @@ -1192,16 +1269,18 @@ fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String { /// Preprocess argv to handle legacy +POS1 [-POS2] syntax by converting it into -k forms /// before clap sees the arguments. -fn preprocess_legacy_args(args: I) -> Vec +fn preprocess_legacy_args(args: I) -> (Vec, Vec) where I: IntoIterator, I::Item: Into, { if !allows_traditional_usage() { - return args.into_iter().map(Into::into).collect(); + return (args.into_iter().map(Into::into).collect(), Vec::new()); } let mut processed = Vec::new(); + let mut legacy_warnings = Vec::new(); + let mut key_index = 0usize; let mut iter = args.into_iter().map(Into::into).peekable(); while let Some(arg) = iter.next() { @@ -1234,15 +1313,47 @@ where } let keydef = legacy_key_to_k(&from, to_part.as_ref()); + key_index = key_index.saturating_add(1); + legacy_warnings.push(LegacyKeyWarning { + key_index, + from_field: from.field, + to_field: to_part.as_ref().map(|p| p.field), + to_char: to_part.as_ref().map(|p| p.char_pos), + }); processed.push(OsString::from(format!("-k{keydef}"))); continue; } } + if as_str == "-k" || as_str == "--key" { + processed.push(arg); + if let Some(next_arg) = iter.next() { + processed.push(next_arg); + } + key_index = key_index.saturating_add(1); + continue; + } + + if let Some(spec) = as_str.strip_prefix("-k") { + if !spec.is_empty() { + processed.push(arg); + key_index = key_index.saturating_add(1); + continue; + } + } + + if let Some(spec) = as_str.strip_prefix("--key=") { + if !spec.is_empty() { + processed.push(arg); + key_index = key_index.saturating_add(1); + continue; + } + } + processed.push(arg); } - processed + (processed, legacy_warnings) } #[cfg(target_os = "linux")] @@ -1271,16 +1382,230 @@ fn default_merge_batch_size() -> usize { } } +fn locale_failed_to_set() -> bool { + matches!(env::var("LC_ALL").ok().as_deref(), Some("missing")) +} + +fn key_zero_width(selector: &FieldSelector) -> bool { + let Some(to) = &selector.to else { + return false; + }; + if to.field < selector.from.field { + return true; + } + if to.field == selector.from.field { + return to.char != 0 && to.char < selector.from.char; + } + false +} + +fn key_spans_multiple_fields(selector: &FieldSelector) -> bool { + if !matches!( + selector.settings.mode, + SortMode::Numeric | SortMode::HumanNumeric | SortMode::GeneralNumeric + ) { + return false; + } + match &selector.to { + None => true, + Some(to) => to.field > selector.from.field, + } +} + +fn key_leading_blanks_significant(selector: &FieldSelector) -> bool { + selector.settings.mode == SortMode::Default + && !selector.from.ignore_blanks + && !selector.settings.ignore_blanks +} + +fn emit_debug_warnings( + settings: &GlobalSettings, + flags: &GlobalOptionFlags, + legacy_warnings: &[LegacyKeyWarning], +) { + if locale_failed_to_set() { + show_error!("{}", translate!("sort-warning-failed-to-set-locale")); + } + + show_error!("{}", translate!("sort-warning-simple-byte-comparison")); + + for (idx, selector) in settings.selectors.iter().enumerate() { + let key_index = idx + 1; + if let Some(legacy) = legacy_warnings + .iter() + .find(|warning| warning.key_index == key_index) + { + show_error!( + "{}", + translate!( + "sort-warning-obsolescent-key", + "key" => legacy.legacy_key_display(), + "replacement" => legacy.replacement_key_display() + ) + ); + } + + if key_zero_width(selector) { + show_error!( + "{}", + translate!("sort-warning-key-zero-width", "key" => key_index) + ); + continue; + } + + if flags.keys_specified && key_spans_multiple_fields(selector) { + show_error!( + "{}", + translate!( + "sort-warning-key-numeric-spans-fields", + "key" => key_index + ) + ); + } else if flags.keys_specified && key_leading_blanks_significant(selector) { + show_error!( + "{}", + translate!( + "sort-warning-leading-blanks-significant", + "key" => key_index + ) + ); + } + } + + let numeric_used = settings.selectors.iter().any(|selector| { + matches!( + selector.settings.mode, + SortMode::Numeric | SortMode::HumanNumeric | SortMode::GeneralNumeric + ) + }); + + let mut suppress_decimal_warning = false; + if numeric_used { + if let Some(sep) = settings.separator { + match sep { + b'.' => { + show_error!( + "{}", + translate!("sort-warning-separator-decimal", "sep" => ".") + ); + suppress_decimal_warning = true; + } + b'-' => { + show_error!( + "{}", + translate!("sort-warning-separator-minus", "sep" => "-") + ); + } + b'+' => { + show_error!( + "{}", + translate!("sort-warning-separator-plus", "sep" => "+") + ); + } + _ => {} + } + } + + if !suppress_decimal_warning { + show_error!("{}", translate!("sort-warning-numbers-use-decimal-point")); + } + } + + let uses_reverse = settings + .selectors + .iter() + .any(|selector| selector.settings.reverse); + let uses_blanks = settings + .selectors + .iter() + .any(|selector| selector.settings.ignore_blanks || selector.from.ignore_blanks); + let uses_dictionary = settings + .selectors + .iter() + .any(|selector| selector.settings.dictionary_order); + let uses_case = settings + .selectors + .iter() + .any(|selector| selector.settings.ignore_case); + let uses_non_printing = settings + .selectors + .iter() + .any(|selector| selector.settings.ignore_non_printing); + + let uses_mode = |mode| { + settings + .selectors + .iter() + .any(|selector| selector.settings.mode == mode) + }; + + let reverse_unused = flags.reverse && !uses_reverse; + let last_resort_active = + settings.mode != SortMode::Random && !settings.stable && !settings.unique; + let reverse_ignored = reverse_unused && !last_resort_active; + let reverse_last_resort_warning = reverse_unused && last_resort_active; + + let mut ignored_opts = String::new(); + if flags.ignore_leading_blanks && !uses_blanks { + ignored_opts.push('b'); + } + if flags.dictionary_order && !uses_dictionary { + ignored_opts.push('d'); + } + if flags.ignore_case && !uses_case { + ignored_opts.push('f'); + } + if flags.ignore_non_printing && !uses_non_printing { + ignored_opts.push('i'); + } + if flags.mode_general && !uses_mode(SortMode::GeneralNumeric) { + ignored_opts.push('g'); + } + if flags.mode_human && !uses_mode(SortMode::HumanNumeric) { + ignored_opts.push('h'); + } + if flags.mode_month && !uses_mode(SortMode::Month) { + ignored_opts.push('M'); + } + if flags.mode_numeric && !uses_mode(SortMode::Numeric) { + ignored_opts.push('n'); + } + if flags.mode_random && !uses_mode(SortMode::Random) { + ignored_opts.push('R'); + } + if reverse_ignored { + ignored_opts.push('r'); + } + if flags.mode_version && !uses_mode(SortMode::Version) { + ignored_opts.push('V'); + } + + if ignored_opts.len() == 1 { + show_error!( + "{}", + translate!("sort-warning-option-ignored", "option" => ignored_opts) + ); + } else if ignored_opts.len() > 1 { + show_error!( + "{}", + translate!("sort-warning-options-ignored", "options" => ignored_opts) + ); + } + + if reverse_last_resort_warning { + show_error!("{}", translate!("sort-warning-option-reverse-last-resort")); + } +} + #[uucore::main] #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> UResult<()> { let mut settings = GlobalSettings::default(); - let matches = uucore::clap_localization::handle_clap_result_with_exit_code( - uu_app(), - preprocess_legacy_args(args), - 2, - )?; + let (processed_args, legacy_warnings) = preprocess_legacy_args(args); + let matches = + uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), processed_args, 2)?; + let global_flags = GlobalOptionFlags::from_matches(&matches); // Prevent -o/--output to be specified multiple times if matches @@ -1569,6 +1894,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let output = Output::new(matches.get_one::(options::OUTPUT))?; + if settings.debug { + emit_debug_warnings(&settings, &global_flags, &legacy_warnings); + } + settings.init_precomputed(); let result = exec(&mut files, &settings, output, &mut tmp_dir); From 8c71a20bf5dba9fb0a7f01a290a59c62cc401d16 Mon Sep 17 00:00:00 2001 From: mattsu Date: Mon, 29 Dec 2025 12:03:42 +0900 Subject: [PATCH 2/7] refactor(sort): remove stable and unique flags from GlobalOptionFlags Removes the unused `stable` and `unique` boolean fields from the GlobalOptionFlags struct and their initialization in the impl block, simplifying the code by eliminating redundant options. --- src/uu/sort/src/sort.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 8bf02d29a3d..e30e014d26b 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1158,8 +1158,6 @@ struct GlobalOptionFlags { ignore_case: bool, ignore_non_printing: bool, reverse: bool, - stable: bool, - unique: bool, mode_numeric: bool, mode_general: bool, mode_human: bool, @@ -1180,8 +1178,6 @@ impl GlobalOptionFlags { ignore_case: matches.get_flag(options::IGNORE_CASE), ignore_non_printing: matches.get_flag(options::IGNORE_NONPRINTING), reverse: matches.get_flag(options::REVERSE), - stable: matches.get_flag(options::STABLE), - unique: matches.get_flag(options::UNIQUE), mode_human: matches.get_flag(options::modes::HUMAN_NUMERIC) || sort_value == Some("human-numeric"), mode_month: matches.get_flag(options::modes::MONTH) || sort_value == Some("month"), From 6d22316b591df43a88ada46434e8e0045b812634 Mon Sep 17 00:00:00 2001 From: mattsu Date: Mon, 29 Dec 2025 13:47:14 +0900 Subject: [PATCH 3/7] feat(sort): allow multiple sort modes by removing mutual conflicts Previously, sort mode flags were mutually exclusive, preventing users from specifying more than one mode. This change removes the conflicts to enable combining sort options for more flexible sorting behavior. --- src/uu/sort/src/sort.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index e30e014d26b..9d59b61a856 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1067,18 +1067,13 @@ impl FieldSelector { } } -/// Creates an `Arg` that conflicts with all other sort modes. +/// Creates an `Arg` for a sort mode flag. fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { Arg::new(mode) .short(short) .long(mode) .help(help) .action(ArgAction::SetTrue) - .conflicts_with_all( - options::modes::ALL_SORT_MODES - .iter() - .filter(|&&m| m != mode), - ) } #[cfg(target_os = "linux")] From c5cfd4592deb942e071afa42e03898ca3993aa8c Mon Sep 17 00:00:00 2001 From: mattsu Date: Wed, 31 Dec 2025 15:37:19 +0900 Subject: [PATCH 4/7] refactor(sort): separate arg_index and key_index in legacy key warnings - Update LegacyKeyWarning struct to include arg_index and make key_index optional - Modify preprocess_legacy_args to set arg_index instead of key_index during parsing - Add index_legacy_warnings function to compute key_index after arg processing - Adjust emit_debug_warnings to match updated key_index type - This ensures accurate indexing for legacy key warnings in sort utility --- src/uu/sort/src/sort.rs | 89 ++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 27 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 9d59b61a856..a8346821e9e 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1116,7 +1116,8 @@ struct LegacyKeyPart { #[derive(Debug, Clone)] struct LegacyKeyWarning { - key_index: usize, + arg_index: usize, + key_index: Option, from_field: usize, to_field: Option, to_char: Option, @@ -1271,7 +1272,6 @@ where let mut processed = Vec::new(); let mut legacy_warnings = Vec::new(); - let mut key_index = 0usize; let mut iter = args.into_iter().map(Into::into).peekable(); while let Some(arg) = iter.next() { @@ -1304,9 +1304,10 @@ where } let keydef = legacy_key_to_k(&from, to_part.as_ref()); - key_index = key_index.saturating_add(1); + let arg_index = processed.len(); legacy_warnings.push(LegacyKeyWarning { - key_index, + arg_index, + key_index: None, from_field: from.field, to_field: to_part.as_ref().map(|p| p.field), to_char: to_part.as_ref().map(|p| p.char_pos), @@ -1316,35 +1317,61 @@ where } } - if as_str == "-k" || as_str == "--key" { - processed.push(arg); - if let Some(next_arg) = iter.next() { - processed.push(next_arg); - } - key_index = key_index.saturating_add(1); - continue; + processed.push(arg); + } + + (processed, legacy_warnings) +} + +fn index_legacy_warnings(processed_args: &[OsString], legacy_warnings: &mut [LegacyKeyWarning]) { + if legacy_warnings.is_empty() { + return; + } + + let mut index_by_arg = std::collections::HashMap::new(); + for (warning_idx, warning) in legacy_warnings.iter().enumerate() { + index_by_arg.insert(warning.arg_index, warning_idx); + } + + let mut key_index = 0usize; + let mut i = 0usize; + while i < processed_args.len() { + let arg = &processed_args[i]; + if arg == OsStr::new("--") { + break; } - if let Some(spec) = as_str.strip_prefix("-k") { - if !spec.is_empty() { - processed.push(arg); + let mut matched_key = false; + if arg == OsStr::new("-k") || arg == OsStr::new("--key") { + if i + 1 < processed_args.len() { key_index = key_index.saturating_add(1); - continue; + matched_key = true; + i += 2; + } else { + i += 1; + } + } else { + let as_str = arg.to_string_lossy(); + if let Some(spec) = as_str.strip_prefix("-k") { + if !spec.is_empty() { + key_index = key_index.saturating_add(1); + matched_key = true; + } + } else if let Some(spec) = as_str.strip_prefix("--key=") { + if !spec.is_empty() { + key_index = key_index.saturating_add(1); + matched_key = true; + } } + i += 1; } - if let Some(spec) = as_str.strip_prefix("--key=") { - if !spec.is_empty() { - processed.push(arg); - key_index = key_index.saturating_add(1); - continue; + if matched_key { + if let Some(&warning_idx) = index_by_arg.get(&i.saturating_sub(1)) { + legacy_warnings[warning_idx].key_index = Some(key_index); } } - - processed.push(arg); } - - (processed, legacy_warnings) } #[cfg(target_os = "linux")] @@ -1424,7 +1451,7 @@ fn emit_debug_warnings( let key_index = idx + 1; if let Some(legacy) = legacy_warnings .iter() - .find(|warning| warning.key_index == key_index) + .find(|warning| warning.key_index == Some(key_index)) { show_error!( "{}", @@ -1593,10 +1620,14 @@ fn emit_debug_warnings( pub fn uumain(args: impl uucore::Args) -> UResult<()> { let mut settings = GlobalSettings::default(); - let (processed_args, legacy_warnings) = preprocess_legacy_args(args); + let (processed_args, mut legacy_warnings) = preprocess_legacy_args(args); + let processed_args_for_debug = if legacy_warnings.is_empty() { + None + } else { + Some(processed_args.clone()) + }; let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), processed_args, 2)?; - let global_flags = GlobalOptionFlags::from_matches(&matches); // Prevent -o/--output to be specified multiple times if matches @@ -1886,6 +1917,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let output = Output::new(matches.get_one::(options::OUTPUT))?; if settings.debug { + if let Some(ref processed) = processed_args_for_debug { + index_legacy_warnings(processed, &mut legacy_warnings); + } + let global_flags = GlobalOptionFlags::from_matches(&matches); emit_debug_warnings(&settings, &global_flags, &legacy_warnings); } From 804018126d4dd86d1ff56e968716e67b23274aba Mon Sep 17 00:00:00 2001 From: mattsu Date: Wed, 31 Dec 2025 16:01:57 +0900 Subject: [PATCH 5/7] feat(sort): add legacy sort key detection for '+' prefixed arguments - Introduce `starts_with_plus` function to check for arguments starting with '+' in a platform-specific manner - Modify `parse_sort_arguments` to detect and process legacy sort keys, enabling proper handling of deprecated options with warnings - This ensures backward compatibility for users relying on old sort syntax while guiding migration to modern flags --- src/uu/sort/src/sort.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index a8346821e9e..e2e00a1d60c 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -36,6 +36,8 @@ use std::hash::{Hash, Hasher}; use std::io::{BufRead, BufReader, BufWriter, Read, Write, stdin, stdout}; use std::num::IntErrorKind; use std::ops::Range; +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt; use std::path::Path; use std::path::PathBuf; use std::str::Utf8Error; @@ -1270,9 +1272,22 @@ where return (args.into_iter().map(Into::into).collect(), Vec::new()); } + let mut args_vec: Vec = Vec::new(); + let mut has_plus = false; + for arg in args { + let os_arg: OsString = arg.into(); + if !has_plus && starts_with_plus(&os_arg) { + has_plus = true; + } + args_vec.push(os_arg); + } + if !has_plus { + return (args_vec, Vec::new()); + } + let mut processed = Vec::new(); let mut legacy_warnings = Vec::new(); - let mut iter = args.into_iter().map(Into::into).peekable(); + let mut iter = args_vec.into_iter().peekable(); while let Some(arg) = iter.next() { if arg == "--" { @@ -1323,6 +1338,17 @@ where (processed, legacy_warnings) } +fn starts_with_plus(arg: &OsStr) -> bool { + #[cfg(unix)] + { + arg.as_bytes().first() == Some(&b'+') + } + #[cfg(not(unix))] + { + arg.to_string_lossy().starts_with('+') + } +} + fn index_legacy_warnings(processed_args: &[OsString], legacy_warnings: &mut [LegacyKeyWarning]) { if legacy_warnings.is_empty() { return; From 1ffc81380b24ff553f9481743c3f2b635ac0abb8 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 1 Jan 2026 16:35:24 +0900 Subject: [PATCH 6/7] refactor(sort): simplify legacy args preprocessing Remove intermediate vector creation in preprocess_legacy_args and streamline the handling of legacy '+' prefixed arguments to improve efficiency and readability without altering functionality. Additionally, refactor uumain to directly index legacy warnings when present. --- src/uu/sort/src/sort.rs | 86 +++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 51 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index e2e00a1d60c..eb2fa0ff5da 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1272,22 +1272,9 @@ where return (args.into_iter().map(Into::into).collect(), Vec::new()); } - let mut args_vec: Vec = Vec::new(); - let mut has_plus = false; - for arg in args { - let os_arg: OsString = arg.into(); - if !has_plus && starts_with_plus(&os_arg) { - has_plus = true; - } - args_vec.push(os_arg); - } - if !has_plus { - return (args_vec, Vec::new()); - } - let mut processed = Vec::new(); let mut legacy_warnings = Vec::new(); - let mut iter = args_vec.into_iter().peekable(); + let mut iter = args.into_iter().map(Into::into).peekable(); while let Some(arg) = iter.next() { if arg == "--" { @@ -1296,39 +1283,41 @@ where break; } - let as_str = arg.to_string_lossy(); - if let Some(from_spec) = as_str.strip_prefix('+') { - if let Some(from) = parse_legacy_part(from_spec) { - let mut to_part = None; - - let next_candidate = iter.peek().map(|next| next.to_string_lossy().to_string()); - - if let Some(next_str) = next_candidate { - if let Some(stripped) = next_str.strip_prefix('-') { - if stripped.starts_with(|c: char| c.is_ascii_digit()) { - let next_arg = iter.next().unwrap(); - if let Some(parsed) = parse_legacy_part(stripped) { - to_part = Some(parsed); - } else { - processed.push(arg); - processed.push(next_arg); - continue; + if starts_with_plus(&arg) { + let as_str = arg.to_string_lossy(); + if let Some(from_spec) = as_str.strip_prefix('+') { + if let Some(from) = parse_legacy_part(from_spec) { + let mut to_part = None; + + let next_candidate = iter.peek().map(|next| next.to_string_lossy().to_string()); + + if let Some(next_str) = next_candidate { + if let Some(stripped) = next_str.strip_prefix('-') { + if stripped.starts_with(|c: char| c.is_ascii_digit()) { + let next_arg = iter.next().unwrap(); + if let Some(parsed) = parse_legacy_part(stripped) { + to_part = Some(parsed); + } else { + processed.push(arg); + processed.push(next_arg); + continue; + } } } } - } - let keydef = legacy_key_to_k(&from, to_part.as_ref()); - let arg_index = processed.len(); - legacy_warnings.push(LegacyKeyWarning { - arg_index, - key_index: None, - from_field: from.field, - to_field: to_part.as_ref().map(|p| p.field), - to_char: to_part.as_ref().map(|p| p.char_pos), - }); - processed.push(OsString::from(format!("-k{keydef}"))); - continue; + let keydef = legacy_key_to_k(&from, to_part.as_ref()); + let arg_index = processed.len(); + legacy_warnings.push(LegacyKeyWarning { + arg_index, + key_index: None, + from_field: from.field, + to_field: to_part.as_ref().map(|p| p.field), + to_char: to_part.as_ref().map(|p| p.char_pos), + }); + processed.push(OsString::from(format!("-k{keydef}"))); + continue; + } } } @@ -1647,11 +1636,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let mut settings = GlobalSettings::default(); let (processed_args, mut legacy_warnings) = preprocess_legacy_args(args); - let processed_args_for_debug = if legacy_warnings.is_empty() { - None - } else { - Some(processed_args.clone()) - }; + if !legacy_warnings.is_empty() { + index_legacy_warnings(&processed_args, &mut legacy_warnings); + } let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), processed_args, 2)?; @@ -1943,9 +1930,6 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let output = Output::new(matches.get_one::(options::OUTPUT))?; if settings.debug { - if let Some(ref processed) = processed_args_for_debug { - index_legacy_warnings(processed, &mut legacy_warnings); - } let global_flags = GlobalOptionFlags::from_matches(&matches); emit_debug_warnings(&settings, &global_flags, &legacy_warnings); } From 87b21013224ae387213e16944b88ca8d07e9b613 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 1 Jan 2026 17:30:57 +0900 Subject: [PATCH 7/7] refactor(sort/benches): optimize locale UTF8 benchmarks by predefining args Move output file creation and argument setup outside benchmark loops in sort_locale_utf8_bench.rs to avoid measuring initialization time in each iteration, ensuring accurate performance measurements for sorting operations. --- src/uu/sort/benches/sort_locale_utf8_bench.rs | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/uu/sort/benches/sort_locale_utf8_bench.rs b/src/uu/sort/benches/sort_locale_utf8_bench.rs index b0ebb340d99..6f61dc322d0 100644 --- a/src/uu/sort/benches/sort_locale_utf8_bench.rs +++ b/src/uu/sort/benches/sort_locale_utf8_bench.rs @@ -21,11 +21,10 @@ fn sort_ascii_utf8_locale(bencher: Bencher) { let output_file = NamedTempFile::new().unwrap(); let output_path = output_file.path().to_str().unwrap().to_string(); + let args = ["-o", &output_path, file_path.to_str().unwrap()]; + black_box(run_util_function(uumain, &args)); bencher.bench(|| { - black_box(run_util_function( - uumain, - &["-o", &output_path, file_path.to_str().unwrap()], - )); + black_box(run_util_function(uumain, &args)); }); } @@ -37,11 +36,10 @@ fn sort_mixed_utf8_locale(bencher: Bencher) { let output_file = NamedTempFile::new().unwrap(); let output_path = output_file.path().to_str().unwrap().to_string(); + let args = ["-o", &output_path, file_path.to_str().unwrap()]; + black_box(run_util_function(uumain, &args)); bencher.bench(|| { - black_box(run_util_function( - uumain, - &["-o", &output_path, file_path.to_str().unwrap()], - )); + black_box(run_util_function(uumain, &args)); }); } @@ -54,12 +52,13 @@ fn sort_numeric_utf8_locale(bencher: Bencher) { data.extend_from_slice(line.as_bytes()); } let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap().to_string(); + let args = ["-n", "-o", &output_path, file_path.to_str().unwrap()]; + black_box(run_util_function(uumain, &args)); bencher.bench(|| { - black_box(run_util_function( - uumain, - &["-n", file_path.to_str().unwrap()], - )); + black_box(run_util_function(uumain, &args)); }); } @@ -68,12 +67,13 @@ fn sort_numeric_utf8_locale(bencher: Bencher) { fn sort_reverse_utf8_locale(bencher: Bencher) { let data = text_data::generate_mixed_locale_data(50_000); let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap().to_string(); + let args = ["-r", "-o", &output_path, file_path.to_str().unwrap()]; + black_box(run_util_function(uumain, &args)); bencher.bench(|| { - black_box(run_util_function( - uumain, - &["-r", file_path.to_str().unwrap()], - )); + black_box(run_util_function(uumain, &args)); }); } @@ -82,12 +82,13 @@ fn sort_reverse_utf8_locale(bencher: Bencher) { fn sort_unique_utf8_locale(bencher: Bencher) { let data = text_data::generate_mixed_locale_data(50_000); let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap().to_string(); + let args = ["-u", "-o", &output_path, file_path.to_str().unwrap()]; + black_box(run_util_function(uumain, &args)); bencher.bench(|| { - black_box(run_util_function( - uumain, - &["-u", file_path.to_str().unwrap()], - )); + black_box(run_util_function(uumain, &args)); }); }