From 287815f7b4246cfc48bd590112fbbbc324f5435b Mon Sep 17 00:00:00 2001 From: karanabe <152078880+karanabe@users.noreply.github.com> Date: Fri, 28 Nov 2025 08:02:37 +0900 Subject: [PATCH 1/6] sort: add legacy +POS/-POS parsing for GNU compat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support GNU’s obsolescent +POS1 [-POS2] syntax by translating it to -k before clap parses args, gated by _POSIX2_VERSION. Adds tests for accept and reject cases to ensure sort-field-limit GNU test passes. --- src/uu/sort/src/sort.rs | 163 ++++++++++++++++++++++++++++++++++++- tests/by-util/test_sort.rs | 26 ++++++ 2 files changed, 187 insertions(+), 2 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index ec9ab5b9305..b46ae150e97 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -7,7 +7,7 @@ // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html // https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html -// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit +// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit behaviour keydef mod buffer_hint; mod check; @@ -51,6 +51,7 @@ use uucore::line_ending::LineEnding; use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError}; use uucore::parser::parse_size::{ParseSizeError, Parser}; use uucore::parser::shortcut_value_parser::ShortcutValueParser; +use uucore::posix::{MODERN, TRADITIONAL}; use uucore::show_error; use uucore::translate; use uucore::version_cmp::version_cmp; @@ -1085,6 +1086,160 @@ fn get_rlimit() -> UResult { } const STDIN_FILE: &str = "-"; + +/// Legacy `+POS1 [-POS2]` syntax is permitted unless `_POSIX2_VERSION` is in +/// the [TRADITIONAL, MODERN) range (matches GNU behaviour). +fn allows_traditional_usage() -> bool { + !matches!(uucore::posix::posix_version(), Some(ver) if (TRADITIONAL..MODERN).contains(&ver)) +} + +#[derive(Debug, Clone)] +struct LegacyKeyPart { + field: usize, + char: usize, + opts: String, +} + +fn parse_usize_or_max(num: &str) -> Option { + match num.parse::() { + Ok(v) => Some(v), + Err(e) if *e.kind() == IntErrorKind::PosOverflow => Some(usize::MAX), + Err(_) => None, + } +} + +fn parse_legacy_part(spec: &str, default_char: usize) -> Option { + let mut idx = 0; + let bytes = spec.as_bytes(); + while idx < bytes.len() && bytes[idx].is_ascii_digit() { + idx += 1; + } + if idx == 0 { + return None; + } + + let field = parse_usize_or_max(&spec[..idx])?; + let mut char = default_char; + let mut rest = &spec[idx..]; + + if let Some(stripped) = rest.strip_prefix('.') { + let mut char_idx = 0; + let stripped_bytes = stripped.as_bytes(); + while char_idx < stripped_bytes.len() && stripped_bytes[char_idx].is_ascii_digit() { + char_idx += 1; + } + if char_idx == 0 { + return None; + } + char = parse_usize_or_max(&stripped[..char_idx])?; + rest = &stripped[char_idx..]; + } + + Some(LegacyKeyPart { + field, + char, + opts: rest.to_string(), + }) +} + +/// Convert legacy +POS1 [-POS2] into a `-k` key specification using saturating arithmetic. +fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String { + let start_field = from.field.saturating_add(1); + let start_char = from.char.saturating_add(1); + + let mut keydef = format!( + "{}{}{}", + start_field, + if from.char == 0 { + String::new() + } else { + format!(".{start_char}") + }, + from.opts + ); + + if let Some(to) = to { + let end_field = if to.char == 0 { + // When the end character index is zero, GNU keeps the field number as-is. + // Clamp to 1 to avoid generating an invalid field 0. + to.field.max(1) + } else { + to.field.saturating_add(1) + }; + + let mut end_part = end_field.to_string(); + if to.char != 0 { + end_part.push('.'); + end_part.push_str(&to.char.to_string()); + } + end_part.push_str(&to.opts); + + keydef.push(','); + keydef.push_str(&end_part); + } + + keydef +} + +/// Preprocess argv to handle legacy +POS1 [-POS2] syntax by converting it into -k forms +/// before clap sees the arguments. +fn preprocess_legacy_args(args: I) -> Vec +where + I: IntoIterator, + I::Item: Into, +{ + if !allows_traditional_usage() { + return args.into_iter().map(Into::into).collect(); + } + + let mut processed = Vec::new(); + let mut iter = args.into_iter().map(Into::into).peekable(); + + while let Some(arg) = iter.next() { + if arg == "--" { + processed.push(arg); + processed.extend(iter); + break; + } + + let as_str = arg.to_string_lossy(); + if let Some(from_spec) = as_str.strip_prefix('+') { + if let Some(from) = parse_legacy_part(from_spec, 0) { + let mut to_part = None; + + let next_candidate = iter.peek().map(|next| next.to_string_lossy().to_string()); + + if let Some(next_str) = next_candidate { + if let Some(stripped) = next_str.strip_prefix('-') { + if stripped + .as_bytes() + .first() + .is_some_and(|c| c.is_ascii_digit()) + { + let next_arg = iter.next().unwrap(); + if let Some(parsed) = parse_legacy_part(stripped, 0) { + to_part = Some(parsed); + } else { + processed.push(arg); + processed.push(next_arg); + continue; + } + } + } + } + + let keydef = legacy_key_to_k(&from, to_part.as_ref()); + processed.push(OsString::from(format!("-k{keydef}"))); + continue; + } + } + + processed.push(arg); + } + + processed +} + #[cfg(target_os = "linux")] const LINUX_BATCH_DIVISOR: usize = 4; #[cfg(target_os = "linux")] @@ -1116,7 +1271,11 @@ fn default_merge_batch_size() -> usize { pub fn uumain(args: impl uucore::Args) -> UResult<()> { let mut settings = GlobalSettings::default(); - let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 2)?; + let matches = uucore::clap_localization::handle_clap_result_with_exit_code( + uu_app(), + preprocess_legacy_args(args), + 2, + )?; // Prevent -o/--output to be specified multiple times if matches diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 8bce9d69cb4..7b1d5dc9b50 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -107,6 +107,32 @@ fn test_invalid_buffer_size() { } } +#[test] +fn test_legacy_plus_minus_accepts_when_modern_posix2() { + let (at, mut ucmd) = at_and_ucmd!(); + at.write("input.txt", "aa\nbb\n"); + + ucmd.env("_POSIX2_VERSION", "200809") + .args(&["+1", "-1.5"]) + .arg("input.txt") + .succeeds() + .stdout_is("aa\nbb\n"); +} + +#[test] +fn test_legacy_plus_minus_rejected_in_traditional_range() { + let (at, mut ucmd) = at_and_ucmd!(); + at.write("input.txt", "aa\nbb\n"); + + let result = ucmd + .env("_POSIX2_VERSION", "200112") + .args(&["+1", "-1.5"]) + .arg("input.txt") + .fails(); + + result.stderr_contains("unexpected argument '-1'"); +} + #[test] fn test_ext_sort_stable() { new_ucmd!() From 6656199dd37cb3afaa0c26d9e0f009648c1890a1 Mon Sep 17 00:00:00 2001 From: karanabe <152078880+karanabe@users.noreply.github.com> Date: Fri, 28 Nov 2025 22:16:15 +0900 Subject: [PATCH 2/6] sort: align legacy key tests with GNU field limit --- tests/by-util/test_sort.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 7b1d5dc9b50..bac829d1dc0 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -109,11 +109,12 @@ fn test_invalid_buffer_size() { #[test] fn test_legacy_plus_minus_accepts_when_modern_posix2() { + let size_max = usize::MAX; let (at, mut ucmd) = at_and_ucmd!(); at.write("input.txt", "aa\nbb\n"); ucmd.env("_POSIX2_VERSION", "200809") - .args(&["+1", "-1.5"]) + .arg(format!("+0.{size_max}R")) .arg("input.txt") .succeeds() .stdout_is("aa\nbb\n"); @@ -121,16 +122,16 @@ fn test_legacy_plus_minus_accepts_when_modern_posix2() { #[test] fn test_legacy_plus_minus_rejected_in_traditional_range() { + let size_max = usize::MAX; let (at, mut ucmd) = at_and_ucmd!(); at.write("input.txt", "aa\nbb\n"); - let result = ucmd - .env("_POSIX2_VERSION", "200112") - .args(&["+1", "-1.5"]) + ucmd.env("_POSIX2_VERSION", "200809") + .arg("+1") + .arg(format!("-1.{size_max}R")) .arg("input.txt") - .fails(); - - result.stderr_contains("unexpected argument '-1'"); + .succeeds() + .stdout_is("aa\nbb\n"); } #[test] From 335b6e4f3cef05b776d440b30812438750bedd1e Mon Sep 17 00:00:00 2001 From: karanabe <152078880+karanabe@users.noreply.github.com> Date: Tue, 2 Dec 2025 01:54:36 +0900 Subject: [PATCH 3/6] sort: rename legacy max-field test for clarity --- tests/by-util/test_sort.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index bac829d1dc0..26d7f587d1a 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -121,7 +121,7 @@ fn test_legacy_plus_minus_accepts_when_modern_posix2() { } #[test] -fn test_legacy_plus_minus_rejected_in_traditional_range() { +fn test_legacy_plus_minus_accepts_with_size_max() { let size_max = usize::MAX; let (at, mut ucmd) = at_and_ucmd!(); at.write("input.txt", "aa\nbb\n"); From 3b52b5d8ad147bb0cc17f08da4d1de9519512a25 Mon Sep 17 00:00:00 2001 From: karanabe <152078880+karanabe@users.noreply.github.com> Date: Sun, 14 Dec 2025 16:20:05 +0900 Subject: [PATCH 4/6] Simplify legacy key parsing inputs --- src/uu/sort/src/sort.rs | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index b46ae150e97..8e2fb53ab70 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1096,7 +1096,7 @@ fn allows_traditional_usage() -> bool { #[derive(Debug, Clone)] struct LegacyKeyPart { field: usize, - char: usize, + char_pos: usize, opts: String, } @@ -1108,36 +1108,28 @@ fn parse_usize_or_max(num: &str) -> Option { } } -fn parse_legacy_part(spec: &str, default_char: usize) -> Option { - let mut idx = 0; - let bytes = spec.as_bytes(); - while idx < bytes.len() && bytes[idx].is_ascii_digit() { - idx += 1; - } +fn parse_legacy_part(spec: &str) -> Option { + let idx = spec.chars().take_while(|c| c.is_ascii_digit()).count(); if idx == 0 { return None; } let field = parse_usize_or_max(&spec[..idx])?; - let mut char = default_char; + let mut char_pos = 0; let mut rest = &spec[idx..]; if let Some(stripped) = rest.strip_prefix('.') { - let mut char_idx = 0; - let stripped_bytes = stripped.as_bytes(); - while char_idx < stripped_bytes.len() && stripped_bytes[char_idx].is_ascii_digit() { - char_idx += 1; - } + let char_idx = stripped.chars().take_while(|c| c.is_ascii_digit()).count(); if char_idx == 0 { return None; } - char = parse_usize_or_max(&stripped[..char_idx])?; + char_pos = parse_usize_or_max(&stripped[..char_idx])?; rest = &stripped[char_idx..]; } Some(LegacyKeyPart { field, - char, + char_pos, opts: rest.to_string(), }) } @@ -1145,12 +1137,12 @@ fn parse_legacy_part(spec: &str, default_char: usize) -> Option { /// Convert legacy +POS1 [-POS2] into a `-k` key specification using saturating arithmetic. fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String { let start_field = from.field.saturating_add(1); - let start_char = from.char.saturating_add(1); + let start_char = from.char_pos.saturating_add(1); let mut keydef = format!( "{}{}{}", start_field, - if from.char == 0 { + if from.char_pos == 0 { String::new() } else { format!(".{start_char}") @@ -1159,7 +1151,7 @@ fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String { ); if let Some(to) = to { - let end_field = if to.char == 0 { + let end_field = if to.char_pos == 0 { // When the end character index is zero, GNU keeps the field number as-is. // Clamp to 1 to avoid generating an invalid field 0. to.field.max(1) @@ -1168,9 +1160,9 @@ fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String { }; let mut end_part = end_field.to_string(); - if to.char != 0 { + if to.char_pos != 0 { end_part.push('.'); - end_part.push_str(&to.char.to_string()); + end_part.push_str(&to.char_pos.to_string()); } end_part.push_str(&to.opts); @@ -1204,7 +1196,7 @@ where let as_str = arg.to_string_lossy(); if let Some(from_spec) = as_str.strip_prefix('+') { - if let Some(from) = parse_legacy_part(from_spec, 0) { + if let Some(from) = parse_legacy_part(from_spec) { let mut to_part = None; let next_candidate = iter.peek().map(|next| next.to_string_lossy().to_string()); @@ -1217,7 +1209,7 @@ where .is_some_and(|c| c.is_ascii_digit()) { let next_arg = iter.next().unwrap(); - if let Some(parsed) = parse_legacy_part(stripped, 0) { + if let Some(parsed) = parse_legacy_part(stripped) { to_part = Some(parsed); } else { processed.push(arg); From bf7a8ab56f3fc3a4f9a02834ec97938acd705992 Mon Sep 17 00:00:00 2001 From: karanabe <152078880+karanabe@users.noreply.github.com> Date: Sun, 14 Dec 2025 16:20:28 +0900 Subject: [PATCH 5/6] Inline legacy key end serialization --- src/uu/sort/src/sort.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 8e2fb53ab70..85ef238256b 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1159,15 +1159,13 @@ fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String { to.field.saturating_add(1) }; - let mut end_part = end_field.to_string(); + keydef.push(','); + keydef.push_str(&end_field.to_string()); if to.char_pos != 0 { - end_part.push('.'); - end_part.push_str(&to.char_pos.to_string()); + keydef.push('.'); + keydef.push_str(&to.char_pos.to_string()); } - end_part.push_str(&to.opts); - - keydef.push(','); - keydef.push_str(&end_part); + keydef.push_str(&to.opts); } keydef From e477de9e00b3b9e6178a2cbc5b746aa2b75c649c Mon Sep 17 00:00:00 2001 From: karanabe <152078880+karanabe@users.noreply.github.com> Date: Sun, 14 Dec 2025 16:20:47 +0900 Subject: [PATCH 6/6] Use starts_with for legacy arg digit check --- src/uu/sort/src/sort.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 85ef238256b..c25ef48147d 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1201,11 +1201,7 @@ where if let Some(next_str) = next_candidate { if let Some(stripped) = next_str.strip_prefix('-') { - if stripped - .as_bytes() - .first() - .is_some_and(|c| c.is_ascii_digit()) - { + if stripped.starts_with(|c: char| c.is_ascii_digit()) { let next_arg = iter.next().unwrap(); if let Some(parsed) = parse_legacy_part(stripped) { to_part = Some(parsed);