Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 147 additions & 2 deletions src/uu/sort/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html
// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html

// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit
// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit behaviour keydef

mod buffer_hint;
mod check;
Expand Down Expand Up @@ -51,6 +51,7 @@ use uucore::line_ending::LineEnding;
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
use uucore::parser::parse_size::{ParseSizeError, Parser};
use uucore::parser::shortcut_value_parser::ShortcutValueParser;
use uucore::posix::{MODERN, TRADITIONAL};
use uucore::show_error;
use uucore::translate;
use uucore::version_cmp::version_cmp;
Expand Down Expand Up @@ -1085,6 +1086,146 @@ fn get_rlimit() -> UResult<usize> {
}

const STDIN_FILE: &str = "-";

/// Legacy `+POS1 [-POS2]` syntax is permitted unless `_POSIX2_VERSION` is in
/// the [TRADITIONAL, MODERN) range (matches GNU behaviour).
fn allows_traditional_usage() -> bool {
!matches!(uucore::posix::posix_version(), Some(ver) if (TRADITIONAL..MODERN).contains(&ver))
}

#[derive(Debug, Clone)]
struct LegacyKeyPart {
field: usize,
char_pos: usize,
opts: String,
}

fn parse_usize_or_max(num: &str) -> Option<usize> {
match num.parse::<usize>() {
Ok(v) => Some(v),
Err(e) if *e.kind() == IntErrorKind::PosOverflow => Some(usize::MAX),
Err(_) => None,
}
}

fn parse_legacy_part(spec: &str) -> Option<LegacyKeyPart> {
let idx = spec.chars().take_while(|c| c.is_ascii_digit()).count();
if idx == 0 {
return None;
}

let field = parse_usize_or_max(&spec[..idx])?;
let mut char_pos = 0;
let mut rest = &spec[idx..];

if let Some(stripped) = rest.strip_prefix('.') {
let char_idx = stripped.chars().take_while(|c| c.is_ascii_digit()).count();
if char_idx == 0 {
return None;
}
char_pos = parse_usize_or_max(&stripped[..char_idx])?;
rest = &stripped[char_idx..];
}

Some(LegacyKeyPart {
field,
char_pos,
opts: rest.to_string(),
})
}

/// Convert legacy +POS1 [-POS2] into a `-k` key specification using saturating arithmetic.
fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String {
let start_field = from.field.saturating_add(1);
let start_char = from.char_pos.saturating_add(1);

let mut keydef = format!(
"{}{}{}",
start_field,
if from.char_pos == 0 {
String::new()
} else {
format!(".{start_char}")
},
from.opts
);

if let Some(to) = to {
let end_field = if to.char_pos == 0 {
// When the end character index is zero, GNU keeps the field number as-is.
// Clamp to 1 to avoid generating an invalid field 0.
to.field.max(1)
} else {
to.field.saturating_add(1)
};

keydef.push(',');
keydef.push_str(&end_field.to_string());
if to.char_pos != 0 {
keydef.push('.');
keydef.push_str(&to.char_pos.to_string());
}
keydef.push_str(&to.opts);
}

keydef
}

/// Preprocess argv to handle legacy +POS1 [-POS2] syntax by converting it into -k forms
/// before clap sees the arguments.
fn preprocess_legacy_args<I>(args: I) -> Vec<OsString>
where
I: IntoIterator,
I::Item: Into<OsString>,
{
if !allows_traditional_usage() {
return args.into_iter().map(Into::into).collect();
}

let mut processed = Vec::new();
let mut iter = args.into_iter().map(Into::into).peekable();

while let Some(arg) = iter.next() {
if arg == "--" {
processed.push(arg);
processed.extend(iter);
break;
}

let as_str = arg.to_string_lossy();
if let Some(from_spec) = as_str.strip_prefix('+') {
if let Some(from) = parse_legacy_part(from_spec) {
let mut to_part = None;

let next_candidate = iter.peek().map(|next| next.to_string_lossy().to_string());

if let Some(next_str) = next_candidate {
if let Some(stripped) = next_str.strip_prefix('-') {
if stripped.starts_with(|c: char| c.is_ascii_digit()) {
let next_arg = iter.next().unwrap();
if let Some(parsed) = parse_legacy_part(stripped) {
to_part = Some(parsed);
} else {
processed.push(arg);
processed.push(next_arg);
continue;
}
}
}
}

let keydef = legacy_key_to_k(&from, to_part.as_ref());
processed.push(OsString::from(format!("-k{keydef}")));
continue;
}
}

processed.push(arg);
}

processed
}

#[cfg(target_os = "linux")]
const LINUX_BATCH_DIVISOR: usize = 4;
#[cfg(target_os = "linux")]
Expand Down Expand Up @@ -1116,7 +1257,11 @@ fn default_merge_batch_size() -> usize {
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let mut settings = GlobalSettings::default();

let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 2)?;
let matches = uucore::clap_localization::handle_clap_result_with_exit_code(
uu_app(),
preprocess_legacy_args(args),
2,
)?;

// Prevent -o/--output to be specified multiple times
if matches
Expand Down
27 changes: 27 additions & 0 deletions tests/by-util/test_sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,33 @@ fn test_invalid_buffer_size() {
}
}

#[test]
fn test_legacy_plus_minus_accepts_when_modern_posix2() {
let size_max = usize::MAX;
let (at, mut ucmd) = at_and_ucmd!();
at.write("input.txt", "aa\nbb\n");

ucmd.env("_POSIX2_VERSION", "200809")
.arg(format!("+0.{size_max}R"))
.arg("input.txt")
.succeeds()
.stdout_is("aa\nbb\n");
}

#[test]
fn test_legacy_plus_minus_accepts_with_size_max() {
let size_max = usize::MAX;
let (at, mut ucmd) = at_and_ucmd!();
at.write("input.txt", "aa\nbb\n");

ucmd.env("_POSIX2_VERSION", "200809")
.arg("+1")
.arg(format!("-1.{size_max}R"))
.arg("input.txt")
.succeeds()
.stdout_is("aa\nbb\n");
}

#[test]
fn test_ext_sort_stable() {
new_ucmd!()
Expand Down
Loading