diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index fd3fdab4df7..df362fd0258 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -502,6 +502,17 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +[[package]] +name = "fixed_decimal" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35eabf480f94d69182677e37571d3be065822acfafd12f2f085db44fbbcc8e57" +dependencies = [ + "displaydoc", + "smallvec", + "writeable", +] + [[package]] name = "flate2" version = "1.1.5" @@ -676,6 +687,27 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_decimal" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a38c52231bc348f9b982c1868a2af3195199623007ba2c7650f432038f5b3e8e" +dependencies = [ + "fixed_decimal", + "icu_decimal_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_decimal_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2905b4044eab2dd848fe84199f9195567b63ab3a93094711501363f63546fef7" + [[package]] name = "icu_locale" version = "2.1.1" @@ -1731,7 +1763,9 @@ dependencies = [ "glob", "hex", "icu_collator", + "icu_decimal", "icu_locale", + "icu_provider", "itertools", "libc", "md-5", diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 8a9570eaa30..4763755160e 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -34,7 +34,12 @@ self_cell = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } unicode-width = { workspace = true } -uucore = { workspace = true, features = ["fs", "parser-size", "version-cmp"] } +uucore = { workspace = true, features = [ + "fs", + "parser-size", + "version-cmp", + "i18n-decimal", +] } fluent = { workspace = true } [target.'cfg(unix)'.dependencies] diff 
--git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 071163c5aee..a3b63c29bc4 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -45,6 +45,7 @@ use uucore::error::{FromIo, strip_errno}; use uucore::error::{UError, UResult, USimpleError, UUsageError}; use uucore::extendedbigdecimal::ExtendedBigDecimal; use uucore::format_usage; +use uucore::i18n::decimal::locale_decimal_separator; use uucore::line_ending::LineEnding; use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError}; use uucore::parser::parse_size::{ParseSizeError, Parser}; @@ -113,6 +114,14 @@ mod options { const DECIMAL_PT: u8 = b'.'; +fn locale_decimal_pt() -> u8 { + match locale_decimal_separator().as_bytes().first().copied() { + Some(b'.') => b'.', + Some(b',') => b',', + _ => DECIMAL_PT, + } +} + const NEGATIVE: &u8 = &b'-'; const POSITIVE: &u8 = &b'+'; @@ -637,8 +646,8 @@ impl<'a> Line<'a> { } SortMode::GeneralNumeric => { let initial_selection = &self.line[selection.clone()]; - - let leading = get_leading_gen(initial_selection); + let decimal_pt = locale_decimal_pt(); + let leading = get_leading_gen(initial_selection, decimal_pt); // Shorten selection to leading. selection.start += leading.start; @@ -965,7 +974,11 @@ impl FieldSelector { Selection::WithNumInfo(range_str, info) } else if self.settings.mode == SortMode::GeneralNumeric { // Parse this number as BigDecimal, as this is the requirement for general numeric sorting. - Selection::AsBigDecimal(general_bd_parse(&range_str[get_leading_gen(range_str)])) + let decimal_pt = locale_decimal_pt(); + Selection::AsBigDecimal(general_bd_parse( + &range_str[get_leading_gen(range_str, decimal_pt)], + decimal_pt, + )) } else { // This is not a numeric sort, so we don't need a NumCache. Selection::Str(range_str) @@ -2020,7 +2033,7 @@ fn ascii_case_insensitive_cmp(a: &[u8], b: &[u8]) -> Ordering { // scientific notation, so we strip those lines only after the end of the following numeric string. 
// For example, 5e10KFD would be 5e10 or 5x10^10 and +10000HFKJFK would become 10000. #[allow(clippy::cognitive_complexity)] -fn get_leading_gen(inp: &[u8]) -> Range<usize> { +fn get_leading_gen(inp: &[u8], decimal_pt: u8) -> Range<usize> { let trimmed = inp.trim_ascii_start(); let leading_whitespace_len = inp.len() - trimmed.len(); @@ -2058,7 +2071,7 @@ fn get_leading_gen(inp: &[u8]) -> Range<usize> { continue; } - if c == DECIMAL_PT && !had_decimal_pt && !had_e_notation { + if c == decimal_pt && !had_decimal_pt && !had_e_notation { had_decimal_pt = true; continue; } @@ -2101,9 +2114,16 @@ pub enum GeneralBigDecimalParseResult { /// Parse the beginning string into a [`GeneralBigDecimalParseResult`]. /// Using a [`GeneralBigDecimalParseResult`] instead of [`ExtendedBigDecimal`] is necessary to correctly order floats. #[inline(always)] -fn general_bd_parse(a: &[u8]) -> GeneralBigDecimalParseResult { +fn general_bd_parse(a: &[u8], decimal_pt: u8) -> GeneralBigDecimalParseResult { + let parsed_bytes = (decimal_pt != DECIMAL_PT).then(|| { + a.iter() + .map(|&b| if b == decimal_pt { DECIMAL_PT } else { b }) + .collect::<Vec<u8>>() + }); + let input = parsed_bytes.as_deref().unwrap_or(a); + // The string should be valid ASCII to be parsed.
- let Ok(a) = std::str::from_utf8(a) else { + let Ok(a) = std::str::from_utf8(input) else { return GeneralBigDecimalParseResult::Invalid; }; diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 6330f759df0..40b14ad984b 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1559,6 +1559,32 @@ fn test_g_float() { .stdout_is(output); } +#[test] +fn test_g_float_locale_decimal_separator() { + let Ok(locale_fr_utf8) = env::var("LOCALE_FR_UTF8") else { + return; + }; + if locale_fr_utf8 == "none" { + return; + } + + let ts = TestScenario::new("sort"); + + ts.ucmd() + .env("LC_ALL", &locale_fr_utf8) + .args(&["-g", "--stable"]) + .pipe_in("1,9\n1,10\n") + .succeeds() + .stdout_is("1,10\n1,9\n"); + + ts.ucmd() + .env("LC_ALL", &locale_fr_utf8) + .args(&["-g", "--stable"]) + .pipe_in("1.9\n1.10\n") + .succeeds() + .stdout_is("1.10\n1.9\n"); +} + #[test] // Test misc numbers ("'a" is not interpreted as literal, trailing text is ignored...) fn test_g_misc() {