From 5a6f2636a22dc322a1bccb6406756c2b5ae26e7f Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 20 Dec 2025 21:35:16 +0900 Subject: [PATCH 1/8] wc: align SIMD policy integration - use SimdPolicy::detect with hardware feature labeling - keep SIMD behavior respecting GLIBC_TUNABLES - consolidate wc SIMD debug output and tests --- .github/workflows/GnuTests.yml | 73 ++--- .../cspell.dictionaries/jargon.wordlist.txt | 9 + Cargo.lock | 24 +- Cargo.toml | 6 +- DEVELOPMENT.md | 2 - README.md | 6 +- fuzz/Cargo.lock | 20 +- fuzz/uufuzz/src/lib.rs | 9 +- src/uu/base32/src/base_common.rs | 116 ++++--- src/uu/chroot/src/chroot.rs | 2 +- src/uu/cp/src/cp.rs | 3 +- src/uu/date/Cargo.toml | 2 +- src/uu/date/src/date.rs | 55 ++-- src/uu/date/src/locale.rs | 177 +++++++++++ src/uu/fold/src/fold.rs | 2 +- src/uu/id/src/id.rs | 4 +- src/uu/kill/src/kill.rs | 4 +- src/uu/ls/src/ls.rs | 6 +- src/uu/nl/src/nl.rs | 34 +-- src/uu/nohup/src/nohup.rs | 20 +- src/uu/sort/Cargo.toml | 4 +- src/uu/sort/src/merge.rs | 39 ++- src/uu/sort/src/sort.rs | 55 +++- src/uu/stdbuf/src/stdbuf.rs | 3 +- src/uu/truncate/src/truncate.rs | 288 +++++++----------- src/uu/wc/Cargo.toml | 13 +- src/uu/wc/locales/en-US.ftl | 6 + src/uu/wc/locales/fr-FR.ftl | 6 + src/uu/wc/src/count_fast.rs | 14 +- src/uu/wc/src/wc.rs | 58 ++++ src/uucore/src/lib/features/entries.rs | 6 +- src/uucore/src/lib/features/format/spec.rs | 10 +- src/uucore/src/lib/features/fsext.rs | 20 +- src/uucore/src/lib/features/systemd_logind.rs | 37 +-- src/uucore/src/lib/features/uptime.rs | 3 +- src/uucore/src/lib/features/utmpx.rs | 2 +- src/uucore/src/lib/mods/locale.rs | 3 +- tests/by-util/test_date.rs | 76 +++++ tests/by-util/test_kill.rs | 24 ++ tests/by-util/test_nl.rs | 35 ++- tests/by-util/test_nohup.rs | 13 +- tests/by-util/test_printf.rs | 10 + tests/by-util/test_wc.rs | 66 ++++ tests/uutests/src/lib/util.rs | 4 +- util/build-gnu.sh | 49 +-- util/fetch-gnu.sh | 9 + util/run-gnu-test.sh | 32 +- util/why-error.md | 6 +- util/why-skip.md | 2 - 49 files changed, 953 insertions(+), 514 deletions(-) create mode 100644 src/uu/date/src/locale.rs create mode 100755 util/fetch-gnu.sh diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml index d4627af27ac..6c528dbd32a 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -2,7 +2,7 @@ name: GnuTests # spell-checker:ignore (abbrev/names) CodeCov gnulib GnuTests Swatinem # spell-checker:ignore (jargon) submodules devel -# spell-checker:ignore (libs/utils) autopoint chksum getenforce gperf lcov libexpect limactl pyinotify setenforce shopt texinfo valgrind libattr libcap taiki-e +# spell-checker:ignore (libs/utils) autopoint chksum dpkg getenforce getlimits gperf lcov libexpect limactl pyinotify setenforce shopt texinfo valgrind libattr libcap taiki-e # spell-checker:ignore (options) Ccodegen Coverflow Cpanic Zpanic # spell-checker:ignore (people) Dawid Dziurla * dawidd dtolnay # spell-checker:ignore (vars) FILESET SUBDIRS XPASS @@ -42,16 +42,6 @@ jobs: with: path: 'uutils' persist-credentials: false - - name: Extract GNU version from build-gnu.sh - id: gnu-version - run: | - GNU_VERSION=$(grep '^release_tag_GNU=' uutils/util/build-gnu.sh | cut -d'"' -f2) - if [ -z "$GNU_VERSION" ]; then - echo "Error: Failed to extract GNU version from build-gnu.sh" - exit 1 - fi - echo "REPO_GNU_REF=${GNU_VERSION}" >> $GITHUB_ENV - echo "Extracted GNU version: ${GNU_VERSION}" - uses: dtolnay/rust-toolchain@master with: toolchain: stable @@ -60,21 +50,18 @@ jobs: with: workspaces: "./uutils -> target" - name: Checkout code (GNU coreutils) - uses: actions/checkout@v6 + run: (mkdir -p gnu && cd gnu && bash ../uutils/util/fetch-gnu.sh) + - name: Restore files for faster configure and skipping make + uses: actions/cache@v5 + id: cache-config-gnu with: - repository: 'coreutils/coreutils' - path: 'gnu' - ref: ${{ env.REPO_GNU_REF }} - submodules: false - persist-credentials: false - - name: Override submodule URL and initialize submodules - # Use github instead of upstream git server - run: | - git submodule sync --recursive - git config submodule.gnulib.url https://github.com/coreutils/gnulib.git - git submodule update --init --recursive --depth 1 - working-directory: gnu - + path: | + gnu/config.cache + gnu/src/getlimits + key: ${{ runner.os }}-gnu-config-${{ env.REPO_GNU_REF }}-${{ hashFiles('gnu/configure') }} + restore-keys: | + ${{ runner.os }}-gnu-config-${{ env.REPO_GNU_REF }}- + ${{ runner.os }}-gnu-config- #### Build environment setup - name: Install dependencies shell: bash @@ -83,6 +70,8 @@ jobs: sudo apt-get update ## Check that build-gnu.sh works on the non SELinux system by installing libselinux only on lima sudo apt-get install -y autopoint gperf gdb python3-pyinotify valgrind libexpect-perl libacl1-dev libattr1-dev libcap-dev attr quilt + curl http://launchpadlibrarian.net/831710181/automake_1.18.1-3_all.deb > automake-1.18.deb + sudo dpkg -i --force-depends automake-1.18.deb - name: Add various locales shell: bash run: | @@ -115,6 +104,15 @@ jobs: ## Build binaries cd 'uutils' env PROFILE=release-small bash util/build-gnu.sh + + - name: Save files for faster configure and skipping make + uses: actions/cache/save@v5 + if: always() && steps.cache-config-gnu.outputs.cache-hit != 'true' + with: + path: | + gnu/config.cache + gnu/src/getlimits + key: ${{ runner.os }}-gnu-config-${{ env.REPO_GNU_REF }}-${{ hashFiles('gnu/configure') }} ### Run tests as user - name: Run GNU tests @@ -206,16 +204,6 @@ jobs: with: path: 'uutils' persist-credentials: false - - name: Extract GNU version from build-gnu.sh - id: gnu-version-selinux - run: | - GNU_VERSION=$(grep '^release_tag_GNU=' uutils/util/build-gnu.sh | cut -d'"' -f2) - if [ -z "$GNU_VERSION" ]; then - echo "Error: Failed to extract GNU version from build-gnu.sh" - exit 1 - fi - echo "REPO_GNU_REF=${GNU_VERSION}" >> $GITHUB_ENV - echo "Extracted GNU version: ${GNU_VERSION}" - uses: dtolnay/rust-toolchain@master with: toolchain: stable @@ -224,20 +212,7 @@ jobs: with: workspaces: "./uutils -> target" - name: Checkout code (GNU coreutils) - uses: actions/checkout@v6 - with: - repository: 'coreutils/coreutils' - path: 'gnu' - ref: ${{ env.REPO_GNU_REF }} - submodules: false - persist-credentials: false - - name: Override submodule URL and initialize submodules - # Use github instead of upstream git server - run: | - git submodule sync --recursive - git config submodule.gnulib.url https://github.com/coreutils/gnulib.git - git submodule update --init --recursive --depth 1 - working-directory: gnu + run: (mkdir -p gnu && cd gnu && bash ../uutils/util/fetch-gnu.sh) #### Lima build environment setup - name: Setup Lima diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index d2febb7724f..abb35daf362 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -1,4 +1,6 @@ AFAICT +asimd +ASIMD alloc arity autogenerate @@ -71,11 +73,13 @@ hardlink hardlinks hasher hashsums +hwcaps infile iflag iflags kibi kibibytes +langinfo libacl lcase listxattr @@ -129,6 +133,7 @@ semiprimes setcap setfacl setfattr +setlocale shortcode shortcodes siginfo @@ -147,6 +152,8 @@ tokenize toolchain totalram truthy +tunables +TUNABLES ucase unbuffered udeps @@ -163,6 +170,8 @@ xattrs xpass # * abbreviations +AMPM +ampm consts deps dev diff --git a/Cargo.lock b/Cargo.lock index fe0ee52a136..37b3362e612 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -367,9 +367,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.61" +version = "4.5.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39615915e2ece2550c0149addac32fb5bd312c657f43845bb9088cb9c8a7c992" +checksum = "004eef6b14ce34759aa7de4aea3217e368f463f46a3ed3764ca4b5a4404003b4" dependencies = [ "clap", ] @@ -699,9 +699,9 @@ checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" [[package]] name = "crc-fast" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c15e7f62c7d6e256e6d0fc3fc1ef395348e4bc395dcf14d6990da0e5aa6e8b0" +checksum = "85d9be5297a59f1b7651fd2711a1f4461929f53b182b394df0df15b3a387ef51" dependencies = [ "crc", "digest", @@ -1575,7 +1575,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -1873,7 +1873,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2439,7 +2439,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2745,7 +2745,7 @@ dependencies = [ "getrandom 0.3.3", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3191,7 +3191,7 @@ dependencies = [ "clap", "fluent", "jiff", - "libc", + "nix", "parse_datetime", "uucore", "windows-sys 0.61.2", @@ -4409,7 +4409,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4815,9 +4815,9 @@ dependencies = [ [[package]] name = "zip" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +checksum = "bdd8a47718a4ee5fe78e07667cd36f3de80e7c2bfe727c7074245ffc7303c037" dependencies = [ "arbitrary", "crc32fast", diff --git a/Cargo.toml b/Cargo.toml index 7c44e64d3ba..b388373a2aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -379,7 +379,7 @@ walkdir = "2.5" winapi-util = "0.1.8" windows-sys = { version = "0.61.0", default-features = false } xattr = "1.3.1" -zip = { version = "6.0.0", default-features = false, features = ["deflate"] } +zip = { version = "7.0.0", default-features = false, features = ["deflate"] } hex = "0.4.3" md-5 = "0.10.6" @@ -666,11 +666,7 @@ should_panic_without_expect = "allow" # 2 doc_markdown = "allow" unused_self = "allow" -map_unwrap_or = "allow" enum_glob_use = "allow" -ptr_cast_constness = "allow" -borrow_as_ptr = "allow" -ptr_as_ptr = "allow" needless_raw_string_hashes = "allow" unreadable_literal = "allow" unnested_or_patterns = "allow" diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index f9636625b01..4f885e085dd 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -244,8 +244,6 @@ DEBUG=1 bash util/run-gnu-test.sh tests/misc/sm3sum.pl ***Tip:*** First time you run `bash util/build-gnu.sh` command, it will provide instructions on how to checkout GNU coreutils repository at the correct release tag. Please follow those instructions and when done, run `bash util/build-gnu.sh` command again. -Note that GNU test suite relies on individual utilities (not the multicall binary). - You also need to install [quilt](https://savannah.nongnu.org/projects/quilt), a tool used to manage a stack of patches for modifying GNU tests. On FreeBSD, you need to install packages for GNU coreutils and sed (used in shell scripts instead of system commands): diff --git a/README.md b/README.md index 3ed607d42b6..b60fa5cd41b 100644 --- a/README.md +++ b/README.md @@ -29,10 +29,14 @@ options might be missing or different behavior might be experienced.
+We provide prebuilt binaries at https://github.com/uutils/coreutils/releases/latest . +It is recommended to install from main branch if you install from source. + To install it: ```shell -cargo install coreutils +cargo install --git https://github.com/uutils/coreutils coreutils +# cargo install --git https://github.com/uutils/coreutils uu_true # for one util only ~/.cargo/bin/coreutils ``` diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index ccb71eaffbe..90934a2712c 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -53,7 +53,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -64,7 +64,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -271,9 +271,9 @@ checksum = "120133d4db2ec47efe2e26502ee984747630c67f51974fca0b6c1340cf2368d3" [[package]] name = "console" -version = "0.16.1" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b430743a6eb14e9764d4260d4c0d8123087d504eeb9c48f2b2a5e810dd369df4" +checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4" dependencies = [ "encode_unicode", "libc", @@ -504,7 +504,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -834,7 +834,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -1281,7 +1281,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -1447,7 +1447,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -1597,7 +1597,7 @@ dependencies = [ "clap", "fluent", "jiff", - "libc", + "nix", "parse_datetime", "uucore", "windows-sys 0.61.2", @@ -1903,7 +1903,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] diff --git a/fuzz/uufuzz/src/lib.rs b/fuzz/uufuzz/src/lib.rs index 4a7b2ea7208..e94ffd8b189 100644 --- a/fuzz/uufuzz/src/lib.rs +++ b/fuzz/uufuzz/src/lib.rs @@ -193,13 +193,8 @@ fn read_from_fd(fd: RawFd) -> String { let mut captured_output = Vec::new(); let mut read_buffer = [0; 1024]; loop { - let bytes_read = unsafe { - libc::read( - fd, - read_buffer.as_mut_ptr() as *mut libc::c_void, - read_buffer.len(), - ) - }; + let bytes_read = + unsafe { libc::read(fd, read_buffer.as_mut_ptr().cast(), read_buffer.len()) }; if bytes_read == -1 { eprintln!("Failed to read from the pipe"); diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index c44d6f7ee2f..d7f7a9ce9f4 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -8,7 +8,7 @@ use clap::{Arg, ArgAction, Command}; use std::ffi::OsString; use std::fs::File; -use std::io::{self, BufReader, ErrorKind, Read, Write}; +use std::io::{self, BufRead, BufReader, ErrorKind, Write}; use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::encoding::{ @@ -146,20 +146,26 @@ pub fn base_app(about: String, usage: String) -> Command { ) } -pub fn get_input(config: &Config) -> UResult> { +pub fn get_input(config: &Config) -> UResult> { match &config.to_read { Some(path_buf) => { let file = File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?; - Ok(Box::new(BufReader::new(file))) + Ok(Box::new(BufReader::with_capacity( + DEFAULT_BUFFER_SIZE, + file, + ))) } None => { // Stdin is already buffered by the OS; wrap once more to reduce syscalls per read. - Ok(Box::new(BufReader::new(io::stdin()))) + Ok(Box::new(BufReader::with_capacity( + DEFAULT_BUFFER_SIZE, + io::stdin(), + ))) } } } -pub fn handle_input(input: &mut R, format: Format, config: Config) -> UResult<()> { +pub fn handle_input(input: &mut R, format: Format, config: Config) -> UResult<()> { // Always allow padding for Base64 to avoid a full pre-scan of the input. let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format, config.decode, true); @@ -292,11 +298,11 @@ pub fn get_supports_fast_decode_and_encode( } pub mod fast_encode { - use crate::base_common::{DEFAULT_BUFFER_SIZE, WRAP_DEFAULT}; + use crate::base_common::WRAP_DEFAULT; use std::{ cmp::min, collections::VecDeque, - io::{self, Read, Write}, + io::{self, BufRead, Write}, num::NonZeroUsize, }; use uucore::{ @@ -519,7 +525,7 @@ pub mod fast_encode { /// Remaining bytes are encoded and flushed at the end. I/O or encoding /// failures are propagated via `UResult`. pub fn fast_encode_stream( - input: &mut dyn Read, + input: &mut dyn BufRead, output: &mut dyn Write, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, wrap: Option, @@ -544,47 +550,79 @@ pub mod fast_encode { }; // Buffers - let mut leftover_buffer = VecDeque::::new(); let mut encoded_buffer = VecDeque::::new(); - - let mut read_buffer = vec![0u8; encode_in_chunks_of_size.max(DEFAULT_BUFFER_SIZE)]; + let mut leftover_buffer = Vec::::with_capacity(encode_in_chunks_of_size); loop { - let read = input - .read(&mut read_buffer) + let read_buffer = input + .fill_buf() .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; - if read == 0 { + if read_buffer.is_empty() { break; } - leftover_buffer.extend(&read_buffer[..read]); + let mut consumed = 0; - while leftover_buffer.len() >= encode_in_chunks_of_size { - { - let contiguous = leftover_buffer.make_contiguous(); + if !leftover_buffer.is_empty() { + let needed = encode_in_chunks_of_size - leftover_buffer.len(); + let take = needed.min(read_buffer.len()); + leftover_buffer.extend_from_slice(&read_buffer[..take]); + consumed += take; + + if leftover_buffer.len() == encode_in_chunks_of_size { encode_in_chunks_to_buffer( supports_fast_decode_and_encode, - &contiguous[..encode_in_chunks_of_size], + leftover_buffer.as_slice(), + &mut encoded_buffer, + )?; + leftover_buffer.clear(); + + write_to_output( + &mut line_wrapping, &mut encoded_buffer, + output, + false, + wrap == Some(0), )?; } + } - // Drop the data we just encoded - leftover_buffer.drain(..encode_in_chunks_of_size); + let remaining = &read_buffer[consumed..]; + let full_chunk_bytes = + (remaining.len() / encode_in_chunks_of_size) * encode_in_chunks_of_size; - write_to_output( - &mut line_wrapping, - &mut encoded_buffer, - output, - false, - wrap == Some(0), - )?; + if full_chunk_bytes > 0 { + for chunk in remaining[..full_chunk_bytes].chunks_exact(encode_in_chunks_of_size) { + encode_in_chunks_to_buffer( + supports_fast_decode_and_encode, + chunk, + &mut encoded_buffer, + )?; + write_to_output( + &mut line_wrapping, + &mut encoded_buffer, + output, + false, + wrap == Some(0), + )?; + } + consumed += full_chunk_bytes; + } + + if consumed < read_buffer.len() { + leftover_buffer.extend_from_slice(&read_buffer[consumed..]); + consumed = read_buffer.len(); } + + input.consume(consumed); + + // `leftover_buffer` should never exceed one partial chunk. + debug_assert!(leftover_buffer.len() < encode_in_chunks_of_size); } // Encode any remaining bytes and flush supports_fast_decode_and_encode - .encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?; + .encode_to_vec_deque(&leftover_buffer, &mut encoded_buffer)?; write_to_output( &mut line_wrapping, @@ -599,8 +637,7 @@ pub mod fast_encode { } pub mod fast_decode { - use crate::base_common::DEFAULT_BUFFER_SIZE; - use std::io::{self, Read, Write}; + use std::io::{self, BufRead, Write}; use uucore::{ encoding::SupportsFastDecodeAndEncode, error::{UResult, USimpleError}, @@ -630,7 +667,6 @@ pub mod fast_decode { fn write_to_output(decoded_buffer: &mut Vec, output: &mut dyn Write) -> io::Result<()> { // Write all data in `decoded_buffer` to `output` output.write_all(decoded_buffer.as_slice())?; - output.flush()?; decoded_buffer.clear(); @@ -764,7 +800,7 @@ pub mod fast_decode { } pub fn fast_decode_stream( - input: &mut dyn Read, + input: &mut dyn BufRead, output: &mut dyn Write, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, ignore_garbage: bool, @@ -783,17 +819,17 @@ pub mod fast_decode { let mut buffer = Vec::with_capacity(decode_in_chunks_of_size); let mut decoded_buffer = Vec::::new(); - let mut read_buffer = [0u8; DEFAULT_BUFFER_SIZE]; loop { - let read = input - .read(&mut read_buffer) + let read_buffer = input + .fill_buf() .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; - if read == 0 { + let read_len = read_buffer.len(); + if read_len == 0 { break; } - for &byte in &read_buffer[..read] { + for &byte in read_buffer { if byte == b'\n' || byte == b'\r' { continue; } @@ -845,6 +881,8 @@ pub mod fast_decode { buffer.clear(); } } + + input.consume(read_len); } if supports_partial_decode { @@ -902,7 +940,7 @@ fn format_read_error(kind: ErrorKind) -> String { /// Determines if the input buffer contains any padding ('=') ignoring trailing whitespace. #[cfg(test)] -fn read_and_has_padding(input: &mut R) -> UResult<(bool, Vec)> { +fn read_and_has_padding(input: &mut R) -> UResult<(bool, Vec)> { let mut buf = Vec::new(); input .read_to_end(&mut buf) diff --git a/src/uu/chroot/src/chroot.rs b/src/uu/chroot/src/chroot.rs index 0ac59df17be..6f615885054 100644 --- a/src/uu/chroot/src/chroot.rs +++ b/src/uu/chroot/src/chroot.rs @@ -439,7 +439,7 @@ fn enter_chroot(root: &Path, skip_chdir: bool) -> UResult<()> { .map_err(|e| ChrootError::CannotEnter("root".to_string(), e.into()))? .as_bytes_with_nul() .as_ptr() - .cast::(), + .cast(), ) }; diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index 650ec134882..c1df9ed139a 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -2319,8 +2319,7 @@ fn copy_file( let initial_dest_metadata = dest.symlink_metadata().ok(); let dest_is_symlink = initial_dest_metadata .as_ref() - .map(|md| md.file_type().is_symlink()) - .unwrap_or(false); + .is_some_and(|md| md.file_type().is_symlink()); let dest_target_exists = dest.try_exists().unwrap_or(false); // Fail if dest is a dangling symlink or a symlink this program created previously if dest_is_symlink { diff --git a/src/uu/date/Cargo.toml b/src/uu/date/Cargo.toml index 2d5f53d4b81..431868b9175 100644 --- a/src/uu/date/Cargo.toml +++ b/src/uu/date/Cargo.toml @@ -30,7 +30,7 @@ parse_datetime = { workspace = true } uucore = { workspace = true, features = ["parser"] } [target.'cfg(unix)'.dependencies] -libc = { workspace = true } +nix = { workspace = true, features = ["time"] } [target.'cfg(windows)'.dependencies] windows-sys = { workspace = true, features = [ diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 5321256007d..93c08546691 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -5,14 +5,12 @@ // spell-checker:ignore strtime ; (format) DATEFILE MMDDhhmm ; (vars) datetime datetimes getres AWST ACST AEST +mod locale; + use clap::{Arg, ArgAction, Command}; use jiff::fmt::strtime; use jiff::tz::{TimeZone, TimeZoneDatabase}; use jiff::{Timestamp, Zoned}; -#[cfg(all(unix, not(target_os = "macos"), not(target_os = "redox")))] -use libc::clock_settime; -#[cfg(all(unix, not(target_os = "redox")))] -use libc::{CLOCK_REALTIME, clock_getres, timespec}; use std::collections::HashMap; use std::fs::File; use std::io::{BufRead, BufReader}; @@ -491,6 +489,7 @@ pub fn uu_app() -> Command { .short('s') .long(OPT_SET) .value_name("STRING") + .allow_hyphen_values(true) .help({ #[cfg(not(any(target_os = "macos", target_os = "redox")))] { @@ -537,7 +536,7 @@ fn make_format_string(settings: &Settings) -> &str { }, Format::Resolution => "%s.%N", Format::Custom(ref fmt) => fmt, - Format::Default => "%a %b %e %X %Z %Y", + Format::Default => locale::get_locale_default_format(), } } @@ -700,25 +699,20 @@ fn get_clock_resolution() -> Timestamp { } #[cfg(all(unix, not(target_os = "redox")))] +/// Returns the resolution of the system’s realtime clock. +/// +/// # Panics +/// +/// Panics if `clock_getres` fails. On a POSIX-compliant system this should not occur, +/// as `CLOCK_REALTIME` is required to be supported. +/// Failure would indicate a non-conforming or otherwise broken implementation. fn get_clock_resolution() -> Timestamp { - let mut timespec = timespec { - tv_sec: 0, - tv_nsec: 0, - }; - unsafe { - // SAFETY: the timespec struct lives for the full duration of this function call. - // - // The clock_getres function can only fail if the passed clock_id is not - // a known clock. All compliant posix implementors must support - // CLOCK_REALTIME, therefore this function call cannot fail on any - // compliant posix implementation. - // - // See more here: - // https://pubs.opengroup.org/onlinepubs/9799919799/functions/clock_getres.html - clock_getres(CLOCK_REALTIME, &raw mut timespec); - } + use nix::time::{ClockId, clock_getres}; + + let timespec = clock_getres(ClockId::CLOCK_REALTIME).unwrap(); + #[allow(clippy::unnecessary_cast)] // Cast required on 32-bit platforms - Timestamp::constant(timespec.tv_sec as i64, timespec.tv_nsec as i32) + Timestamp::constant(timespec.tv_sec() as _, timespec.tv_nsec() as _) } #[cfg(all(unix, target_os = "redox"))] @@ -766,20 +760,13 @@ fn set_system_datetime(_date: Zoned) -> UResult<()> { /// `` /// `` fn set_system_datetime(date: Zoned) -> UResult<()> { - let ts = date.timestamp(); - let timespec = timespec { - tv_sec: ts.as_second() as _, - tv_nsec: ts.subsec_nanosecond() as _, - }; + use nix::{sys::time::TimeSpec, time::ClockId}; - let result = unsafe { clock_settime(CLOCK_REALTIME, &raw const timespec) }; + let ts = date.timestamp(); + let timespec = TimeSpec::new(ts.as_second() as _, ts.subsec_nanosecond() as _); - if result == 0 { - Ok(()) - } else { - Err(std::io::Error::last_os_error() - .map_err_context(|| translate!("date-error-cannot-set-date"))) - } + nix::time::clock_settime(ClockId::CLOCK_REALTIME, timespec) + .map_err_context(|| translate!("date-error-cannot-set-date")) } #[cfg(windows)] diff --git a/src/uu/date/src/locale.rs b/src/uu/date/src/locale.rs new file mode 100644 index 00000000000..6b756e97d48 --- /dev/null +++ b/src/uu/date/src/locale.rs @@ -0,0 +1,177 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Locale detection for time format preferences + +// nl_langinfo is available on glibc (Linux), Apple platforms, and BSDs +// but not on Android, Redox or other minimal Unix systems + +// Macro to reduce cfg duplication across the module +macro_rules! cfg_langinfo { + ($($item:item)*) => { + $( + #[cfg(any( + target_os = "linux", + target_vendor = "apple", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "dragonfly" + ))] + $item + )* + } +} + +cfg_langinfo! { + use std::ffi::CStr; + use std::sync::OnceLock; +} + +cfg_langinfo! { + /// Cached result of locale time format detection + static TIME_FORMAT_CACHE: OnceLock = OnceLock::new(); + + /// Safe wrapper around libc setlocale + fn set_time_locale() { + unsafe { + nix::libc::setlocale(nix::libc::LC_TIME, c"".as_ptr()); + } + } + + /// Safe wrapper around libc nl_langinfo that returns `Option` + fn get_locale_info(item: nix::libc::nl_item) -> Option { + unsafe { + let ptr = nix::libc::nl_langinfo(item); + if ptr.is_null() { + None + } else { + CStr::from_ptr(ptr).to_str().ok().map(String::from) + } + } + } + + /// Internal function that performs the actual locale detection + fn detect_12_hour_format() -> bool { + // Helper function to check for 12-hour format indicators + fn has_12_hour_indicators(format_str: &str) -> bool { + const INDICATORS: &[&str] = &["%I", "%l", "%r"]; + INDICATORS.iter().any(|&indicator| format_str.contains(indicator)) + } + + // Helper function to check for 24-hour format indicators + fn has_24_hour_indicators(format_str: &str) -> bool { + const INDICATORS: &[&str] = &["%H", "%k", "%R", "%T"]; + INDICATORS.iter().any(|&indicator| format_str.contains(indicator)) + } + + // Set locale from environment variables (empty string = use LC_TIME/LANG env vars) + set_time_locale(); + + // Get locale format strings using safe wrappers + let d_t_fmt = get_locale_info(nix::libc::D_T_FMT); + let t_fmt_opt = get_locale_info(nix::libc::T_FMT); + let t_fmt_ampm_opt = get_locale_info(nix::libc::T_FMT_AMPM); + + // Check D_T_FMT first + if let Some(ref format) = d_t_fmt { + // Check for 12-hour indicators first (higher priority) + if has_12_hour_indicators(format) { + return true; + } + + // If we find 24-hour indicators, it's definitely not 12-hour + if has_24_hour_indicators(format) { + return false; + } + } + + // Also check the time-only format as a fallback + if let Some(ref time_format) = t_fmt_opt { + if has_12_hour_indicators(time_format) { + return true; + } + } + + // Check if there's a specific 12-hour format defined + if let Some(ref ampm_format) = t_fmt_ampm_opt { + // If T_FMT_AMPM is non-empty and different from T_FMT, locale supports 12-hour + if !ampm_format.is_empty() { + if let Some(ref time_format) = t_fmt_opt { + if ampm_format != time_format { + return true; + } + } else { + return true; + } + } + } + + // Default to 24-hour format if we can't determine + false + } +} + +cfg_langinfo! { + /// Detects whether the current locale prefers 12-hour or 24-hour time format + /// Results are cached for performance + pub fn uses_12_hour_format() -> bool { + *TIME_FORMAT_CACHE.get_or_init(detect_12_hour_format) + } + + /// Cached default format string + static DEFAULT_FORMAT_CACHE: OnceLock<&'static str> = OnceLock::new(); + + /// Get the locale-appropriate default format string for date output + /// This respects the locale's preference for 12-hour vs 24-hour time + /// Results are cached for performance (following uucore patterns) + pub fn get_locale_default_format() -> &'static str { + DEFAULT_FORMAT_CACHE.get_or_init(|| { + if uses_12_hour_format() { + // Use 12-hour format with AM/PM + "%a %b %e %r %Z %Y" + } else { + // Use 24-hour format + "%a %b %e %X %Z %Y" + } + }) + } +} + +/// On platforms without nl_langinfo support, use 24-hour format by default +#[cfg(not(any( + target_os = "linux", + target_vendor = "apple", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "dragonfly" +)))] +pub fn get_locale_default_format() -> &'static str { + "%a %b %e %X %Z %Y" +} + +#[cfg(test)] +mod tests { + cfg_langinfo! { + use super::*; + + #[test] + fn test_locale_detection() { + // Just verify the function doesn't panic + let _ = uses_12_hour_format(); + let _ = get_locale_default_format(); + } + + #[test] + fn test_default_format_contains_valid_codes() { + let format = get_locale_default_format(); + assert!(format.contains("%a")); // abbreviated weekday + assert!(format.contains("%b")); // abbreviated month + assert!(format.contains("%Y")); // year + assert!(format.contains("%Z")); // timezone + } + } +} diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs index a2ddbed6a68..2eb97933180 100644 --- a/src/uu/fold/src/fold.rs +++ b/src/uu/fold/src/fold.rs @@ -443,7 +443,7 @@ fn process_utf8_line(line: &str, ctx: &mut FoldContext<'_, W>) -> URes } } - let next_idx = iter.peek().map(|(idx, _)| *idx).unwrap_or(line_bytes.len()); + let next_idx = iter.peek().map_or(line_bytes.len(), |(idx, _)| *idx); if ch == '\n' { *ctx.last_space = None; diff --git a/src/uu/id/src/id.rs b/src/uu/id/src/id.rs index dcdc692435d..298619fd5e3 100644 --- a/src/uu/id/src/id.rs +++ b/src/uu/id/src/id.rs @@ -468,7 +468,7 @@ fn pretty(possible_pw: Option) { "{}", p.belongs_to() .iter() - .map(|&gr| entries::gid2grp(gr).unwrap()) + .map(|&gr| entries::gid2grp(gr).unwrap_or_else(|_| gr.to_string())) .collect::>() .join(" ") ); @@ -508,7 +508,7 @@ fn pretty(possible_pw: Option) { entries::get_groups_gnu(None) .unwrap() .iter() - .map(|&gr| entries::gid2grp(gr).unwrap()) + .map(|&gr| entries::gid2grp(gr).unwrap_or_else(|_| gr.to_string())) .collect::>() .join(" ") ); diff --git a/src/uu/kill/src/kill.rs b/src/uu/kill/src/kill.rs index 809d59b7d88..94aa81964b6 100644 --- a/src/uu/kill/src/kill.rs +++ b/src/uu/kill/src/kill.rs @@ -137,8 +137,8 @@ pub fn uu_app() -> Command { } fn handle_obsolete(args: &mut Vec) -> Option { - // Sanity check - if args.len() > 2 { + // Sanity check - need at least the program name and one argument + if args.len() >= 2 { // Old signal can only be in the first argument position let slice = args[1].as_str(); if let Some(signal) = slice.strip_prefix('-') { diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index e66da6b6ece..7abfcde8c5c 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -479,8 +479,7 @@ fn extract_sort(options: &clap::ArgMatches) -> Sort { let sort_index = options .get_one::(options::SORT) .and_then(|_| options.indices_of(options::SORT)) - .map(|mut indices| indices.next_back().unwrap_or(0)) - .unwrap_or(0); + .map_or(0, |mut indices| indices.next_back().unwrap_or(0)); let time_index = get_last_index(options::sort::TIME); let size_index = get_last_index(options::sort::SIZE); let none_index = get_last_index(options::sort::NONE); @@ -599,8 +598,7 @@ fn extract_color(options: &clap::ArgMatches) -> bool { let color_index = options .get_one::(options::COLOR) .and_then(|_| options.indices_of(options::COLOR)) - .map(|mut indices| indices.next_back().unwrap_or(0)) - .unwrap_or(0); + .map_or(0, |mut indices| indices.next_back().unwrap_or(0)); let unsorted_all_index = get_last_index(options::files::UNSORTED_ALL); let color_enabled = match options.get_one::(options::COLOR) { diff --git a/src/uu/nl/src/nl.rs b/src/uu/nl/src/nl.rs index 7d1f862aa5e..18ad095a8ab 100644 --- a/src/uu/nl/src/nl.rs +++ b/src/uu/nl/src/nl.rs @@ -345,6 +345,13 @@ pub fn uu_app() -> Command { ) } +/// Helper to write: prefix bytes + line bytes + newline +fn write_line(writer: &mut impl Write, prefix: &[u8], line: &[u8]) -> std::io::Result<()> { + writer.write_all(prefix)?; + writer.write_all(line)?; + writeln!(writer) +} + /// `nl` implements the main functionality for an individual buffer. fn nl(reader: &mut BufReader, stats: &mut Stats, settings: &Settings) -> UResult<()> { let mut writer = BufWriter::new(stdout()); @@ -409,24 +416,17 @@ fn nl(reader: &mut BufReader, stats: &mut Stats, settings: &Settings translate!("nl-error-line-number-overflow"), )); }; - writeln!( - writer, - "{}{}{}", - settings - .number_format - .format(line_number, settings.number_width), - settings.number_separator.to_string_lossy(), - String::from_utf8_lossy(&line), - ) - .map_err_context(|| translate!("nl-error-could-not-write"))?; - // update line number for the potential next line - match line_number.checked_add(settings.line_increment) { - Some(new_line_number) => stats.line_number = Some(new_line_number), - None => stats.line_number = None, // overflow - } + let mut prefix = settings + .number_format + .format(line_number, settings.number_width) + .into_bytes(); + prefix.extend_from_slice(settings.number_separator.as_encoded_bytes()); + write_line(&mut writer, &prefix, &line) + .map_err_context(|| translate!("nl-error-could-not-write"))?; + stats.line_number = line_number.checked_add(settings.line_increment); } else { - let spaces = " ".repeat(settings.number_width + 1); - writeln!(writer, "{spaces}{}", String::from_utf8_lossy(&line)) + let prefix = " ".repeat(settings.number_width + 1); + write_line(&mut writer, prefix.as_bytes(), &line) .map_err_context(|| translate!("nl-error-could-not-write"))?; } } diff --git a/src/uu/nohup/src/nohup.rs b/src/uu/nohup/src/nohup.rs index 28292ac4154..38b5e5ceb30 100644 --- a/src/uu/nohup/src/nohup.rs +++ b/src/uu/nohup/src/nohup.rs @@ -55,10 +55,21 @@ impl UError for NohupError { } } +fn failure_code() -> i32 { + if env::var("POSIXLY_CORRECT").is_ok() { + POSIX_NOHUP_FAILURE + } else { + EXIT_CANCELED + } +} + #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let matches = - uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 125)?; + let matches = uucore::clap_localization::handle_clap_result_with_exit_code( + uu_app(), + args, + failure_code(), + )?; replace_fds()?; @@ -124,10 +135,7 @@ fn replace_fds() -> UResult<()> { } fn find_stdout() -> UResult { - let internal_failure_code = match env::var("POSIXLY_CORRECT") { - Ok(_) => POSIX_NOHUP_FAILURE, - Err(_) => EXIT_CANCELED, - }; + let internal_failure_code = failure_code(); match OpenOptions::new() .create(true) diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index e65f70d5a28..184f6776be7 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -36,7 +36,9 @@ thiserror = { workspace = true } unicode-width = { workspace = true } uucore = { workspace = true, features = ["fs", "parser-size", "version-cmp"] } fluent = { workspace = true } -nix = { workspace = true } + +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = ["resource"] } [dev-dependencies] divan = { workspace = true } diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index ea212f62f34..502dcda82a6 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -30,7 +30,7 @@ use uucore::error::{FromIo, UResult}; use crate::{ GlobalSettings, Output, SortError, chunks::{self, Chunk, RecycledChunk}, - compare_by, open, + compare_by, fd_soft_limit, open, tmp_dir::TmpDirWrapper, }; @@ -62,6 +62,28 @@ fn replace_output_file_in_input_files( Ok(()) } +/// Determine the effective merge batch size, enforcing a minimum and respecting the +/// file-descriptor soft limit after reserving stdio/output and a safety margin. +fn effective_merge_batch_size(settings: &GlobalSettings) -> usize { + const MIN_BATCH_SIZE: usize = 2; + const RESERVED_STDIO: usize = 3; + const RESERVED_OUTPUT: usize = 1; + const SAFETY_MARGIN: usize = 1; + let mut batch_size = settings.merge_batch_size.max(MIN_BATCH_SIZE); + + if let Some(limit) = fd_soft_limit() { + let reserved = RESERVED_STDIO + RESERVED_OUTPUT + SAFETY_MARGIN; + let available_inputs = limit.saturating_sub(reserved); + if available_inputs >= MIN_BATCH_SIZE { + batch_size = batch_size.min(available_inputs); + } else { + batch_size = MIN_BATCH_SIZE; + } + } + + batch_size +} + /// Merge pre-sorted `Box`s. /// /// If `settings.merge_batch_size` is greater than the length of `files`, intermediate files will be used. @@ -94,18 +116,21 @@ pub fn merge_with_file_limit< output: Output, tmp_dir: &mut TmpDirWrapper, ) -> UResult<()> { - if files.len() <= settings.merge_batch_size { + let batch_size = effective_merge_batch_size(settings); + debug_assert!(batch_size >= 2); + + if files.len() <= batch_size { let merger = merge_without_limit(files, settings); merger?.write_all(settings, output) } else { let mut temporary_files = vec![]; - let mut batch = vec![]; + let mut batch = Vec::with_capacity(batch_size); for file in files { batch.push(file); - if batch.len() >= settings.merge_batch_size { - assert_eq!(batch.len(), settings.merge_batch_size); + if batch.len() >= batch_size { + assert_eq!(batch.len(), batch_size); let merger = merge_without_limit(batch.into_iter(), settings)?; - batch = vec![]; + batch = Vec::with_capacity(batch_size); let mut tmp_file = Tmp::create(tmp_dir.next_file()?, settings.compress_prog.as_deref())?; @@ -115,7 +140,7 @@ pub fn merge_with_file_limit< } // Merge any remaining files that didn't get merged in a full batch above. if !batch.is_empty() { - assert!(batch.len() < settings.merge_batch_size); + assert!(batch.len() < batch_size); let merger = merge_without_limit(batch.into_iter(), settings)?; let mut tmp_file = diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index c25ef48147d..6122089e2f3 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -25,8 +25,6 @@ use clap::{Arg, ArgAction, Command}; use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; -#[cfg(target_os = "linux")] -use nix::libc::{RLIMIT_NOFILE, getrlimit, rlimit}; use numeric_str_cmp::{NumInfo, NumInfoParseSettings, human_numeric_str_cmp, numeric_str_cmp}; use rand::{Rng, rng}; use rayon::prelude::*; @@ -1075,14 +1073,25 @@ fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { #[cfg(target_os = "linux")] fn get_rlimit() -> UResult { - let mut limit = rlimit { - rlim_cur: 0, - rlim_max: 0, - }; - match unsafe { getrlimit(RLIMIT_NOFILE, &raw mut limit) } { - 0 => Ok(limit.rlim_cur as usize), - _ => Err(UUsageError::new(2, translate!("sort-failed-fetch-rlimit"))), + use nix::sys::resource::{RLIM_INFINITY, Resource, getrlimit}; + + let (rlim_cur, _rlim_max) = getrlimit(Resource::RLIMIT_NOFILE) + .map_err(|_| UUsageError::new(2, translate!("sort-failed-fetch-rlimit")))?; + if rlim_cur == RLIM_INFINITY { + return Err(UUsageError::new(2, translate!("sort-failed-fetch-rlimit"))); } + usize::try_from(rlim_cur) + .map_err(|_| UUsageError::new(2, translate!("sort-failed-fetch-rlimit"))) +} + +#[cfg(target_os = "linux")] +pub(crate) fn fd_soft_limit() -> Option { + get_rlimit().ok() +} + +#[cfg(not(target_os = "linux"))] +pub(crate) fn fd_soft_limit() -> Option { + None } const STDIN_FILE: &str = "-"; @@ -1237,12 +1246,12 @@ fn default_merge_batch_size() -> usize { #[cfg(target_os = "linux")] { // Adjust merge batch size dynamically based on available file descriptors. - match get_rlimit() { - Ok(limit) => { + match fd_soft_limit() { + Some(limit) => { let usable_limit = limit.saturating_div(LINUX_BATCH_DIVISOR); usable_limit.clamp(LINUX_BATCH_MIN, LINUX_BATCH_MAX) } - Err(_) => 64, + None => 64, } } @@ -1371,9 +1380,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { settings.threads = matches .get_one::(options::PARALLEL) .map_or_else(|| "0".to_string(), String::from); - unsafe { - env::set_var("RAYON_NUM_THREADS", &settings.threads); - } + let num_threads = match settings.threads.parse::() { + Ok(0) | Err(_) => std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(1), + Ok(n) => n, + }; + let _ = rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .build_global(); } if let Some(size_str) = matches.get_one::(options::BUF_SIZE) { @@ -1424,7 +1439,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { translate!( "sort-maximum-batch-size-rlimit", - "rlimit" => get_rlimit()? + "rlimit" => { + let Some(rlimit) = fd_soft_limit() else { + return Err(UUsageError::new( + 2, + translate!("sort-failed-fetch-rlimit"), + )); + }; + rlimit + } ) } #[cfg(not(target_os = "linux"))] diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index fae2942f0b6..9af3d80cab1 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -240,8 +240,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { use std::os::unix::process::ExitStatusExt; let signal_msg = status .signal() - .map(|s| s.to_string()) - .unwrap_or_else(|| "unknown".to_string()); + .map_or_else(|| "unknown".to_string(), |s| s.to_string()); Err(USimpleError::new( 1, translate!("stdbuf-error-killed-by-signal", "signal" => signal_msg), diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 7a607cc1a45..997916b24a8 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -38,6 +38,10 @@ impl TruncateMode { /// reduce by is greater than `fsize`, then this function returns /// 0 (since it cannot return a negative number). /// + /// # Returns + /// + /// `None` if rounding by 0, else the target size. + /// /// # Examples /// /// Extending a file of 10 bytes by 5 bytes: @@ -45,7 +49,7 @@ impl TruncateMode { /// ```rust,ignore /// let mode = TruncateMode::Extend(5); /// let fsize = 10; - /// assert_eq!(mode.to_size(fsize), 15); + /// assert_eq!(mode.to_size(fsize), Some(15)); /// ``` /// /// Reducing a file by more than its size results in 0: @@ -53,25 +57,36 @@ impl TruncateMode { /// ```rust,ignore /// let mode = TruncateMode::Reduce(5); /// let fsize = 3; - /// assert_eq!(mode.to_size(fsize), 0); + /// assert_eq!(mode.to_size(fsize), Some(0)); + /// ``` + /// + /// Rounding a file by 0: + /// + /// ```rust,ignore + /// let mode = TruncateMode::RoundDown(0); + /// let fsize = 17; + /// assert_eq!(mode.to_size(fsize), None); /// ``` - fn to_size(&self, fsize: u64) -> u64 { + fn to_size(&self, fsize: u64) -> Option { match self { - Self::Absolute(size) => *size, - Self::Extend(size) => fsize + size, - Self::Reduce(size) => { - if *size > fsize { - 0 - } else { - fsize - size - } - } - Self::AtMost(size) => fsize.min(*size), - Self::AtLeast(size) => fsize.max(*size), - Self::RoundDown(size) => fsize - fsize % size, - Self::RoundUp(size) => fsize + fsize % size, + Self::Absolute(size) => Some(*size), + Self::Extend(size) => Some(fsize + size), + Self::Reduce(size) => Some(fsize.saturating_sub(*size)), + Self::AtMost(size) => Some(fsize.min(*size)), + Self::AtLeast(size) => Some(fsize.max(*size)), + Self::RoundDown(size) => fsize.checked_rem(*size).map(|remainder| fsize - remainder), + Self::RoundUp(size) => fsize.checked_next_multiple_of(*size), } } + + /// Determine if mode is absolute + /// + /// # Returns + /// + /// `true` is self matches Self::Absolute(_), `false` otherwise. + fn is_absolute(&self) -> bool { + matches!(self, Self::Absolute(_)) + } } pub mod options { @@ -170,18 +185,9 @@ pub fn uu_app() -> Command { /// /// If the file could not be opened, or there was a problem setting the /// size of the file. -fn file_truncate(filename: &OsString, create: bool, size: u64) -> UResult<()> { +fn do_file_truncate(filename: &Path, create: bool, size: u64) -> UResult<()> { let path = Path::new(filename); - #[cfg(unix)] - if let Ok(metadata) = metadata(path) { - if metadata.file_type().is_fifo() { - return Err(USimpleError::new( - 1, - translate!("truncate-error-cannot-open-no-device", "filename" => filename.to_string_lossy().quote()), - )); - } - } match OpenOptions::new().write(true).create(create).open(path) { Ok(file) => file.set_len(size), Err(e) if e.kind() == ErrorKind::NotFound && !create => Ok(()), @@ -192,181 +198,99 @@ fn file_truncate(filename: &OsString, create: bool, size: u64) -> UResult<()> { ) } -/// Truncate files to a size relative to a given file. -/// -/// `rfilename` is the name of the reference file. -/// -/// `size_string` gives the size relative to the reference file to which -/// to set the target files. For example, "+3K" means "set each file to -/// be three kilobytes larger than the size of the reference file". -/// -/// If `create` is true, then each file will be created if it does not -/// already exist. -/// -/// # Errors -/// -/// If any file could not be opened, or there was a problem setting -/// the size of at least one file. -/// -/// If at least one file is a named pipe (also known as a fifo). -fn truncate_reference_and_size( - rfilename: &str, - size_string: &str, - filenames: &[OsString], - create: bool, +fn file_truncate( + no_create: bool, + reference_size: Option, + mode: &TruncateMode, + filename: &OsString, ) -> UResult<()> { - let mode = match parse_mode_and_size(size_string) { - Err(e) => { - return Err(USimpleError::new( - 1, - translate!("truncate-error-invalid-number", "error" => e), - )); - } - Ok(TruncateMode::Absolute(_)) => { - return Err(USimpleError::new( - 1, - translate!("truncate-error-must-specify-relative-size"), - )); + let path = Path::new(filename); + + // Get the length of the file. + let file_size = match metadata(path) { + Ok(metadata) => { + // A pipe has no length. Do this check here to avoid duplicate `stat()` syscall. + #[cfg(unix)] + if metadata.file_type().is_fifo() { + return Err(USimpleError::new( + 1, + translate!("truncate-error-cannot-open-no-device", "filename" => filename.to_string_lossy().quote()), + )); + } + metadata.len() } - Ok(m) => m, + Err(_) => 0, }; - if let TruncateMode::RoundDown(0) | TruncateMode::RoundUp(0) = mode { + // The reference size can be either: + // + // 1. The size of a given file + // 2. The size of the file to be truncated if no reference has been provided. + let actual_reference_size = reference_size.unwrap_or(file_size); + + let Some(truncate_size) = mode.to_size(actual_reference_size) else { return Err(USimpleError::new( 1, translate!("truncate-error-division-by-zero"), )); - } - - let metadata = metadata(rfilename).map_err(|e| match e.kind() { - ErrorKind::NotFound => USimpleError::new( - 1, - translate!("truncate-error-cannot-stat-no-such-file", "filename" => rfilename.quote()), - ), - _ => e.map_err_context(String::new), - })?; - - let fsize = metadata.len(); - let tsize = mode.to_size(fsize); - - for filename in filenames { - file_truncate(filename, create, tsize)?; - } + }; - Ok(()) + do_file_truncate(path, !no_create, truncate_size) } -/// Truncate files to match the size of a given reference file. -/// -/// `rfilename` is the name of the reference file. -/// -/// If `create` is true, then each file will be created if it does not -/// already exist. -/// -/// # Errors -/// -/// If any file could not be opened, or there was a problem setting -/// the size of at least one file. -/// -/// If at least one file is a named pipe (also known as a fifo). -fn truncate_reference_file_only( - rfilename: &str, +fn truncate( + no_create: bool, + _: bool, + reference: Option, + size: Option, filenames: &[OsString], - create: bool, ) -> UResult<()> { - let metadata = metadata(rfilename).map_err(|e| match e.kind() { - ErrorKind::NotFound => USimpleError::new( - 1, - translate!("truncate-error-cannot-stat-no-such-file", "filename" => rfilename.quote()), - ), - _ => e.map_err_context(String::new), - })?; - - let tsize = metadata.len(); - - for filename in filenames { - file_truncate(filename, create, tsize)?; - } + let reference_size = match reference { + Some(reference_path) => { + let reference_metadata = metadata(&reference_path).map_err(|error| match error.kind() { + ErrorKind::NotFound => USimpleError::new( + 1, + translate!("truncate-error-cannot-stat-no-such-file", "filename" => reference_path.quote()), + ), + _ => error.map_err_context(String::new), + })?; + + Some(reference_metadata.len()) + } + None => None, + }; - Ok(()) -} + let size_string = size.as_deref(); -/// Truncate files to a specified size. -/// -/// `size_string` gives either an absolute size or a relative size. A -/// relative size adjusts the size of each file relative to its current -/// size. For example, "3K" means "set each file to be three kilobytes" -/// whereas "+3K" means "set each file to be three kilobytes larger than -/// its current size". -/// -/// If `create` is true, then each file will be created if it does not -/// already exist. -/// -/// # Errors -/// -/// If any file could not be opened, or there was a problem setting -/// the size of at least one file. -/// -/// If at least one file is a named pipe (also known as a fifo). -fn truncate_size_only(size_string: &str, filenames: &[OsString], create: bool) -> UResult<()> { - let mode = parse_mode_and_size(size_string).map_err(|e| { - USimpleError::new(1, translate!("truncate-error-invalid-number", "error" => e)) - })?; + // Omitting the mode is equivalent to extending a file by 0 bytes. + let mode = match size_string { + Some(string) => match parse_mode_and_size(string) { + Err(error) => { + return Err(USimpleError::new( + 1, + translate!("truncate-error-invalid-number", "error" => error), + )); + } + Ok(mode) => mode, + }, + None => TruncateMode::Extend(0), + }; - if let TruncateMode::RoundDown(0) | TruncateMode::RoundUp(0) = mode { + // If a reference file has been given, the truncate mode cannot be absolute. + if reference_size.is_some() && mode.is_absolute() { return Err(USimpleError::new( 1, - translate!("truncate-error-division-by-zero"), + translate!("truncate-error-must-specify-relative-size"), )); } for filename in filenames { - let path = Path::new(filename); - let fsize = match metadata(path) { - Ok(m) => { - #[cfg(unix)] - if m.file_type().is_fifo() { - return Err(USimpleError::new( - 1, - translate!("truncate-error-cannot-open-no-device", "filename" => filename.to_string_lossy().quote()), - )); - } - m.len() - } - Err(_) => 0, - }; - let tsize = mode.to_size(fsize); - // TODO: Fix duplicate call to stat - file_truncate(filename, create, tsize)?; + file_truncate(no_create, reference_size, &mode, filename)?; } Ok(()) } -fn truncate( - no_create: bool, - _: bool, - reference: Option, - size: Option, - filenames: &[OsString], -) -> UResult<()> { - let create = !no_create; - - // There are four possibilities - // - reference file given and size given, - // - reference file given but no size given, - // - no reference file given but size given, - // - no reference file given and no size given, - match (reference, size) { - (Some(rfilename), Some(size_string)) => { - truncate_reference_and_size(&rfilename, &size_string, filenames, create) - } - (Some(rfilename), None) => truncate_reference_file_only(&rfilename, filenames, create), - (None, Some(size_string)) => truncate_size_only(&size_string, filenames, create), - (None, None) => unreachable!(), // this case cannot happen anymore because it's handled by clap - } -} - /// Decide whether a character is one of the size modifiers, like '+' or '<'. fn is_modifier(c: char) -> bool { c == '+' || c == '-' || c == '<' || c == '>' || c == '/' || c == '%' @@ -382,13 +306,12 @@ fn is_modifier(c: char) -> bool { /// /// # Panics /// -/// If `size_string` is empty, or if no number could be parsed from the -/// given string (for example, if the string were `"abc"`). +/// If `size_string` is empty. /// /// # Examples /// /// ```rust,ignore -/// assert_eq!(parse_mode_and_size("+123"), (TruncateMode::Extend, 123)); +/// assert_eq!(parse_mode_and_size("+123"), Ok(TruncateMode::Extend(123))); /// ``` fn parse_mode_and_size(size_string: &str) -> Result { // Trim any whitespace. @@ -432,8 +355,13 @@ mod tests { #[test] fn test_to_size() { - assert_eq!(TruncateMode::Extend(5).to_size(10), 15); - assert_eq!(TruncateMode::Reduce(5).to_size(10), 5); - assert_eq!(TruncateMode::Reduce(5).to_size(3), 0); + assert_eq!(TruncateMode::Extend(5).to_size(10), Some(15)); + assert_eq!(TruncateMode::Reduce(5).to_size(10), Some(5)); + assert_eq!(TruncateMode::Reduce(5).to_size(3), Some(0)); + assert_eq!(TruncateMode::RoundDown(4).to_size(13), Some(12)); + assert_eq!(TruncateMode::RoundDown(4).to_size(16), Some(16)); + assert_eq!(TruncateMode::RoundUp(8).to_size(10), Some(16)); + assert_eq!(TruncateMode::RoundUp(8).to_size(16), Some(16)); + assert_eq!(TruncateMode::RoundDown(0).to_size(123), None); } } diff --git a/src/uu/wc/Cargo.toml b/src/uu/wc/Cargo.toml index 144fcd083ad..ae9bb6e899b 100644 --- a/src/uu/wc/Cargo.toml +++ b/src/uu/wc/Cargo.toml @@ -18,16 +18,21 @@ workspace = true path = "src/wc.rs" [dependencies] -clap = { workspace = true } -uucore = { workspace = true, features = ["parser", "pipes", "quoting-style"] } bytecount = { workspace = true, features = ["runtime-dispatch-simd"] } +clap = { workspace = true } +fluent = { workspace = true } thiserror = { workspace = true } +uucore = { workspace = true, features = [ + "hardware", + "parser", + "pipes", + "quoting-style", +] } unicode-width = { workspace = true } -fluent = { workspace = true } [target.'cfg(unix)'.dependencies] -nix = { workspace = true } libc = { workspace = true } +nix = { workspace = true } [dev-dependencies] divan = { workspace = true } diff --git a/src/uu/wc/locales/en-US.ftl b/src/uu/wc/locales/en-US.ftl index 410eb3e6e26..52ea93291d8 100644 --- a/src/uu/wc/locales/en-US.ftl +++ b/src/uu/wc/locales/en-US.ftl @@ -31,3 +31,9 @@ decoder-error-io = underlying bytestream error: { $error } # Other messages wc-standard-input = standard input wc-total = total + +# Debug messages +wc-debug-hw-unavailable = wc: debug: hardware support unavailable on this CPU +wc-debug-hw-using = wc: debug: using hardware support (features: { $features }) +wc-debug-hw-disabled-env = wc: debug: hardware support disabled by environment +wc-debug-hw-disabled-glibc = wc: debug: hardware support disabled by GLIBC_TUNABLES ({ $features }) diff --git a/src/uu/wc/locales/fr-FR.ftl b/src/uu/wc/locales/fr-FR.ftl index e04d89fd9be..641e6846111 100644 --- a/src/uu/wc/locales/fr-FR.ftl +++ b/src/uu/wc/locales/fr-FR.ftl @@ -31,3 +31,9 @@ decoder-error-io = erreur du flux d'octets sous-jacent : { $error } # Autres messages wc-standard-input = entrée standard wc-total = total + +# Messages de débogage +wc-debug-hw-unavailable = wc : debug : prise en charge matérielle indisponible sur ce CPU +wc-debug-hw-using = wc : debug : utilisation de l'accélération matérielle (fonctions : { $features }) +wc-debug-hw-disabled-env = wc : debug : prise en charge matérielle désactivée par l'environnement +wc-debug-hw-disabled-glibc = wc : debug : prise en charge matérielle désactivée par GLIBC_TUNABLES ({ $features }) diff --git a/src/uu/wc/src/count_fast.rs b/src/uu/wc/src/count_fast.rs index 9a473401e24..d2715f3754a 100644 --- a/src/uu/wc/src/count_fast.rs +++ b/src/uu/wc/src/count_fast.rs @@ -5,6 +5,7 @@ // cSpell:ignore sysconf use crate::word_count::WordCount; +use uucore::hardware::SimdPolicy; use super::WordCountable; @@ -232,6 +233,7 @@ pub(crate) fn count_bytes_chars_and_lines_fast< ) -> (WordCount, Option) { let mut total = WordCount::default(); let buf: &mut [u8] = &mut AlignedBuffer::default().data; + let simd_allowed = SimdPolicy::detect().allows_simd(); loop { match handle.read(buf) { Ok(0) => return (total, None), @@ -240,10 +242,18 @@ pub(crate) fn count_bytes_chars_and_lines_fast< total.bytes += n; } if COUNT_CHARS { - total.chars += bytecount::num_chars(&buf[..n]); + total.chars += if simd_allowed { + bytecount::num_chars(&buf[..n]) + } else { + bytecount::naive_num_chars(&buf[..n]) + }; } if COUNT_LINES { - total.lines += bytecount::count(&buf[..n], b'\n'); + total.lines += if simd_allowed { + bytecount::count(&buf[..n], b'\n') + } else { + bytecount::naive_count(&buf[..n], b'\n') + }; } } Err(ref e) if e.kind() == ErrorKind::Interrupted => (), diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 44362e03fb4..b8b4d5134da 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -29,6 +29,7 @@ use uucore::translate; use uucore::{ error::{FromIo, UError, UResult}, format_usage, + hardware::{HardwareFeature, HasHardwareFeatures as _, SimdPolicy}, parser::shortcut_value_parser::ShortcutValueParser, quoting_style::{self, QuotingStyle}, show, @@ -49,6 +50,7 @@ struct Settings<'a> { show_lines: bool, show_words: bool, show_max_line_length: bool, + debug: bool, files0_from: Option>, total_when: TotalWhen, } @@ -62,6 +64,7 @@ impl Default for Settings<'_> { show_lines: true, show_words: true, show_max_line_length: false, + debug: false, files0_from: None, total_when: TotalWhen::default(), } @@ -85,6 +88,7 @@ impl<'a> Settings<'a> { show_lines: matches.get_flag(options::LINES), show_words: matches.get_flag(options::WORDS), show_max_line_length: matches.get_flag(options::MAX_LINE_LENGTH), + debug: matches.get_flag(options::DEBUG), files0_from, total_when, }; @@ -95,6 +99,7 @@ impl<'a> Settings<'a> { Self { files0_from: settings.files0_from, total_when, + debug: settings.debug, ..Default::default() } } @@ -122,6 +127,7 @@ mod options { pub static MAX_LINE_LENGTH: &str = "max-line-length"; pub static TOTAL: &str = "total"; pub static WORDS: &str = "words"; + pub static DEBUG: &str = "debug"; } static ARG_FILES: &str = "files"; static STDIN_REPR: &str = "-"; @@ -445,6 +451,12 @@ pub fn uu_app() -> Command { .help(translate!("wc-help-words")) .action(ArgAction::SetTrue), ) + .arg( + Arg::new(options::DEBUG) + .long(options::DEBUG) + .action(ArgAction::SetTrue) + .hide(true), + ) .arg( Arg::new(ARG_FILES) .action(ArgAction::Append) @@ -805,6 +817,17 @@ fn escape_name_wrapper(name: &OsStr) -> String { .expect("All escaped names with the escaping option return valid strings.") } +fn hardware_feature_label(feature: HardwareFeature) -> &'static str { + match feature { + HardwareFeature::Avx512 => "AVX512F", + HardwareFeature::Avx2 => "AVX2", + HardwareFeature::PclMul => "PCLMUL", + HardwareFeature::Vmull => "VMULL", + HardwareFeature::Sse2 => "SSE2", + HardwareFeature::Asimd => "ASIMD", + } +} + fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { let mut total_word_count = WordCount::default(); let mut num_inputs: usize = 0; @@ -814,6 +837,41 @@ fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { _ => (compute_number_width(inputs, settings), true), }; + if settings.debug { + let policy = SimdPolicy::detect(); + if policy.allows_simd() { + let enabled: Vec<&'static str> = policy + .iter_features() + .map(hardware_feature_label) + .collect(); + if enabled.is_empty() { + eprintln!("{}", translate!("wc-debug-hw-unavailable")); + } else { + eprintln!( + "{}", + translate!("wc-debug-hw-using", "features" => enabled.join(", ")) + ); + } + } else { + let disabled: Vec<&'static str> = policy + .disabled_features() + .into_iter() + .map(hardware_feature_label) + .collect(); + if disabled.is_empty() { + eprintln!("{}", translate!("wc-debug-hw-disabled-env")); + } else { + eprintln!( + "{}", + translate!( + "wc-debug-hw-disabled-glibc", + "features" => disabled.join(", ") + ) + ); + } + } + } + for maybe_input in inputs.try_iter(settings)? { num_inputs += 1; diff --git a/src/uucore/src/lib/features/entries.rs b/src/uucore/src/lib/features/entries.rs index d3796890a69..6a067e132b7 100644 --- a/src/uucore/src/lib/features/entries.rs +++ b/src/uucore/src/lib/features/entries.rs @@ -290,7 +290,7 @@ macro_rules! f { unsafe { let data = $fid(k); if !data.is_null() { - Ok($st::from_raw(ptr::read(data as *const _))) + Ok($st::from_raw(ptr::read(data.cast_const()))) } else { // FIXME: Resource limits, signals and I/O failure may // cause this too. See getpwnam(3). @@ -317,12 +317,12 @@ macro_rules! f { // f!(getgrnam, getgrgid, gid_t, Group); let data = $fnam(cstring.as_ptr()); if !data.is_null() { - return Ok($st::from_raw(ptr::read(data as *const _))); + return Ok($st::from_raw(ptr::read(data.cast_const()))); } if let Ok(id) = k.parse::<$t>() { let data = $fid(id); if !data.is_null() { - Ok($st::from_raw(ptr::read(data as *const _))) + Ok($st::from_raw(ptr::read(data.cast_const()))) } else { Err(IOError::new( ErrorKind::NotFound, diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 467f0985059..3bef0fbb1a7 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -595,14 +595,10 @@ fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option { match rest[*index..].iter().position(|b| !b.is_ascii_digit()) { None | Some(0) => None, Some(i) => { - // TODO: This might need to handle errors better - // For example in case of overflow. - let parsed = std::str::from_utf8(&rest[*index..(*index + i)]) - .unwrap() - .parse() - .unwrap(); + // Handle large numbers that would cause overflow + let num_str = std::str::from_utf8(&rest[*index..(*index + i)]).unwrap(); *index += i; - Some(parsed) + Some(num_str.parse().unwrap_or(usize::MAX)) } } } diff --git a/src/uucore/src/lib/features/fsext.rs b/src/uucore/src/lib/features/fsext.rs index 78dfcceb23d..8051b2f435f 100644 --- a/src/uucore/src/lib/features/fsext.rs +++ b/src/uucore/src/lib/features/fsext.rs @@ -347,19 +347,19 @@ impl From for MountInfo { fn from(statfs: StatFs) -> Self { let dev_name = unsafe { // spell-checker:disable-next-line - CStr::from_ptr(&statfs.f_mntfromname[0]) + CStr::from_ptr(statfs.f_mntfromname.as_ptr()) .to_string_lossy() .into_owned() }; let fs_type = unsafe { // spell-checker:disable-next-line - CStr::from_ptr(&statfs.f_fstypename[0]) + CStr::from_ptr(statfs.f_fstypename.as_ptr()) .to_string_lossy() .into_owned() }; let mount_dir_bytes = unsafe { // spell-checker:disable-next-line - CStr::from_ptr(&statfs.f_mntonname[0]).to_bytes() + CStr::from_ptr(statfs.f_mntonname.as_ptr()).to_bytes() }; let mount_dir = os_str_from_bytes(mount_dir_bytes).unwrap().into_owned(); @@ -506,7 +506,7 @@ pub fn read_fs_list() -> UResult> { ))] { let mut mount_buffer_ptr: *mut StatFs = ptr::null_mut(); - let len = unsafe { get_mount_info(&mut mount_buffer_ptr, 1_i32) }; + let len = unsafe { get_mount_info(&raw mut mount_buffer_ptr, 1_i32) }; if len < 0 { return Err(USimpleError::new(1, "get_mount_info() failed")); } @@ -668,10 +668,10 @@ impl FsUsage { let path = to_nul_terminated_wide_string(path); GetDiskFreeSpaceW( path.as_ptr(), - &mut sectors_per_cluster, - &mut bytes_per_sector, - &mut number_of_free_clusters, - &mut total_number_of_clusters, + &raw mut sectors_per_cluster, + &raw mut bytes_per_sector, + &raw mut number_of_free_clusters, + &raw mut total_number_of_clusters, ); } @@ -881,7 +881,7 @@ impl FsMeta for StatFs { fn fsid(&self) -> u64 { // Use type inference to determine the type of f_fsid // (libc::__fsid_t on Android, libc::fsid_t on other platforms) - let f_fsid: &[u32; 2] = unsafe { &*(&raw const self.f_fsid as *const [u32; 2]) }; + let f_fsid: &[u32; 2] = unsafe { &*(&raw const self.f_fsid).cast() }; ((u64::from(f_fsid[0])) << 32) | u64::from(f_fsid[1]) } #[cfg(not(any( @@ -932,7 +932,7 @@ pub fn statfs(path: &OsStr) -> Result { Ok(p) => { let mut buffer: StatFs = unsafe { mem::zeroed() }; unsafe { - match statfs_fn(p.as_ptr(), &mut buffer) { + match statfs_fn(p.as_ptr(), &raw mut buffer) { 0 => Ok(buffer), _ => { let errno = IOError::last_os_error().raw_os_error().unwrap_or(0); diff --git a/src/uucore/src/lib/features/systemd_logind.rs b/src/uucore/src/lib/features/systemd_logind.rs index 0e599cfe5d8..d34e8cc1729 100644 --- a/src/uucore/src/lib/features/systemd_logind.rs +++ b/src/uucore/src/lib/features/systemd_logind.rs @@ -53,7 +53,7 @@ mod login { pub fn get_sessions() -> Result, Box> { let mut sessions_ptr: *mut *mut libc::c_char = ptr::null_mut(); - let result = unsafe { ffi::sd_get_sessions(&mut sessions_ptr) }; + let result = unsafe { ffi::sd_get_sessions(&raw mut sessions_ptr) }; if result < 0 { return Err(format!("sd_get_sessions failed: {result}").into()); @@ -71,11 +71,11 @@ mod login { let session_cstr = unsafe { CStr::from_ptr(session_ptr) }; sessions.push(session_cstr.to_string_lossy().into_owned()); - unsafe { libc::free(session_ptr as *mut libc::c_void) }; + unsafe { libc::free(session_ptr.cast()) }; i += 1; } - unsafe { libc::free(sessions_ptr as *mut libc::c_void) }; + unsafe { libc::free(sessions_ptr.cast()) }; } Ok(sessions) @@ -86,7 +86,7 @@ mod login { let session_cstring = CString::new(session_id)?; let mut uid: std::os::raw::c_uint = 0; - let result = unsafe { ffi::sd_session_get_uid(session_cstring.as_ptr(), &mut uid) }; + let result = unsafe { ffi::sd_session_get_uid(session_cstring.as_ptr(), &raw mut uid) }; if result < 0 { return Err( @@ -102,7 +102,8 @@ mod login { let session_cstring = CString::new(session_id)?; let mut usec: u64 = 0; - let result = unsafe { ffi::sd_session_get_start_time(session_cstring.as_ptr(), &mut usec) }; + let result = + unsafe { ffi::sd_session_get_start_time(session_cstring.as_ptr(), &raw mut usec) }; if result < 0 { return Err(format!( @@ -119,7 +120,7 @@ mod login { let session_cstring = CString::new(session_id)?; let mut tty_ptr: *mut libc::c_char = ptr::null_mut(); - let result = unsafe { ffi::sd_session_get_tty(session_cstring.as_ptr(), &mut tty_ptr) }; + let result = unsafe { ffi::sd_session_get_tty(session_cstring.as_ptr(), &raw mut tty_ptr) }; if result < 0 { return Err( @@ -134,7 +135,7 @@ mod login { let tty_cstr = unsafe { CStr::from_ptr(tty_ptr) }; let tty_string = tty_cstr.to_string_lossy().into_owned(); - unsafe { libc::free(tty_ptr as *mut libc::c_void) }; + unsafe { libc::free(tty_ptr.cast()) }; Ok(Some(tty_string)) } @@ -147,7 +148,7 @@ mod login { let mut host_ptr: *mut libc::c_char = ptr::null_mut(); let result = - unsafe { ffi::sd_session_get_remote_host(session_cstring.as_ptr(), &mut host_ptr) }; + unsafe { ffi::sd_session_get_remote_host(session_cstring.as_ptr(), &raw mut host_ptr) }; if result < 0 { return Err(format!( @@ -163,7 +164,7 @@ mod login { let host_cstr = unsafe { CStr::from_ptr(host_ptr) }; let host_string = host_cstr.to_string_lossy().into_owned(); - unsafe { libc::free(host_ptr as *mut libc::c_void) }; + unsafe { libc::free(host_ptr.cast()) }; Ok(Some(host_string)) } @@ -176,7 +177,7 @@ mod login { let mut display_ptr: *mut libc::c_char = ptr::null_mut(); let result = - unsafe { ffi::sd_session_get_display(session_cstring.as_ptr(), &mut display_ptr) }; + unsafe { ffi::sd_session_get_display(session_cstring.as_ptr(), &raw mut display_ptr) }; if result < 0 { return Err(format!( @@ -192,7 +193,7 @@ mod login { let display_cstr = unsafe { CStr::from_ptr(display_ptr) }; let display_string = display_cstr.to_string_lossy().into_owned(); - unsafe { libc::free(display_ptr as *mut libc::c_void) }; + unsafe { libc::free(display_ptr.cast()) }; Ok(Some(display_string)) } @@ -204,7 +205,8 @@ mod login { let session_cstring = CString::new(session_id)?; let mut type_ptr: *mut libc::c_char = ptr::null_mut(); - let result = unsafe { ffi::sd_session_get_type(session_cstring.as_ptr(), &mut type_ptr) }; + let result = + unsafe { ffi::sd_session_get_type(session_cstring.as_ptr(), &raw mut type_ptr) }; if result < 0 { return Err( @@ -219,7 +221,7 @@ mod login { let type_cstr = unsafe { CStr::from_ptr(type_ptr) }; let type_string = type_cstr.to_string_lossy().into_owned(); - unsafe { libc::free(type_ptr as *mut libc::c_void) }; + unsafe { libc::free(type_ptr.cast()) }; Ok(Some(type_string)) } @@ -231,7 +233,8 @@ mod login { let session_cstring = CString::new(session_id)?; let mut seat_ptr: *mut libc::c_char = ptr::null_mut(); - let result = unsafe { ffi::sd_session_get_seat(session_cstring.as_ptr(), &mut seat_ptr) }; + let result = + unsafe { ffi::sd_session_get_seat(session_cstring.as_ptr(), &raw mut seat_ptr) }; if result < 0 { return Err( @@ -246,7 +249,7 @@ mod login { let seat_cstr = unsafe { CStr::from_ptr(seat_ptr) }; let seat_string = seat_cstr.to_string_lossy().into_owned(); - unsafe { libc::free(seat_ptr as *mut libc::c_void) }; + unsafe { libc::free(seat_ptr.cast()) }; Ok(Some(seat_string)) } @@ -373,9 +376,9 @@ pub fn read_login_records() -> UResult> { let ret = libc::getpwuid_r( uid, passwd.as_mut_ptr(), - buf.as_mut_ptr() as *mut libc::c_char, + buf.as_mut_ptr().cast(), buf.len(), - &mut result, + &raw mut result, ); if ret == 0 && !result.is_null() { diff --git a/src/uucore/src/lib/features/uptime.rs b/src/uucore/src/lib/features/uptime.rs index c278ff21f39..e29e2d17c00 100644 --- a/src/uucore/src/lib/features/uptime.rs +++ b/src/uucore/src/lib/features/uptime.rs @@ -62,10 +62,9 @@ pub fn get_uptime(_boot_time: Option) -> UResult { tv_sec: 0, tv_nsec: 0, }; - let raw_tp = &mut tp as *mut timespec; // OpenBSD prototype: clock_gettime(clk_id: ::clockid_t, tp: *mut ::timespec) -> ::c_int; - let ret: c_int = unsafe { clock_gettime(CLOCK_BOOTTIME, raw_tp) }; + let ret: c_int = unsafe { clock_gettime(CLOCK_BOOTTIME, &raw mut tp) }; if ret == 0 { #[cfg(target_pointer_width = "64")] diff --git a/src/uucore/src/lib/features/utmpx.rs b/src/uucore/src/lib/features/utmpx.rs index 3c18cc16fe0..8832caff3ba 100644 --- a/src/uucore/src/lib/features/utmpx.rs +++ b/src/uucore/src/lib/features/utmpx.rs @@ -525,7 +525,7 @@ impl Iterator for UtmpxIter { // All the strings live inline in the struct as arrays, which // makes things easier. Some(UtmpxRecord::Traditional(Box::new(Utmpx { - inner: ptr::read(res as *const _), + inner: ptr::read(res.cast_const()), }))) } } diff --git a/src/uucore/src/lib/mods/locale.rs b/src/uucore/src/lib/mods/locale.rs index 559dc72ef14..cd2a54343b1 100644 --- a/src/uucore/src/lib/mods/locale.rs +++ b/src/uucore/src/lib/mods/locale.rs @@ -264,8 +264,7 @@ fn create_english_bundle_from_embedded( fn get_message_internal(id: &str, args: Option) -> String { LOCALIZER.with(|lock| { lock.get() - .map(|loc| loc.format(id, args.as_ref())) - .unwrap_or_else(|| id.to_string()) // Return the key ID if localizer not initialized + .map_or_else(|| id.to_string(), |loc| loc.format(id, args.as_ref())) // Return the key ID if localizer not initialized }) } diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 9d59efd58a7..bd1c31cc1a9 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -293,6 +293,27 @@ fn test_date_set_permissions_error() { } } +#[test] +#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] +fn test_date_set_hyphen_prefixed_values() { + // test -s flag accepts hyphen-prefixed values like "-3 days" + if !(geteuid() == 0 || uucore::os::is_wsl_1()) { + let test_cases = vec!["-1 hour", "-2 days", "-3 weeks", "-1 month"]; + + for date_str in test_cases { + let result = new_ucmd!().arg("--set").arg(date_str).fails(); + result.no_stdout(); + // permission error, not argument parsing error + assert!( + result.stderr_str().starts_with("date: cannot set date: "), + "Expected permission error for '{}', but got: {}", + date_str, + result.stderr_str() + ); + } + } +} + #[test] #[cfg(target_os = "macos")] fn test_date_set_mac_unavailable() { @@ -1071,3 +1092,58 @@ fn test_date_military_timezone_with_offset_variations() { .stdout_is(format!("{expected}\n")); } } + +// Locale-aware hour formatting tests +#[test] +#[cfg(unix)] +fn test_date_locale_hour_c_locale() { + // C locale should use 24-hour format + new_ucmd!() + .env("LC_ALL", "C") + .env("TZ", "UTC") + .arg("-d") + .arg("2025-10-11T13:00") + .succeeds() + .stdout_contains("13:00"); +} + +#[test] +#[cfg(any( + target_os = "linux", + target_vendor = "apple", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "dragonfly" +))] +fn test_date_locale_hour_en_us() { + // en_US locale typically uses 12-hour format when available + // Note: If locale is not installed on system, falls back to C locale (24-hour) + let result = new_ucmd!() + .env("LC_ALL", "en_US.UTF-8") + .env("TZ", "UTC") + .arg("-d") + .arg("2025-10-11T13:00") + .succeeds(); + + let stdout = result.stdout_str(); + // Accept either 12-hour (if locale available) or 24-hour (if locale unavailable) + // The important part is that the code doesn't crash and handles locale detection gracefully + assert!( + stdout.contains("1:00") || stdout.contains("13:00"), + "date output should contain either 1:00 (12-hour) or 13:00 (24-hour), got: {stdout}" + ); +} + +#[test] +fn test_date_explicit_format_overrides_locale() { + // Explicit format should override locale preferences + new_ucmd!() + .env("LC_ALL", "en_US.UTF-8") + .env("TZ", "UTC") + .arg("-d") + .arg("2025-10-11T13:00") + .arg("+%H:%M") + .succeeds() + .stdout_is("13:00\n"); +} diff --git a/tests/by-util/test_kill.rs b/tests/by-util/test_kill.rs index 5fb8fb31219..aad1982d686 100644 --- a/tests/by-util/test_kill.rs +++ b/tests/by-util/test_kill.rs @@ -395,3 +395,27 @@ fn test_kill_with_signal_and_table() { .arg("-t") .fails(); } + +/// Test that `kill -1` (signal without PID) reports "no process ID" error +/// instead of being misinterpreted as pid=-1 which would kill all processes. +/// This matches GNU kill behavior. +#[test] +fn test_kill_signal_only_no_pid() { + // Test with -1 (SIGHUP) + new_ucmd!() + .arg("-1") + .fails() + .stderr_contains("no process ID specified"); + + // Test with -9 (SIGKILL) + new_ucmd!() + .arg("-9") + .fails() + .stderr_contains("no process ID specified"); + + // Test with -TERM + new_ucmd!() + .arg("-TERM") + .fails() + .stderr_contains("no process ID specified"); +} diff --git a/tests/by-util/test_nl.rs b/tests/by-util/test_nl.rs index ab430b20bcc..dab5cc47f92 100644 --- a/tests/by-util/test_nl.rs +++ b/tests/by-util/test_nl.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // -// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää +// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää févr use uutests::{at_and_ucmd, new_ucmd, util::TestScenario, util_name}; #[test] @@ -209,23 +209,24 @@ fn test_number_separator() { #[test] #[cfg(target_os = "linux")] fn test_number_separator_non_utf8() { - use std::{ - ffi::{OsStr, OsString}, - os::unix::ffi::{OsStrExt, OsStringExt}, - }; + use std::{ffi::OsString, os::unix::ffi::OsStringExt}; let separator_bytes = [0xFF, 0xFE]; let mut v = b"--number-separator=".to_vec(); v.extend_from_slice(&separator_bytes); let arg = OsString::from_vec(v); - let separator = OsStr::from_bytes(&separator_bytes); + + // Raw bytes should be preserved in the separator output + let mut expected = b" 1".to_vec(); + expected.extend_from_slice(&separator_bytes); + expected.extend_from_slice(b"test\n"); new_ucmd!() .arg(arg) .pipe_in("test") .succeeds() - .stdout_is(format!(" 1{}test\n", separator.to_string_lossy())); + .stdout_is_bytes(expected); } #[test] @@ -791,14 +792,24 @@ fn test_file_with_non_utf8_content() { let filename = "file"; let content: &[u8] = b"a\n\xFF\xFE\nb"; - let invalid_utf8: &[u8] = b"\xFF\xFE"; at.write_bytes(filename, content); - ucmd.arg(filename).succeeds().stdout_is(format!( - " 1\ta\n 2\t{}\n 3\tb\n", - String::from_utf8_lossy(invalid_utf8) - )); + // Raw bytes should be preserved in output (not converted to UTF-8 replacement chars) + let expected: Vec = b" 1\ta\n 2\t\xFF\xFE\n 3\tb\n".to_vec(); + ucmd.arg(filename).succeeds().stdout_is_bytes(expected); +} + +#[test] +fn test_stdin_non_utf8_preserved() { + // Verify that non-UTF8 bytes are preserved in output, not converted to replacement chars + // This is important for locale compatibility + let input: Vec = b"f\xe9vr.\n".to_vec(); // "févr." in Latin-1 + let expected: Vec = b" 1\tf\xe9vr.\n".to_vec(); + new_ucmd!() + .pipe_in(input) + .succeeds() + .stdout_is_bytes(expected); } // Regression tests for issue #9132: repeated flags should use last value diff --git a/tests/by-util/test_nohup.rs b/tests/by-util/test_nohup.rs index 2349b2dc2a8..f3fa0bc948c 100644 --- a/tests/by-util/test_nohup.rs +++ b/tests/by-util/test_nohup.rs @@ -14,8 +14,17 @@ use uutests::util_name; // All that can be tested is the side-effects. #[test] -fn test_invalid_arg() { - new_ucmd!().arg("--definitely-invalid").fails_with_code(125); +fn test_nohup_exit_codes() { + // No args: 125 default, 127 with POSIXLY_CORRECT + new_ucmd!().fails_with_code(125); + new_ucmd!().env("POSIXLY_CORRECT", "1").fails_with_code(127); + + // Invalid arg: 125 default, 127 with POSIXLY_CORRECT + new_ucmd!().arg("--invalid").fails_with_code(125); + new_ucmd!() + .env("POSIXLY_CORRECT", "1") + .arg("--invalid") + .fails_with_code(127); } #[test] diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 6bfcecbb40c..21e638f7c26 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -1482,3 +1482,13 @@ fn test_large_width_format() { .stdout_is(""); } } + +#[test] +fn test_extreme_field_width_overflow() { + // Test the specific case that was causing panic due to integer overflow + // in the field width parsing. + new_ucmd!() + .args(&["%999999999999999999999999d", "1"]) + .fails_with_code(1) + .stderr_only("printf: write error\n"); +} diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index fa861a4c3f2..d1266e09d5c 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -808,3 +808,69 @@ fn wc_w_words_with_emoji_separator() { .succeeds() .stdout_contains("3"); } + +#[cfg(unix)] +#[test] +fn test_simd_respects_glibc_tunables() { + // Ensure debug output reflects that SIMD paths are disabled via GLIBC_TUNABLES + let debug_output = new_ucmd!() + .args(&["-l", "--debug", "/dev/null"]) + .env("GLIBC_TUNABLES", "glibc.cpu.hwcaps=-AVX2,-AVX512F") + .succeeds() + .stderr_str() + .to_string(); + assert!( + !debug_output.contains("using hardware support"), + "SIMD should be reported as disabled when GLIBC_TUNABLES blocks AVX features: {debug_output}" + ); + assert!( + debug_output.contains("hardware support disabled"), + "Debug output should acknowledge GLIBC_TUNABLES restrictions: {debug_output}" + ); + + // WC results should be identical with and without GLIBC_TUNABLES overrides + let sample_sizes = [0usize, 1, 7, 128, 513, 999]; + use std::fmt::Write as _; + for &lines in &sample_sizes { + let content: String = (0..lines).fold(String::new(), |mut acc, i| { + // Build the input buffer efficiently without allocating per line. + let _ = writeln!(acc, "{i}"); + acc + }); + + let base = new_ucmd!() + .arg("-l") + .pipe_in(content.clone()) + .succeeds() + .stdout_str() + .trim() + .to_string(); + + let no_avx512 = new_ucmd!() + .arg("-l") + .env("GLIBC_TUNABLES", "glibc.cpu.hwcaps=-AVX512F") + .pipe_in(content.clone()) + .succeeds() + .stdout_str() + .trim() + .to_string(); + + let no_avx2_avx512 = new_ucmd!() + .arg("-l") + .env("GLIBC_TUNABLES", "glibc.cpu.hwcaps=-AVX2,-AVX512F") + .pipe_in(content) + .succeeds() + .stdout_str() + .trim() + .to_string(); + + assert_eq!( + base, no_avx512, + "Line counts should not change when AVX512 is disabled (lines={lines})" + ); + assert_eq!( + base, no_avx2_avx512, + "Line counts should not change when AVX2/AVX512 are disabled (lines={lines})" + ); + } +} diff --git a/tests/uutests/src/lib/util.rs b/tests/uutests/src/lib/util.rs index 108a2b056f4..5c5ed3ef401 100644 --- a/tests/uutests/src/lib/util.rs +++ b/tests/uutests/src/lib/util.rs @@ -1147,7 +1147,7 @@ impl AtPath { unsafe { let name = CString::new(self.plus_as_string(fifo)).unwrap(); let mut stat: libc::stat = std::mem::zeroed(); - if libc::stat(name.as_ptr(), &mut stat) >= 0 { + if libc::stat(name.as_ptr(), &raw mut stat) >= 0 { libc::S_IFIFO & stat.st_mode as libc::mode_t != 0 } else { false @@ -1160,7 +1160,7 @@ impl AtPath { unsafe { let name = CString::new(self.plus_as_string(char_dev)).unwrap(); let mut stat: libc::stat = std::mem::zeroed(); - if libc::stat(name.as_ptr(), &mut stat) >= 0 { + if libc::stat(name.as_ptr(), &raw mut stat) >= 0 { libc::S_IFCHR & stat.st_mode as libc::mode_t != 0 } else { false diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 626400d6a57..25ff4cc6a23 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -3,16 +3,15 @@ # # spell-checker:ignore (paths) abmon deref discrim eacces getlimits getopt ginstall inacc infloop inotify reflink ; (misc) INT_OFLOW OFLOW -# spell-checker:ignore baddecode submodules xstrtol distros ; (vars/env) SRCDIR vdir rcexp xpart dired OSTYPE ; (utils) gnproc greadlink gsed multihardlink texinfo CARGOFLAGS +# spell-checker:ignore baddecode submodules xstrtol distros ; (vars/env) SRCDIR vdir rcexp xpart dired OSTYPE ; (utils) greadlink gsed multihardlink texinfo CARGOFLAGS # spell-checker:ignore openat TOCTOU CFLAGS # spell-checker:ignore hfsplus casefold chattr set -e -# Use system's GNU version for make, nproc, readlink and sed on *BSD and macOS +# Use GNU make, readlink and sed on *BSD and macOS MAKE=$(command -v gmake||command -v make) -NPROC=$(command -v gnproc||command -v nproc) -READLINK=$(command -v greadlink||command -v readlink) +READLINK=$(command -v greadlink||command -v readlink) # Use our readlink to remove a dependency SED=$(command -v gsed||command -v sed) SYSTEM_TIMEOUT=$(command -v timeout) @@ -34,18 +33,13 @@ path_GNU="$("${READLINK}" -fm -- "${path_GNU:-${path_UUTILS}/../gnu}")" ### -release_tag_GNU="v9.9" - # check if the GNU coreutils has been cloned, if not print instructions -# note: the ${path_GNU} might already exist, so we check for the .git directory -if test ! -d "${path_GNU}/.git"; then +# note: the ${path_GNU} might already exist, so we check for the configure +if test ! -f "${path_GNU}/configure"; then echo "Could not find the GNU coreutils (expected at '${path_GNU}')" echo "Download them to the expected path:" - echo " git clone --recurse-submodules https://github.com/coreutils/coreutils.git \"${path_GNU}\"" - echo "Afterwards, checkout the latest release tag:" - echo " cd \"${path_GNU}\"" - echo " git fetch --all --tags" - echo " git checkout tags/${release_tag_GNU}" + echo " (cd '${path_GNU}' && fetch-gnu.sh ) " + echo "You can edit fetch-gnu.sh to change the tag" exit 1 fi @@ -111,7 +105,8 @@ test -f "${UU_BUILD_DIR}/[" || (cd ${UU_BUILD_DIR} && ln -s "test" "[") cd "${path_GNU}" && echo "[ pwd:'${PWD}' ]" -# Any binaries that aren't built become `false` so their tests fail +# Any binaries that aren't built become `false` to make tests failure +# Note that some test (e.g. runcon/runcon-compute.sh) incorrectly passes by this for binary in $(./build-aux/gen-lists-of-programs.sh --list-progs); do bin_path="${UU_BUILD_DIR}/${binary}" test -f "${bin_path}" || { @@ -126,22 +121,24 @@ done if test -f gnu-built; then echo "GNU build already found. Skip" - echo "'rm -f $(pwd)/gnu-built' to force the build" + echo "'rm -f $(pwd)/{gnu-built,src/getlimits}' to force the build" echo "Note: the customization of the tests will still happen" else # Disable useless checks "${SED}" -i 's|check-texinfo: $(syntax_checks)|check-texinfo:|' doc/local.mk - "${SED}" -i '/^wget.*/d' bootstrap.conf # wget is used to DL po. Remove the dep. - ./bootstrap --skip-po # Use CFLAGS for best build time since we discard GNU coreutils - CFLAGS="${CFLAGS} -pipe -O0 -s" ./configure --quiet --disable-gcc-warnings --disable-nls --disable-dependency-tracking --disable-bold-man-page-references \ + CFLAGS="${CFLAGS} -pipe -O0 -s" ./configure -C --quiet --disable-gcc-warnings --disable-nls --disable-dependency-tracking --disable-bold-man-page-references \ --enable-single-binary=symlinks \ "$([ "${SELINUX_ENABLED}" = 1 ] && echo --with-selinux || echo --without-selinux)" #Add timeout to to protect against hangs "${SED}" -i 's|^"\$@|'"${SYSTEM_TIMEOUT}"' 600 "\$@|' build-aux/test-driver # Use a better diff "${SED}" -i 's|diff -c|diff -u|g' tests/Coreutils.pm - "${MAKE}" -j "$("${NPROC}")" + + # Skip make if possible + # Use our nproc for *BSD and macOS + test -f src/getlimits || "${MAKE}" -j "$("${UU_BUILD_DIR}/nproc")" + cp -f src/getlimits "${UU_BUILD_DIR}" # Handle generated factor tests t_first=00 @@ -170,14 +167,16 @@ grep -rl 'path_prepend_' tests/* | xargs -r "${SED}" -i 's| path_prepend_ ./src| # path_prepend_ sets $abs_path_dir_: set it manually instead. grep -rl '\$abs_path_dir_' tests/*/*.sh | xargs -r "${SED}" -i "s|\$abs_path_dir_|${UU_BUILD_DIR//\//\\/}|g" +# We can't build runcon and chcon without libselinux. But GNU no longer builds dummies of them. So consider they are SELinux specific. +"${SED}" -i 's/^print_ver_.*/require_selinux_/' tests/runcon/runcon-compute.sh +"${SED}" -i 's/^print_ver_.*/require_selinux_/' tests/runcon/runcon-no-reorder.sh +"${SED}" -i 's/^print_ver_.*/require_selinux_/' tests/chcon/chcon-fail.sh + # We use coreutils yes "${SED}" -i "s|--coreutils-prog=||g" tests/misc/coreutils.sh # Different message "${SED}" -i "s|coreutils: unknown program 'blah'|blah: function/utility not found|" tests/misc/coreutils.sh -# Remove hfs dependency (should be merged to upstream) -"${SED}" -i -e "s|hfsplus|ext4 -O casefold|" -e "s|cd mnt|rm -d mnt/lost+found;chattr +F mnt;cd mnt|" tests/mv/hardlink-case.sh - # Use the system coreutils where the test fails due to error in a util that is not the one being tested "${SED}" -i "s|grep '^#define HAVE_CAP 1' \$CONFIG_HEADER > /dev/null|true|" tests/ls/capability.sh @@ -223,7 +222,11 @@ sed -i -e "s|---dis ||g" tests/tail/overlay-headers.sh # Do not FAIL, just do a regular ERROR "${SED}" -i -e "s|framework_failure_ 'no inotify_add_watch';|fail=1;|" tests/tail/inotify-rotate-resources.sh -test -f "${UU_BUILD_DIR}/getlimits" || cp src/getlimits "${UU_BUILD_DIR}" +# The notify crate makes inotify_add_watch calls in a background thread, so strace needs -f to follow threads. +# Also remove the HAVE_INOTIFY header check since that's for C builds. +"${SED}" -i -e "s|grep '^#define HAVE_INOTIFY 1' \"\$CONFIG_HEADER\" >/dev/null && is_local_dir_ \. |is_local_dir_ . |" \ + -e "s|strace -e inotify_add_watch|strace -f -e inotify_add_watch|" \ + tests/tail/inotify-dir-recreate.sh # pr produces very long log and this command isn't super interesting # SKIP for now diff --git a/util/fetch-gnu.sh b/util/fetch-gnu.sh new file mode 100755 index 00000000000..927d85949e8 --- /dev/null +++ b/util/fetch-gnu.sh @@ -0,0 +1,9 @@ +#!/bin/bash -e +ver="9.9" +repo=https://github.com/coreutils/coreutils +curl -L "${repo}/releases/download/v${ver}/coreutils-${ver}.tar.xz" | tar --strip-components=1 -xJf - + +# backport from coreutils > 9.9 +curl ${repo}/raw/refs/heads/master/tests/mv/hardlink-case.sh > tests/mv/hardlink-case.sh +curl ${repo}/raw/refs/heads/master/tests/mkdir/writable-under-readonly.sh > tests/mkdir/writable-under-readonly.sh +curl ${repo}/raw/refs/heads/master/tests/cp/cp-mv-enotsup-xattr.sh > tests/cp/cp-mv-enotsup-xattr.sh #spell-checker:disable-line diff --git a/util/run-gnu-test.sh b/util/run-gnu-test.sh index 43eb25f66c5..23d78ca625a 100755 --- a/util/run-gnu-test.sh +++ b/util/run-gnu-test.sh @@ -2,24 +2,14 @@ # `run-gnu-test.bash [TEST]` # run GNU test (or all tests if TEST is missing/null) -# spell-checker:ignore (env/vars) GNULIB SRCDIR SUBDIRS OSTYPE ; (utils) shellcheck gnproc greadlink +# spell-checker:ignore (env/vars) GNULIB SRCDIR SUBDIRS OSTYPE MAKEFLAGS; (utils) shellcheck greadlink # ref: [How the GNU coreutils are tested](https://www.pixelbeat.org/docs/coreutils-testing.html) @@ # * note: to run a single test => `make check TESTS=PATH/TO/TEST/SCRIPT SUBDIRS=. VERBOSE=yes` -# Use GNU version for make, nproc, readlink on *BSD -case "$OSTYPE" in - *bsd*) - MAKE="gmake" - NPROC="gnproc" - READLINK="greadlink" - ;; - *) - MAKE="make" - NPROC="nproc" - READLINK="readlink" - ;; -esac +# Use GNU make, readlink on *BSD +MAKE=$(command -v gmake||command -v make) +READLINK=$(command -v greadlink||command -v readlink) # Use our readlink to remove a dependency ME_dir="$(dirname -- "$("${READLINK}" -fm -- "$0")")" REPO_main_dir="$(dirname -- "${ME_dir}")" @@ -37,6 +27,10 @@ path_GNU="$("${READLINK}" -fm -- "${path_GNU:-${path_UUTILS}/../gnu}")" echo "path_UUTILS='${path_UUTILS}'" echo "path_GNU='${path_GNU}'" +# Use GNU nproc for *BSD +NPROC=$(command -v ${path_GNU}/src/nproc||command -v nproc) +MAKEFLAGS="${MAKEFLAGS} -j ${NPROC}" +export MAKEFLAGS ### cd "${path_GNU}" && echo "[ pwd:'${PWD}' ]" @@ -71,7 +65,7 @@ elif [[ "$1" == "run-root" && "$has_selinux_tests" == true ]]; then if test -n "$CI"; then echo "Running SELinux tests as root" # Don't use check-root here as the upstream root tests is hardcoded - sudo "${MAKE}" -j "$("${NPROC}")" check TESTS="$*" SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" TEST_SUITE_LOG="tests/test-suite-root.log" || : + sudo "${MAKE}" check TESTS="$*" SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" TEST_SUITE_LOG="tests/test-suite-root.log" || : fi exit 0 elif test "$1" != "run-root" && test "$1" != "run-tty"; then @@ -105,9 +99,9 @@ fi if test "$1" != "run-root" && test "$1" != "run-tty"; then # run the regular tests if test $# -ge 1; then - timeout -sKILL 4h "${MAKE}" -j "$("${NPROC}")" check TESTS="$SPECIFIC_TESTS" SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" || : # Kill after 4 hours in case something gets stuck in make + timeout -sKILL 4h "${MAKE}" check TESTS="$SPECIFIC_TESTS" SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" || : # Kill after 4 hours in case something gets stuck in make else - timeout -sKILL 4h "${MAKE}" -j "$("${NPROC}")" check SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" || : # Kill after 4 hours in case something gets stuck in make + timeout -sKILL 4h "${MAKE}" check SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" || : # Kill after 4 hours in case something gets stuck in make fi else # in case we would like to run tests requiring root @@ -115,10 +109,10 @@ else if test -n "$CI"; then if test $# -ge 2; then echo "Running check-root to run only root tests" - sudo "${MAKE}" -j "$("${NPROC}")" check-root TESTS="$2" SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" TEST_SUITE_LOG="tests/test-suite-root.log" || : + sudo "${MAKE}" check-root TESTS="$2" SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" TEST_SUITE_LOG="tests/test-suite-root.log" || : else echo "Running check-root to run only root tests" - sudo "${MAKE}" -j "$("${NPROC}")" check-root SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" TEST_SUITE_LOG="tests/test-suite-root.log" || : + sudo "${MAKE}" check-root SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no gl_public_submodule_commit="" srcdir="${path_GNU}" TEST_SUITE_LOG="tests/test-suite-root.log" || : fi fi fi diff --git a/util/why-error.md b/util/why-error.md index 137e189ad81..04039e34e2a 100644 --- a/util/why-error.md +++ b/util/why-error.md @@ -13,18 +13,15 @@ This file documents why some GNU tests are failing: * ls/ls-misc.pl * ls/stat-free-symlinks.sh * misc/close-stdout.sh -* misc/nohup.sh * numfmt/numfmt.pl - https://github.com/uutils/coreutils/issues/7219 / https://github.com/uutils/coreutils/issues/7221 * misc/stdbuf.sh - https://github.com/uutils/coreutils/issues/7072 * misc/tsort.pl - https://github.com/uutils/coreutils/issues/7074 * misc/write-errors.sh -* od/od-float.sh * ptx/ptx-overrun.sh * ptx/ptx.pl * rm/one-file-system.sh - https://github.com/uutils/coreutils/issues/7011 * rm/rm1.sh - https://github.com/uutils/coreutils/issues/9479 -* shred/shred-passes.sh -* sort/sort-continue.sh +* shred/shred-passes.sh - https://github.com/uutils/coreutils/pull/9317 * sort/sort-debug-keys.sh * sort/sort-debug-warn.sh * sort/sort-float.sh @@ -39,4 +36,3 @@ This file documents why some GNU tests are failing: * tail/symlink.sh * stty/stty-row-col.sh * stty/stty.sh -* tty/tty-eof.pl diff --git a/util/why-skip.md b/util/why-skip.md index 75f14c6f5cb..f471ec09b22 100644 --- a/util/why-skip.md +++ b/util/why-skip.md @@ -31,5 +31,3 @@ = Disabled. Enabled at GNU coreutils > 9.9 = * tests/misc/tac-continue.sh -* tests/mkdir/writable-under-readonly.sh -* tests/cp/cp-mv-enotsup-xattr.sh From a61c191b68a31914b0aa462e877ca444180e88dc Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 20 Dec 2025 21:39:51 +0900 Subject: [PATCH 2/8] refactor(wc): simplify SIMD feature collection in debug output - Changed multi-line SIMD feature vector creation to a single-line expression for improved readability and consistency with surrounding code. - No functional changes; only stylistic refactoring in the wc debug logic. --- src/uu/wc/src/wc.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index b8b4d5134da..b7d22dd3af0 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -840,10 +840,8 @@ fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { if settings.debug { let policy = SimdPolicy::detect(); if policy.allows_simd() { - let enabled: Vec<&'static str> = policy - .iter_features() - .map(hardware_feature_label) - .collect(); + let enabled: Vec<&'static str> = + policy.iter_features().map(hardware_feature_label).collect(); if enabled.is_empty() { eprintln!("{}", translate!("wc-debug-hw-unavailable")); } else { From a640e0dbe5f21be4bb57e8227ebfdc4f98073297 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sun, 21 Dec 2025 22:52:56 +0900 Subject: [PATCH 3/8] feat(wc): enhance debug output for SIMD hardware support limitations Add new localization strings and logic to provide detailed debug information when SIMD support is limited by GLIBC_TUNABLES, including lists of disabled and enabled features. Refactor SIMD allowance check for better accuracy in detecting runtime support. --- src/uu/wc/locales/en-US.ftl | 1 + src/uu/wc/locales/fr-FR.ftl | 1 + src/uu/wc/src/count_fast.rs | 5 +- src/uu/wc/src/wc.rs | 112 ++++++++++++++++++------ src/uucore/src/lib/features/hardware.rs | 3 +- 5 files changed, 91 insertions(+), 31 deletions(-) diff --git a/src/uu/wc/locales/en-US.ftl b/src/uu/wc/locales/en-US.ftl index 52ea93291d8..ac22b1f1ef5 100644 --- a/src/uu/wc/locales/en-US.ftl +++ b/src/uu/wc/locales/en-US.ftl @@ -37,3 +37,4 @@ wc-debug-hw-unavailable = wc: debug: hardware support unavailable on this CPU wc-debug-hw-using = wc: debug: using hardware support (features: { $features }) wc-debug-hw-disabled-env = wc: debug: hardware support disabled by environment wc-debug-hw-disabled-glibc = wc: debug: hardware support disabled by GLIBC_TUNABLES ({ $features }) +wc-debug-hw-limited-glibc = wc: debug: hardware support limited by GLIBC_TUNABLES (disabled: { $disabled }; enabled: { $enabled }) diff --git a/src/uu/wc/locales/fr-FR.ftl b/src/uu/wc/locales/fr-FR.ftl index 641e6846111..22efab71a1c 100644 --- a/src/uu/wc/locales/fr-FR.ftl +++ b/src/uu/wc/locales/fr-FR.ftl @@ -37,3 +37,4 @@ wc-debug-hw-unavailable = wc : debug : prise en charge matérielle indisponible wc-debug-hw-using = wc : debug : utilisation de l'accélération matérielle (fonctions : { $features }) wc-debug-hw-disabled-env = wc : debug : prise en charge matérielle désactivée par l'environnement wc-debug-hw-disabled-glibc = wc : debug : prise en charge matérielle désactivée par GLIBC_TUNABLES ({ $features }) +wc-debug-hw-limited-glibc = wc : debug : prise en charge matérielle limitée par GLIBC_TUNABLES (désactivé : { $disabled } ; activé : { $enabled }) diff --git a/src/uu/wc/src/count_fast.rs b/src/uu/wc/src/count_fast.rs index d2715f3754a..d20c53d4fb8 100644 --- a/src/uu/wc/src/count_fast.rs +++ b/src/uu/wc/src/count_fast.rs @@ -4,7 +4,7 @@ // file that was distributed with this source code. // cSpell:ignore sysconf -use crate::word_count::WordCount; +use crate::{wc_simd_allowed, word_count::WordCount}; use uucore::hardware::SimdPolicy; use super::WordCountable; @@ -233,7 +233,8 @@ pub(crate) fn count_bytes_chars_and_lines_fast< ) -> (WordCount, Option) { let mut total = WordCount::default(); let buf: &mut [u8] = &mut AlignedBuffer::default().data; - let simd_allowed = SimdPolicy::detect().allows_simd(); + let policy = SimdPolicy::detect(); + let simd_allowed = wc_simd_allowed(policy); loop { match handle.read(buf) { Ok(0) => return (total, None), diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index b7d22dd3af0..81c44555468 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -828,6 +828,55 @@ fn hardware_feature_label(feature: HardwareFeature) -> &'static str { } } +fn is_wc_simd_runtime_feature(feature: &HardwareFeature) -> bool { + matches!( + feature, + HardwareFeature::Avx2 | HardwareFeature::Sse2 | HardwareFeature::Asimd + ) +} + +fn is_wc_simd_debug_feature(feature: &HardwareFeature) -> bool { + matches!( + feature, + HardwareFeature::Avx512 + | HardwareFeature::Avx2 + | HardwareFeature::Sse2 + | HardwareFeature::Asimd + ) +} + +fn wc_simd_enabled_features(policy: &SimdPolicy) -> Vec { + policy + .iter_features() + .filter(is_wc_simd_runtime_feature) + .collect() +} + +fn wc_simd_disabled_features(policy: &SimdPolicy) -> Vec { + policy + .disabled_features() + .into_iter() + .filter(is_wc_simd_debug_feature) + .collect() +} + +fn wc_simd_disabled_runtime_features(policy: &SimdPolicy) -> Vec { + policy + .disabled_features() + .into_iter() + .filter(is_wc_simd_runtime_feature) + .collect() +} + +pub(crate) fn wc_simd_allowed(policy: &SimdPolicy) -> bool { + if !wc_simd_disabled_runtime_features(policy).is_empty() { + return false; + } + policy + .iter_features() + .any(|feature| is_wc_simd_runtime_feature(&feature)) +} + fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { let mut total_word_count = WordCount::default(); let mut num_inputs: usize = 0; @@ -839,34 +888,41 @@ fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { if settings.debug { let policy = SimdPolicy::detect(); - if policy.allows_simd() { - let enabled: Vec<&'static str> = - policy.iter_features().map(hardware_feature_label).collect(); - if enabled.is_empty() { - eprintln!("{}", translate!("wc-debug-hw-unavailable")); - } else { - eprintln!( - "{}", - translate!("wc-debug-hw-using", "features" => enabled.join(", ")) - ); - } - } else { - let disabled: Vec<&'static str> = policy - .disabled_features() - .into_iter() - .map(hardware_feature_label) - .collect(); - if disabled.is_empty() { - eprintln!("{}", translate!("wc-debug-hw-disabled-env")); - } else { - eprintln!( - "{}", - translate!( - "wc-debug-hw-disabled-glibc", - "features" => disabled.join(", ") - ) - ); - } + let enabled_features = wc_simd_enabled_features(policy); + let disabled_features = wc_simd_disabled_features(policy); + let disabled_runtime_features = wc_simd_disabled_runtime_features(policy); + + let enabled: Vec<&'static str> = enabled_features + .iter() + .copied() + .map(hardware_feature_label) + .collect(); + let disabled: Vec<&'static str> = disabled_features + .iter() + .copied() + .map(hardware_feature_label) + .collect(); + + let runtime_disabled = !disabled_runtime_features.is_empty(); + + match (enabled.is_empty(), runtime_disabled, disabled.is_empty()) { + (true, false, _) => eprintln!("{}", translate!("wc-debug-hw-unavailable")), + (_, true, _) => eprintln!( + "{}", + translate!("wc-debug-hw-disabled-glibc", "features" => disabled.join(", ")) + ), + (false, false, true) => eprintln!( + "{}", + translate!("wc-debug-hw-using", "features" => enabled.join(", ")) + ), + (false, false, false) => eprintln!( + "{}", + translate!( + "wc-debug-hw-limited-glibc", + "disabled" => disabled.join(", "), + "enabled" => enabled.join(", ") + ) + ), } } diff --git a/src/uucore/src/lib/features/hardware.rs b/src/uucore/src/lib/features/hardware.rs index f2fef80301e..474990343cb 100644 --- a/src/uucore/src/lib/features/hardware.rs +++ b/src/uucore/src/lib/features/hardware.rs @@ -214,8 +214,9 @@ impl SimdPolicy { } } + /// Returns true if any SIMD feature remains enabled after applying GLIBC_TUNABLES. pub fn allows_simd(&self) -> bool { - self.disabled_by_env.is_empty() + self.iter_features().next().is_some() } pub fn disabled_features(&self) -> Vec { From 62673fcd9c22e1047f264cc0892b96a019064d18 Mon Sep 17 00:00:00 2001 From: mattsu Date: Mon, 22 Dec 2025 21:13:49 +0900 Subject: [PATCH 4/8] refactor: consolidate SIMD feature handling in wc command Refactor SIMD feature detection and reporting in the wc utility by introducing a WcSimdFeatures struct to group enabled, disabled, and runtime-disabled features. This replaces multiple separate functions with a single function, improving code organization and efficiency by reducing redundant iterations over feature lists. Also rename helper functions for clarity and update debug output logic accordingly. --- src/uu/wc/src/wc.rs | 85 ++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 81c44555468..963fa6e78e0 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -828,14 +828,14 @@ fn hardware_feature_label(feature: HardwareFeature) -> &'static str { } } -fn is_wc_simd_runtime_feature(feature: &HardwareFeature) -> bool { +fn is_simd_runtime_feature(feature: &HardwareFeature) -> bool { matches!( feature, HardwareFeature::Avx2 | HardwareFeature::Sse2 | HardwareFeature::Asimd ) } -fn is_wc_simd_debug_feature(feature: &HardwareFeature) -> bool { +fn is_simd_debug_feature(feature: &HardwareFeature) -> bool { matches!( feature, HardwareFeature::Avx512 @@ -845,36 +845,44 @@ fn is_wc_simd_debug_feature(feature: &HardwareFeature) -> bool { ) } -fn wc_simd_enabled_features(policy: &SimdPolicy) -> Vec { - policy - .iter_features() - .filter(is_wc_simd_runtime_feature) - .collect() +struct WcSimdFeatures { + enabled: Vec, + disabled: Vec, + disabled_runtime: Vec, } -fn wc_simd_disabled_features(policy: &SimdPolicy) -> Vec { - policy - .disabled_features() - .into_iter() - .filter(is_wc_simd_debug_feature) - .collect() -} +fn wc_simd_features(policy: &SimdPolicy) -> WcSimdFeatures { + let enabled = policy + .iter_features() + .filter(is_simd_runtime_feature) + .collect(); -fn wc_simd_disabled_runtime_features(policy: &SimdPolicy) -> Vec { - policy - .disabled_features() - .into_iter() - .filter(is_wc_simd_runtime_feature) - .collect() + let mut disabled = Vec::new(); + let mut disabled_runtime = Vec::new(); + for feature in policy.disabled_features() { + if is_simd_debug_feature(&feature) { + disabled.push(feature); + } + if is_simd_runtime_feature(&feature) { + disabled_runtime.push(feature); + } + } + + WcSimdFeatures { + enabled, + disabled, + disabled_runtime, + } } pub(crate) fn wc_simd_allowed(policy: &SimdPolicy) -> bool { - if !wc_simd_disabled_runtime_features(policy).is_empty() { + let disabled_features = policy.disabled_features(); + if disabled_features.iter().any(is_simd_runtime_feature) { return false; } policy .iter_features() - .any(|feature| is_wc_simd_runtime_feature(&feature)) + .any(|feature| is_simd_runtime_feature(&feature)) } fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { @@ -888,41 +896,46 @@ fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { if settings.debug { let policy = SimdPolicy::detect(); - let enabled_features = wc_simd_enabled_features(policy); - let disabled_features = wc_simd_disabled_features(policy); - let disabled_runtime_features = wc_simd_disabled_runtime_features(policy); + let features = wc_simd_features(policy); - let enabled: Vec<&'static str> = enabled_features + let enabled: Vec<&'static str> = features + .enabled .iter() .copied() .map(hardware_feature_label) .collect(); - let disabled: Vec<&'static str> = disabled_features + let disabled: Vec<&'static str> = features + .disabled .iter() .copied() .map(hardware_feature_label) .collect(); - let runtime_disabled = !disabled_runtime_features.is_empty(); + let enabled_empty = enabled.is_empty(); + let disabled_empty = disabled.is_empty(); + let runtime_disabled = !features.disabled_runtime.is_empty(); - match (enabled.is_empty(), runtime_disabled, disabled.is_empty()) { - (true, false, _) => eprintln!("{}", translate!("wc-debug-hw-unavailable")), - (_, true, _) => eprintln!( + if enabled_empty && !runtime_disabled { + eprintln!("{}", translate!("wc-debug-hw-unavailable")); + } else if runtime_disabled { + eprintln!( "{}", translate!("wc-debug-hw-disabled-glibc", "features" => disabled.join(", ")) - ), - (false, false, true) => eprintln!( + ); + } else if !enabled_empty && disabled_empty { + eprintln!( "{}", translate!("wc-debug-hw-using", "features" => enabled.join(", ")) - ), - (false, false, false) => eprintln!( + ); + } else { + eprintln!( "{}", translate!( "wc-debug-hw-limited-glibc", "disabled" => disabled.join(", "), "enabled" => enabled.join(", ") ) - ), + ); } } From 39615a3e0d04c170ca510ef8222b86a317edfbb1 Mon Sep 17 00:00:00 2001 From: mattsu <35655889+mattsu2020@users.noreply.github.com> Date: Tue, 23 Dec 2025 08:08:22 +0900 Subject: [PATCH 5/8] Update src/uu/wc/src/wc.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dorian Péron <72708393+RenjiSann@users.noreply.github.com> --- src/uu/wc/src/wc.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 963fa6e78e0..ff4e3d2c382 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -916,19 +916,19 @@ fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { let runtime_disabled = !features.disabled_runtime.is_empty(); if enabled_empty && !runtime_disabled { - eprintln!("{}", translate!("wc-debug-hw-unavailable")); + show_error!("{}", translate!("wc-debug-hw-unavailable")); } else if runtime_disabled { - eprintln!( + show_error!( "{}", translate!("wc-debug-hw-disabled-glibc", "features" => disabled.join(", ")) ); } else if !enabled_empty && disabled_empty { - eprintln!( + show_error!( "{}", translate!("wc-debug-hw-using", "features" => enabled.join(", ")) ); } else { - eprintln!( + show_error!( "{}", translate!( "wc-debug-hw-limited-glibc", From b0ededa06ebe6500cfb61911f0431c899f89d73f Mon Sep 17 00:00:00 2001 From: mattsu <35655889+mattsu2020@users.noreply.github.com> Date: Tue, 23 Dec 2025 08:08:37 +0900 Subject: [PATCH 6/8] Update src/uu/wc/locales/en-US.ftl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dorian Péron <72708393+RenjiSann@users.noreply.github.com> --- src/uu/wc/locales/en-US.ftl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/uu/wc/locales/en-US.ftl b/src/uu/wc/locales/en-US.ftl index ac22b1f1ef5..c08805740c0 100644 --- a/src/uu/wc/locales/en-US.ftl +++ b/src/uu/wc/locales/en-US.ftl @@ -33,8 +33,8 @@ wc-standard-input = standard input wc-total = total # Debug messages -wc-debug-hw-unavailable = wc: debug: hardware support unavailable on this CPU -wc-debug-hw-using = wc: debug: using hardware support (features: { $features }) -wc-debug-hw-disabled-env = wc: debug: hardware support disabled by environment -wc-debug-hw-disabled-glibc = wc: debug: hardware support disabled by GLIBC_TUNABLES ({ $features }) -wc-debug-hw-limited-glibc = wc: debug: hardware support limited by GLIBC_TUNABLES (disabled: { $disabled }; enabled: { $enabled }) +wc-debug-hw-unavailable = debug: hardware support unavailable on this CPU +wc-debug-hw-using = debug: using hardware support (features: { $features }) +wc-debug-hw-disabled-env = debug: hardware support disabled by environment +wc-debug-hw-disabled-glibc = debug: hardware support disabled by GLIBC_TUNABLES ({ $features }) +wc-debug-hw-limited-glibc = debug: hardware support limited by GLIBC_TUNABLES (disabled: { $disabled }; enabled: { $enabled }) From 746513702af013e102faf2b3b775de324077d8a0 Mon Sep 17 00:00:00 2001 From: mattsu <35655889+mattsu2020@users.noreply.github.com> Date: Tue, 23 Dec 2025 08:08:49 +0900 Subject: [PATCH 7/8] Update src/uu/wc/locales/fr-FR.ftl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dorian Péron <72708393+RenjiSann@users.noreply.github.com> --- src/uu/wc/locales/fr-FR.ftl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/uu/wc/locales/fr-FR.ftl b/src/uu/wc/locales/fr-FR.ftl index 22efab71a1c..8eae88e2d5e 100644 --- a/src/uu/wc/locales/fr-FR.ftl +++ b/src/uu/wc/locales/fr-FR.ftl @@ -33,8 +33,8 @@ wc-standard-input = entrée standard wc-total = total # Messages de débogage -wc-debug-hw-unavailable = wc : debug : prise en charge matérielle indisponible sur ce CPU -wc-debug-hw-using = wc : debug : utilisation de l'accélération matérielle (fonctions : { $features }) -wc-debug-hw-disabled-env = wc : debug : prise en charge matérielle désactivée par l'environnement -wc-debug-hw-disabled-glibc = wc : debug : prise en charge matérielle désactivée par GLIBC_TUNABLES ({ $features }) -wc-debug-hw-limited-glibc = wc : debug : prise en charge matérielle limitée par GLIBC_TUNABLES (désactivé : { $disabled } ; activé : { $enabled }) +wc-debug-hw-unavailable = debug : prise en charge matérielle indisponible sur ce CPU +wc-debug-hw-using = debug : utilisation de l'accélération matérielle (fonctions : { $features }) +wc-debug-hw-disabled-env = debug : prise en charge matérielle désactivée par l'environnement +wc-debug-hw-disabled-glibc = debug : prise en charge matérielle désactivée par GLIBC_TUNABLES ({ $features }) +wc-debug-hw-limited-glibc = debug : prise en charge matérielle limitée par GLIBC_TUNABLES (désactivé : { $disabled } ; activé : { $enabled }) From b2fe8618242c1ecc62d4f0eb8962b015b8f4d283 Mon Sep 17 00:00:00 2001 From: mattsu Date: Tue, 23 Dec 2025 08:16:07 +0900 Subject: [PATCH 8/8] feat(wc): import show_error for enhanced error reporting Add the show_error import from uucore to enable better error handling in the wc utility, allowing for consistent error messages in line with the project's style. --- src/uu/wc/src/wc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index ff4e3d2c382..d048880d942 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -32,7 +32,7 @@ use uucore::{ hardware::{HardwareFeature, HasHardwareFeatures as _, SimdPolicy}, parser::shortcut_value_parser::ShortcutValueParser, quoting_style::{self, QuotingStyle}, - show, + show, show_error, }; use crate::{