From 9258cc50755efc3c5d009c20d4de6a076624cdb6 Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:43:17 +0300 Subject: [PATCH 01/11] wc: optimize -c when reading from stdin regular file --- src/uu/wc/src/wc.rs | 31 +++++++++++++++++++++++++++++++ tests/by-util/test_wc.rs | 13 +++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 90c80752f74..15d1f0a14ba 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -18,6 +18,7 @@ use std::{ fs::{self, File}, io::{self, Write, stderr}, iter, + os::fd::AsFd, path::{Path, PathBuf}, }; @@ -45,6 +46,26 @@ use crate::{ /// The minimum character width for formatting counts when reading from stdin. const MINIMUM_WIDTH: usize = 7; +/// Returns the byte size of stdin if it is a regular file (e.g. `wc -c < file`). +/// Returns `None` for pipes, terminals, sockets, etc. so we fall back to normal counting. +fn try_get_stding_size() -> Option { + #[cfg(unix)] + { + let stdin = io::stdin(); + let fd = stdin.as_fd(); + + let Ok(stat) = rustix::fs::fstat(fd) else { return None}; + + if rustix::fs::FileType::from_raw_mode(stat.st_mode) == rustix::fs::FileType::RegularFile { + return Some(stat.st_size as usize); + } + None + } + #[cfg(not(unix))] + { + None // TODO: Implement Windows support + } +} struct Settings<'a> { show_bytes: bool, show_chars: bool, @@ -482,6 +503,16 @@ fn word_count_from_reader( // show_bytes (true, false, false, false, false) => { + // Fast path: if stdin is a regular file, get size from metadata (no reading needed) + if let Some(bytes) = try_get_stding_size() { + return ( + WordCount { + bytes, + ..WordCount::default() + }, + None, + ); + } // Fast path when only show_bytes is true. let (bytes, error) = count_bytes_fast(&mut reader); ( diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 2a3e9bebe44..7a25590386a 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -850,6 +850,19 @@ fn wc_w_words_with_emoji_separator() { .stdout_contains("3"); } +#[test] +fn test_wc_bytes_from_stdin_file_size_optimization() { + // This tests the metadata fast path (`fstat` on stdin) when only `-c` is used. + // It should be extremely fast even for large inputs because it doesn't read the data. + let large_input = "a".repeat(50_000_000); // 50 MB + + new_ucmd!() + .arg("-c") + .pipe_in(large_input) + .succeeds() + .stdout_is("50000000\n"); +} + #[test] fn test_invalid_byte_sequence_word_count() { // wc should count invalid byte sequences as words From 0acb0e52f757c7702f221d304dbed1beab28ef54 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:48:13 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/uu/wc/src/wc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 15d1f0a14ba..038ffafaa21 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -63,7 +63,7 @@ fn try_get_stding_size() -> Option { } #[cfg(not(unix))] { - None // TODO: Implement Windows support + None // TODO: Implement Windows support } } struct Settings<'a> { From abf12a48dd75b8940b5eb00061a2a5017349c0aa Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:50:27 +0300 Subject: [PATCH 03/11] wc: apply cargo fmt --- src/uu/wc/src/wc.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 038ffafaa21..23bcd4f136b 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -54,7 +54,9 @@ fn try_get_stding_size() -> Option { let stdin = io::stdin(); let fd = stdin.as_fd(); - let Ok(stat) = rustix::fs::fstat(fd) else { return None}; + let Ok(stat) = rustix::fs::fstat(fd) else { + return None; + }; if rustix::fs::FileType::from_raw_mode(stat.st_mode) == rustix::fs::FileType::RegularFile { return Some(stat.st_size as usize); From f06fffa5858ce701a617811217938490f8734c94 Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:54:18 +0300 Subject: [PATCH 04/11] wc: delete test --- tests/by-util/test_wc.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 7a25590386a..3ace7e2ee07 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -850,18 +850,6 @@ fn wc_w_words_with_emoji_separator() { .stdout_contains("3"); } -#[test] -fn test_wc_bytes_from_stdin_file_size_optimization() { - // This tests the metadata fast path (`fstat` on stdin) when only `-c` is used. - // It should be extremely fast even for large inputs because it doesn't read the data. - let large_input = "a".repeat(50_000_000); // 50 MB - - new_ucmd!() - .arg("-c") - .pipe_in(large_input) - .succeeds() - .stdout_is("50000000\n"); -} #[test] fn test_invalid_byte_sequence_word_count() { From 614fcebc6d368b14810ffa8308cf5295546cbcd7 Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:56:46 +0300 Subject: [PATCH 05/11] wc: appy cargo fmt again --- tests/by-util/test_wc.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 3ace7e2ee07..2a3e9bebe44 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -850,7 +850,6 @@ fn wc_w_words_with_emoji_separator() { .stdout_contains("3"); } - #[test] fn test_invalid_byte_sequence_word_count() { // wc should count invalid byte sequences as words From 846244ea40fe72d920d9619125eb1f8b839681e0 Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Wed, 8 Apr 2026 20:05:52 +0300 Subject: [PATCH 06/11] wc: clippy and spelling mistake fix --- src/uu/wc/src/wc.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 23bcd4f136b..305237e80a1 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -18,11 +18,11 @@ use std::{ fs::{self, File}, io::{self, Write, stderr}, iter, - os::fd::AsFd, path::{Path, PathBuf}, }; use clap::{Arg, ArgAction, ArgMatches, Command, builder::ValueParser}; +use rustix::fd::AsFd; use thiserror::Error; use unicode_width::UnicodeWidthChar; use utf8::{BufReadDecoder, BufReadDecoderError}; @@ -48,7 +48,7 @@ const MINIMUM_WIDTH: usize = 7; /// Returns the byte size of stdin if it is a regular file (e.g. `wc -c < file`). /// Returns `None` for pipes, terminals, sockets, etc. so we fall back to normal counting. -fn try_get_stding_size() -> Option { +fn try_get_stdin_size() -> Option { #[cfg(unix)] { let stdin = io::stdin(); @@ -506,7 +506,7 @@ fn word_count_from_reader( // show_bytes (true, false, false, false, false) => { // Fast path: if stdin is a regular file, get size from metadata (no reading needed) - if let Some(bytes) = try_get_stding_size() { + if let Some(bytes) = try_get_stdin_size() { return ( WordCount { bytes, From d56f2629993daed64948c3ce895e659dd7dbb4fe Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Wed, 8 Apr 2026 20:14:32 +0300 Subject: [PATCH 07/11] wc: move use to unix only --- src/uu/wc/src/wc.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 305237e80a1..5aef27eb72b 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -22,7 +22,6 @@ use std::{ }; use clap::{Arg, ArgAction, ArgMatches, Command, builder::ValueParser}; -use rustix::fd::AsFd; use thiserror::Error; use unicode_width::UnicodeWidthChar; use utf8::{BufReadDecoder, BufReadDecoderError}; @@ -51,6 +50,8 @@ const MINIMUM_WIDTH: usize = 7; fn try_get_stdin_size() -> Option { #[cfg(unix)] { + use rustix::fd::AsFd; + let stdin = io::stdin(); let fd = stdin.as_fd(); From 3dde5d864ef9f18f92256e53af022c665d817d55 Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:08:49 +0300 Subject: [PATCH 08/11] wc: more strict file checking --- src/uu/wc/src/wc.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 5aef27eb72b..2919fc343f4 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -59,9 +59,12 @@ fn try_get_stdin_size() -> Option { return None; }; - if rustix::fs::FileType::from_raw_mode(stat.st_mode) == rustix::fs::FileType::RegularFile { + let file_type = rustix::fs::FileType::from_raw_mode(stat.st_mode); + + if file_type == rustix::fs::FileType::RegularFile && stat.st_size > 0 { return Some(stat.st_size as usize); } + None } #[cfg(not(unix))] From 788c49b3e55ff5d87e9703d3ecee18f3b1e29fad Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:22:23 +0300 Subject: [PATCH 09/11] wc: stricter file checking --- src/uu/wc/src/wc.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 2919fc343f4..dce1021a362 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -61,7 +61,8 @@ fn try_get_stdin_size() -> Option { let file_type = rustix::fs::FileType::from_raw_mode(stat.st_mode); - if file_type == rustix::fs::FileType::RegularFile && stat.st_size > 0 { + if file_type == rustix::fs::FileType::RegularFile && stat.st_size > 0 && stat.st_nlink == 1 + { return Some(stat.st_size as usize); } From 5176d84ab447090635823f409edc8575f6d2cac5 Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:46:31 +0300 Subject: [PATCH 10/11] wc: failing test fix --- src/uu/wc/src/wc.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index dce1021a362..817b31f1177 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -59,14 +59,19 @@ fn try_get_stdin_size() -> Option { return None; }; - let file_type = rustix::fs::FileType::from_raw_mode(stat.st_mode); + if rustix::fs::FileType::from_raw_mode(stat.st_mode) != rustix::fs::FileType::RegularFile { + return None; + } + + let Ok(fs) = rustix::fs::fstatfs(fd) else { + return None; + }; - if file_type == rustix::fs::FileType::RegularFile && stat.st_size > 0 && stat.st_nlink == 1 - { - return Some(stat.st_size as usize); + if fs.f_type == rustix::fs::PROC_SUPER_MAGIC { + return None; } - None + Some(stat.st_size as usize) } #[cfg(not(unix))] { From c60b7db0c311681e104681bba6b4f31fccff7420 Mon Sep 17 00:00:00 2001 From: venoosoo <120491991+venoosoo@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:56:23 +0300 Subject: [PATCH 11/11] wc: gnu test failing fix --- src/uu/wc/src/wc.rs | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 817b31f1177..d4d282fd9b5 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -51,31 +51,21 @@ fn try_get_stdin_size() -> Option { #[cfg(unix)] { use rustix::fd::AsFd; - let stdin = io::stdin(); let fd = stdin.as_fd(); - let Ok(stat) = rustix::fs::fstat(fd) else { return None; }; - - if rustix::fs::FileType::from_raw_mode(stat.st_mode) != rustix::fs::FileType::RegularFile { - return None; + let file_type = rustix::fs::FileType::from_raw_mode(stat.st_mode); + if file_type == rustix::fs::FileType::RegularFile && stat.st_size > 0 && stat.st_blocks > 0 + { + return Some(stat.st_size as usize); } - - let Ok(fs) = rustix::fs::fstatfs(fd) else { - return None; - }; - - if fs.f_type == rustix::fs::PROC_SUPER_MAGIC { - return None; - } - - Some(stat.st_size as usize) + None } #[cfg(not(unix))] { - None // TODO: Implement Windows support + None } } struct Settings<'a> {