diff --git a/Cargo.lock b/Cargo.lock index ef971925a73..34c1a6cff68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -585,6 +585,7 @@ dependencies = [ "uucore", "uuhelp_parser", "uutests", + "uutils-args", "walkdir", "xattr", "zip", @@ -1320,6 +1321,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lexopt" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa0e2a1fcbe2f6be6c42e342259976206b383122fc152e872795338b5a3f3a7" + [[package]] name = "libc" version = "0.2.172" @@ -2759,6 +2766,7 @@ dependencies = [ "glob", "thiserror 2.0.12", "uucore", + "uutils-args", "windows-sys 0.59.0", ] @@ -3639,6 +3647,27 @@ dependencies = [ "xattr", ] +[[package]] +name = "uutils-args" +version = "0.1.0" +source = "git+https://github.com/uutils/uutils-args.git?rev=bbb193d3870fb5b564b1189bc80d4df8b3f65280#bbb193d3870fb5b564b1189bc80d4df8b3f65280" +dependencies = [ + "lexopt", + "roff", + "strsim", + "uutils-args-derive", +] + +[[package]] +name = "uutils-args-derive" +version = "0.1.0" +source = "git+https://github.com/uutils/uutils-args.git?rev=bbb193d3870fb5b564b1189bc80d4df8b3f65280#bbb193d3870fb5b564b1189bc80d4df8b3f65280" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "uutils_term_grid" version = "0.7.0" diff --git a/Cargo.toml b/Cargo.toml index 05dc4ed7d81..9d9e9b08119 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -350,6 +350,7 @@ unicode-segmentation = "1.11.0" unicode-width = "0.2.0" utf-8 = "0.7.6" utmp-classic = "0.1.6" +uutils-args = { git = "https://github.com/uutils/uutils-args.git", rev = "bbb193d3870fb5b564b1189bc80d4df8b3f65280" } uutils_term_grid = "0.7" walkdir = "2.5" winapi-util = "0.1.8" @@ -377,6 +378,7 @@ uutests = { version = "0.0.30", package = "uutests", path = "tests/uutests/" } [dependencies] clap = { workspace = true } uucore = { workspace = true } +uutils-args = { workspace = true } clap_complete = { workspace = true } clap_mangen = { workspace = true } phf = { workspace = true } diff --git a/src/uu/du/Cargo.toml b/src/uu/du/Cargo.toml index 5b0d3f5e8ea..eea61394cce 100644 --- a/src/uu/du/Cargo.toml +++ b/src/uu/du/Cargo.toml @@ -23,6 +23,7 @@ chrono = { workspace = true } glob = { workspace = true } clap = { workspace = true } uucore = { workspace = true, features = ["format", "parser"] } +uutils-args = { workspace = true } thiserror = { workspace = true } [target.'cfg(target_os = "windows")'.dependencies] diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 0b268888136..041070673fe 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -3,11 +3,14 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +// spell-checker:ignore (terms) Kibi Giga Tera Tebi Peta Pebi Exbi + use chrono::{DateTime, Local}; use clap::{Arg, ArgAction, ArgMatches, Command, builder::PossibleValue}; use glob::Pattern; use std::collections::HashSet; use std::env; +use std::ffi::{OsStr, OsString}; #[cfg(not(windows))] use std::fs::Metadata; use std::fs::{self, DirEntry, File}; @@ -31,6 +34,7 @@ use uucore::parser::parse_glob; use uucore::parser::parse_size::{ParseSizeError, parse_size_u64}; use uucore::parser::shortcut_value_parser::ShortcutValueParser; use uucore::{format_usage, help_about, help_section, help_usage, show, show_error, show_warning}; +use uutils_args::{Arguments, Options, Value, ValueResult}; #[cfg(windows)] use windows_sys::Win32::Foundation::HANDLE; #[cfg(windows)] @@ -74,6 +78,112 @@ const ABOUT: &str = help_about!("du.md"); const AFTER_HELP: &str = help_section!("after help", "du.md"); const USAGE: &str = help_usage!("du.md"); +#[derive(Clone, Debug, Eq, PartialEq)] +#[allow(dead_code)] +enum SizeUnit { + Kilo, + Kibi, + Mega, + Mebi, + Giga, + Gibi, + Tera, + Tebi, + Peta, + Pebi, + Exa, + Exbi, + // GNU du does not recognize higher prefixes, so we shouldn't either. +} + +#[derive(Arguments)] +enum NewArg { + #[arg( + "-0", + "-a", + "--all", + "-c", + "--count-links", + "-D", + "--dereference", + "--dereference-args", + "-H", + "--inodes", + "-l", + "-L", + "--no-dereference", + "--one-file-system", + "-P", + "-s", + "-S", + "--separate-dirs", + "--summarize", + "--verbose", + "-x" + )] + Ignore, + + #[arg( + "--exclude-from=X", + "--exclude=X", + "--files0-from=X", + "--threshold=X", + "--time-style=X" + )] + IgnoreArg(#[allow(dead_code)] OsString), + + #[arg("--time[=X]", "-d[X]")] + IgnoreOptionArg(#[allow(dead_code)] Option), + + #[arg("--apparent-size")] + ApparentSize, + + #[arg("-B SIZE", "--block-size=SIZE")] + BlockSize(SizeFormat), + + #[arg("-h", "--human-readable")] + HumanBinary, + + #[arg("--si")] + HumanDecimal, + + #[arg("-b", "--bytes")] + Bytes, + + #[arg("-k")] + KibiBytes, + + #[arg("-m")] + MebiBytes, +} + +#[derive(Debug, Default, PartialEq, Eq)] +struct Settings { + apparent_size: bool, + size_format: SizeFormat, +} + +impl Options for Settings { + fn apply(&mut self, arg: NewArg) -> Result<(), uutils_args::Error> { + match arg { + NewArg::ApparentSize => self.apparent_size = true, + NewArg::BlockSize(size_format) => self.size_format = size_format, + NewArg::Bytes => { + self.apparent_size = true; + self.size_format = SizeFormat::BlockSize(1); + } + NewArg::HumanBinary => self.size_format = SizeFormat::HumanBinary, + NewArg::HumanDecimal => self.size_format = SizeFormat::HumanDecimal, + NewArg::KibiBytes => self.size_format = SizeFormat::BlockSize(1024), + NewArg::MebiBytes => self.size_format = SizeFormat::BlockSize(1024 * 1024), + NewArg::Ignore => {} + NewArg::IgnoreArg(_) => {} + NewArg::IgnoreOptionArg(_) => {} + } + Ok(()) + } +} + struct TraversalOptions { all: bool, separate_dirs: bool, @@ -111,11 +221,54 @@ enum Time { Created, } -#[derive(Clone)] +#[derive(Clone, Debug, Eq, PartialEq)] enum SizeFormat { HumanDecimal, HumanBinary, BlockSize(u64), + #[allow(dead_code)] + Unit(SizeUnit), +} + +impl SizeFormat { + fn parse(s: &OsStr, _lenient: bool) -> ValueResult { + // FIXME: Must have a custom parser, since "Unit" and "Divisor" distinction is lost by parse_size_u64! + // FIXME: Must be more lenient when parsing envvars, i.e. allow trailing (potentially non-UTF-8) garbage! + if let Some(s) = s.to_str() { + let bytes = parse_size_u64(s)?; + // FIXME: Use USimpleError maybe? + if bytes == 0 { + Err("".into()) // FIXME: Error messages are ignored?! + } else { + Ok(SizeFormat::BlockSize(bytes)) + } + } else { + Err("".into()) // FIXME: Error messages are ignored?! + } + } +} + +impl Default for SizeFormat { + fn default() -> SizeFormat { + for env_var in ["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { + if let Some(env_size) = env::var_os(env_var) { + if let Ok(v) = SizeFormat::parse(&env_size, true) { + return v; + } + } + } + if env::var("POSIXLY_CORRECT").is_ok() { + SizeFormat::BlockSize(512) + } else { + SizeFormat::BlockSize(1024) + } + } +} + +impl Value for SizeFormat { + fn from_value(s: &OsStr) -> ValueResult { + SizeFormat::parse(s, false) + } } #[derive(PartialEq, Eq, Hash, Clone, Copy)] @@ -280,26 +433,6 @@ fn get_file_info(path: &Path) -> Option { result } -fn read_block_size(s: Option<&str>) -> UResult { - if let Some(s) = s { - parse_size_u64(s) - .map_err(|e| USimpleError::new(1, format_error_message(&e, s, options::BLOCK_SIZE))) - } else { - for env_var in ["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { - if let Ok(env_size) = env::var(env_var) { - if let Ok(v) = parse_size_u64(&env_size) { - return Ok(v); - } - } - } - if env::var("POSIXLY_CORRECT").is_ok() { - Ok(512) - } else { - Ok(1024) - } - } -} - // this takes `my_stat` to avoid having to stat files multiple times. #[allow(clippy::cognitive_complexity)] fn du( @@ -549,6 +682,7 @@ impl StatPrinter { size.div_ceil(block_size).to_string() } } + SizeFormat::Unit(_) => unimplemented!(), // FIXME } } @@ -618,7 +752,12 @@ fn read_files_from(file_name: &str) -> Result, std::io::Error> { #[uucore::main] #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let matches = uu_app().try_get_matches_from(args)?; + let raw_args = args.collect::>(); + let (settings, _operands) = Settings::default() + .parse(&raw_args) + // FIXME: Leads to ugly errors + .map_err(|e| std::io::Error::other(format!("{e}").trim_start_matches("error: ")))?; + let matches = uu_app().try_get_matches_from(raw_args.iter())?; let summarize = matches.get_flag(options::SUMMARIZE); @@ -665,29 +804,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } }); - let size_format = if matches.get_flag(options::HUMAN_READABLE) { - SizeFormat::HumanBinary - } else if matches.get_flag(options::SI) { - SizeFormat::HumanDecimal - } else if matches.get_flag(options::BYTES) { - SizeFormat::BlockSize(1) - } else if matches.get_flag(options::BLOCK_SIZE_1K) { - SizeFormat::BlockSize(1024) - } else if matches.get_flag(options::BLOCK_SIZE_1M) { - SizeFormat::BlockSize(1024 * 1024) - } else { - let block_size_str = matches.get_one::(options::BLOCK_SIZE); - let block_size = read_block_size(block_size_str.map(AsRef::as_ref))?; - if block_size == 0 { - return Err(std::io::Error::other(format!( - "invalid --{} argument {}", - options::BLOCK_SIZE, - block_size_str.map_or("???BUG", |v| v).quote() - )) - .into()); - } - SizeFormat::BlockSize(block_size) - }; + let size_format = settings.size_format; let traversal_options = TraversalOptions { all: matches.get_flag(options::ALL), @@ -726,7 +843,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { }) }) .transpose()?, - apparent_size: matches.get_flag(options::APPARENT_SIZE) || matches.get_flag(options::BYTES), + apparent_size: settings.apparent_size, time, time_format, line_ending: LineEnding::from_zero_flag(matches.get_flag(options::NULL)), @@ -1110,9 +1227,13 @@ mod test_du { #[test] fn test_read_block_size() { - let test_data = [Some("1024".to_string()), Some("K".to_string()), None]; - for it in &test_data { - assert!(matches!(read_block_size(it.as_deref()), Ok(1024))); + for (input, expected) in [ + ("1024", SizeFormat::BlockSize(1024)), + ("1K", SizeFormat::BlockSize(1024)), + // FIXME: data loss! Should also return whether a suffix was used, see test_du_blocksize_multiplier and #7738 + // ("K", SizeFormat::Unit(SizeUnit::Kibi)), + ] { + assert_eq!(expected, SizeFormat::from_value(OsStr::new(input)).unwrap()); } } } diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 668c1ed1b2b..e947fa2ee0c 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (paths) atim sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty tsublink testfile1 testfile2 filelist fpath testdir testfile +// spell-checker:ignore (paths) atim sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty tsublink testfile1 testfile2 filelist fpath testdir testfile KMGTPE #[cfg(not(windows))] use regex::Regex; @@ -131,24 +131,28 @@ fn du_basics_subdir(s: &str) { #[test] fn test_du_invalid_size() { - let args = &["block-size", "threshold"]; + let args = &[ + "block-size", /*, "threshold" FIXME: Different parsers mean different error messages */ + ]; let ts = TestScenario::new(util_name!()); for s in args { ts.ucmd() .arg(format!("--{s}=1fb4t")) .arg("/tmp") .fails_with_code(1) - .stderr_only(format!("du: invalid suffix in --{s} argument '1fb4t'\n")); + .stderr_only(format!("du: Invalid value '1fb4t' for '--{s}': '1fb4t'\n")); // TODO: ugly message ts.ucmd() .arg(format!("--{s}=x")) .arg("/tmp") .fails_with_code(1) - .stderr_only(format!("du: invalid --{s} argument 'x'\n")); + .stderr_only(format!("du: Invalid value 'x' for '--{s}': 'x'\n")); ts.ucmd() .arg(format!("--{s}=1Y")) .arg("/tmp") .fails_with_code(1) - .stderr_only(format!("du: --{s} argument '1Y' too large\n")); + .stderr_only(format!( + "du: Invalid value '1Y' for '--{s}': '1Y': Value too large for defined data type\n" + )); } } @@ -1259,8 +1263,138 @@ fn test_du_blocksize_zero_do_not_panic() { .arg(format!("-B{block_size}")) .arg("foo") .fails() - .stderr_only(format!( - "du: invalid --block-size argument '{block_size}'\n" - )); + // TODO: Ugly error message + .stderr_only(format!("du: Invalid value '{block_size}' for '-B': \n")); + } +} + +#[test] +fn test_du_blocksize_bytes_order() { + for (args, expected_output) in [ + // Division is only correct here because 123_456 is even. Otherwise, we would need div_ceil. + (["--apparent-size", "-B2"].as_slice(), 123_456 / 2), + (["-bB2"].as_slice(), 123_456 / 2), + (["-b", "-B2"].as_slice(), 123_456 / 2), + (["-B2", "-b"].as_slice(), 123_456), + // FIXME, cannot be handled while clap is involved at all! (["-b", "-B2", "-b"].as_slice(), 123_456), + ] { + let (at, mut ucmd) = at_and_ucmd!(); + + let fpath = at.plus("test.txt"); + std::fs::File::create(&fpath) + .expect("cannot create test file") + .set_len(123_456) + .expect("cannot truncate test len to size"); + ucmd.args(args) + .arg(&fpath) + .succeeds() + .stdout_only(format!("{expected_output}\t{}\n", fpath.to_string_lossy())); } } + +#[ignore = "known issue https://github.com/uutils/coreutils/issues/7738"] +#[test] +fn test_du_blocksize_multiplier() { + for (blocksize, expected_output) in [ + ("1", "123456789"), + ("2", "61728395"), + ("1000", "123457"), + ("1024", "120564"), + ("1kB", "123457"), + ("1KB", "123457"), + ("1k", "120564"), + ("1K", "120564"), + ("kB", "123457kB"), + ("KB", "123457kB"), + ("k", "120564k"), + ("K", "120564k"), + ("2kB", "61729"), + ("2KB", "61729"), + ("2k", "60282"), + ("2K", "60282"), + ] { + let (at, mut ucmd) = at_and_ucmd!(); + let fpath = at.plus("test.txt"); + std::fs::File::create(&fpath) + .expect("cannot create test file") + .set_len(123_456_789) + .expect("cannot truncate test len to size"); + ucmd.arg("--apparent-size") + .arg("-B") + .arg(blocksize) + .arg(&fpath) + .succeeds() + .stdout_only(format!("{expected_output}\t{}\n", fpath.to_string_lossy())); + } +} + +#[test] +fn test_du_blocksize_refuse_lowercase_b() { + for blocksize in ["kb", "Kb", "1kb", "1Kb", "2kb", "2Kb"] { + for (complaint, args) in [ + ("-B", [&*format!("-B{blocksize}")].as_slice()), + ("-B", ["-B", blocksize].as_slice()), + ( + "--block-size", + [&*format!("--block-size={blocksize}")].as_slice(), + ), + ("--block-size", ["--block-size", blocksize].as_slice()), + ] { + let (at, mut ucmd) = at_and_ucmd!(); + let fpath = at.plus("test.txt"); + std::fs::File::create(&fpath) + .expect("cannot create test file") + .set_len(123_456) + .expect("cannot truncate test len to size"); + ucmd.arg("--apparent-size") + .args(args) + .arg(&fpath) + .fails() + .stderr_only(format!( + // TODO: Ugly error message! + "du: Invalid value '{blocksize}' for '{complaint}': '{blocksize}'\n" + )); + } + } +} + +#[test] +fn test_du_order_si_h_b() { + let ts = TestScenario::new(util_name!()); + let fpath = ts.fixtures.plus("test.txt"); + std::fs::File::create(&fpath) + .expect("cannot create test file") + .set_len(123_456_789) + .expect("cannot truncate test len to size"); + for (args, expected_output) in [ + ([].as_slice(), "120564"), + (["-h"].as_slice(), "118M"), + (["--si", "-h"].as_slice(), "118M"), + (["--si"].as_slice(), "124M"), + (["-h", "--si"].as_slice(), "124M"), + (["-b"].as_slice(), "123456789"), + (["-b", "-h"].as_slice(), "118M"), + (["-h", "-b"].as_slice(), "123456789"), + (["-b", "--si"].as_slice(), "124M"), + (["--si", "-b"].as_slice(), "123456789"), + ] { + let fpath = ts.fixtures.plus("test.txt"); + ts.ucmd() + .arg("--apparent-size") + .args(args) + .arg(&fpath) + .succeeds() + .stdout_only(format!("{expected_output}\t{}\n", fpath.to_string_lossy())); + } +} + +#[test] +fn test_du_error_precedence() { + new_ucmd!() + .args(&["-B", "banana", "--help"]) + .fails() + // TODO: Ugly error message! + .stderr_only("du: Invalid value 'banana' for '-B': 'banana'\n"); +} + +// TODO: Check all spellings of all KMGTPE variants, both 0 and 1 bytes.