From 8c9bb168573ee163b3b906362c656f603de43102 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Thu, 30 Oct 2025 18:23:35 +0100 Subject: [PATCH 01/10] test(cksum): Add tests for BLAKE2b --length sanitization --- tests/by-util/test_cksum.rs | 33 ++++++++++++++----- .../cksum/length_larger_than_512.expected | 2 -- 2 files changed, 25 insertions(+), 10 deletions(-) delete mode 100644 tests/fixtures/cksum/length_larger_than_512.expected diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index d966e4b1fe0..4b9459ef5c8 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -774,14 +774,31 @@ fn test_blake2b_length() { #[test] fn test_blake2b_length_greater_than_512() { - new_ucmd!() - .arg("--length=1024") - .arg("--algorithm=blake2b") - .arg("lorem_ipsum.txt") - .arg("alice_in_wonderland.txt") - .fails_with_code(1) - .no_stdout() - .stderr_is_fixture("length_larger_than_512.expected"); + for l in ["513", "1024", "73786976294838206464"] { + new_ucmd!() + .arg("--algorithm=blake2b") + .arg("--length") + .arg(l) + .arg("lorem_ipsum.txt") + .fails_with_code(1) + .no_stdout() + .stderr_contains(format!("invalid length: '{l}'")) + .stderr_contains("maximum digest length for 'BLAKE2b' is 512 bits"); + } +} + +#[test] +fn test_blake2b_length_nan() { + for l in ["foo", "512x", "x512", "0xff"] { + new_ucmd!() + .arg("--algorithm=blake2b") + .arg("--length") + .arg(l) + .arg("lorem_ipsum.txt") + .fails_with_code(1) + .no_stdout() + .stderr_contains(format!("invalid length: '{l}'")); + } } #[test] diff --git a/tests/fixtures/cksum/length_larger_than_512.expected b/tests/fixtures/cksum/length_larger_than_512.expected deleted file mode 100644 index 8b5d3d4c22a..00000000000 --- a/tests/fixtures/cksum/length_larger_than_512.expected +++ /dev/null @@ -1,2 +0,0 @@ -cksum: invalid length: '1024' -cksum: maximum digest length for 'BLAKE2b' is 512 bits From 885082ce140235ca93795896cdded8297dd5bfcd Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: 
Thu, 30 Oct 2025 18:26:05 +0100 Subject: [PATCH 02/10] util(cksum): Fix BLAKE2b --length sanitization --- src/uucore/src/lib/features/checksum.rs | 49 ++++++++++++++----------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 324dba7b3f8..6c67b68ac81 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -219,16 +219,23 @@ pub enum ChecksumError { StrictNotCheck, #[error("the --quiet option is meaningful only when verifying checksums")] QuietNotCheck, + + // --length sanitization errors #[error("--length required for {}", .0.quote())] LengthRequired(String), #[error("invalid length: {}", .0.quote())] InvalidLength(String), + #[error("maximum digest length for {} is 512 bits", .0.quote())] + LengthTooBigForBlake(String), + #[error("length is not a multiple of 8")] + LengthNotMultipleOf8, #[error("digest length for {} must be 224, 256, 384, or 512", .0.quote())] InvalidLengthForSha(String), #[error("--algorithm={0} requires specifying --length 224, 256, 384, or 512")] LengthRequiredForSha(String), #[error("--length is only supported with --algorithm blake2b, sha2, or sha3")] LengthOnlyForBlake2bSha2Sha3, + #[error("the --binary and --text options are meaningless when verifying checksums")] BinaryTextConflict, #[error("--text mode is only supported with --untagged")] @@ -1243,34 +1250,32 @@ pub fn calculate_blake2b_length(length: usize) -> UResult> { /// Calculates the length of the digest. pub fn calculate_blake2b_length_str(length: &str) -> UResult> { - match length.parse() { + // Blake2b's length is parsed in an u64. 
+ match length.parse::() { Ok(0) => Ok(None), - Ok(n) if n % 8 != 0 => { - show_error!("{}", ChecksumError::InvalidLength(length.into())); - Err(io::Error::new(io::ErrorKind::InvalidInput, "length is not a multiple of 8").into()) - } + + // Error cases Ok(n) if n > 512 => { show_error!("{}", ChecksumError::InvalidLength(length.into())); - Err(io::Error::new( - io::ErrorKind::InvalidInput, - format!( - "maximum digest length for {} is 512 bits", - "BLAKE2b".quote() - ), - ) - .into()) + Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) } - Ok(n) => { - // Divide by 8, as our blake2b implementation expects bytes instead of bits. - if n == 512 { - // When length is 512, it is blake2b's default. - // So, don't show it - Ok(None) - } else { - Ok(Some(n / 8)) - } + Err(e) if *e.kind() == IntErrorKind::PosOverflow => { + show_error!("{}", ChecksumError::InvalidLength(length.into())); + Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) } Err(_) => Err(ChecksumError::InvalidLength(length.into()).into()), + + Ok(n) if n % 8 != 0 => { + show_error!("{}", ChecksumError::InvalidLength(length.into())); + Err(ChecksumError::LengthNotMultipleOf8.into()) + } + + // Valid cases + + // When length is 512, it is blake2b's default. So, don't show it + Ok(512) => Ok(None), + // Divide by 8, as our blake2b implementation expects bytes instead of bits. 
+ Ok(n) => Ok(Some(n / 8)), } } From 9b6dc9675c82e78761020e8bd984e83fb1bb1855 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Fri, 31 Oct 2025 03:10:10 +0100 Subject: [PATCH 03/10] checksum: Introduce `AlgoKind` enum to rely less on string comparison --- src/uu/cksum/src/cksum.rs | 23 +- src/uu/hashsum/src/hashsum.rs | 80 ++-- src/uucore/src/lib/features/checksum.rs | 484 +++++++++++++++--------- 3 files changed, 343 insertions(+), 244 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index c7a3e969b4d..cdd9de33f85 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -14,10 +14,10 @@ use std::iter; use std::path::Path; use uucore::checksum::{ ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_BSD, ALGORITHM_OPTIONS_CRC, - ALGORITHM_OPTIONS_CRC32B, ALGORITHM_OPTIONS_SHA2, ALGORITHM_OPTIONS_SHA3, - ALGORITHM_OPTIONS_SYSV, ChecksumError, ChecksumOptions, ChecksumVerbose, HashAlgorithm, - LEGACY_ALGORITHMS, SUPPORTED_ALGORITHMS, calculate_blake2b_length_str, detect_algo, - digest_reader, perform_checksum_validation, sanitize_sha2_sha3_length_str, + ALGORITHM_OPTIONS_CRC32B, ALGORITHM_OPTIONS_SYSV, AlgoKind, ChecksumError, ChecksumOptions, + ChecksumVerbose, HashAlgorithm, LEGACY_ALGORITHMS, SUPPORTED_ALGORITHMS, + calculate_blake2b_length_str, detect_algo, digest_reader, perform_checksum_validation, + sanitize_sha2_sha3_length_str, }; use uucore::translate; @@ -368,7 +368,7 @@ fn figure_out_output_format( /// Sanitize the `--length` argument depending on `--algorithm` and `--length`. fn maybe_sanitize_length( - algo_cli: Option<&str>, + algo_cli: Option, input_length: Option<&str>, ) -> UResult> { match (algo_cli, input_length) { @@ -376,12 +376,12 @@ fn maybe_sanitize_length( (_, None) => Ok(None), // For SHA2 and SHA3, if a length is provided, ensure it is correct. 
- (Some(algo @ (ALGORITHM_OPTIONS_SHA2 | ALGORITHM_OPTIONS_SHA3)), Some(s_len)) => { + (Some(algo @ (AlgoKind::Sha2 | AlgoKind::Sha3)), Some(s_len)) => { sanitize_sha2_sha3_length_str(algo, s_len).map(Some) } // For BLAKE2b, if a length is provided, validate it. - (Some(ALGORITHM_OPTIONS_BLAKE2B), Some(len)) => calculate_blake2b_length_str(len), + (Some(AlgoKind::Blake2b), Some(len)) => calculate_blake2b_length_str(len), // For any other provided algorithm, check if length is 0. // Otherwise, this is an error. @@ -398,7 +398,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let algo_cli = matches .get_one::(options::ALGORITHM) - .map(String::as_str); + .map(AlgoKind::from_cksum) + .transpose()?; let input_length = matches .get_one::(options::LENGTH) @@ -415,7 +416,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { if check { // cksum does not support '--check'ing legacy algorithms - if algo_cli.is_some_and(|algo_name| LEGACY_ALGORITHMS.contains(&algo_name)) { + if algo_cli.is_some_and(AlgoKind::is_legacy) { return Err(ChecksumError::AlgorithmNotSupportedWithCheck.into()); } @@ -448,11 +449,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Not --check // Set the default algorithm to CRC when not '--check'ing. 
- let algo_name = algo_cli.unwrap_or(ALGORITHM_OPTIONS_CRC); + let algo_kind = algo_cli.unwrap_or(AlgoKind::Crc); let (tag, binary) = handle_tag_text_binary_flags(std::env::args_os())?; - let algo = detect_algo(algo_name, length)?; + let algo = detect_algo(algo_kind, length)?; let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO)); let output_format = figure_out_output_format( diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 7edc916fb3f..4e020625275 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -15,19 +15,17 @@ use std::io::{BufReader, Read, stdin}; use std::iter; use std::num::ParseIntError; use std::path::Path; -use uucore::checksum::ChecksumError; use uucore::checksum::ChecksumOptions; use uucore::checksum::ChecksumVerbose; -use uucore::checksum::HashAlgorithm; use uucore::checksum::calculate_blake2b_length; -use uucore::checksum::create_sha3; use uucore::checksum::detect_algo; use uucore::checksum::digest_reader; use uucore::checksum::escape_filename; use uucore::checksum::perform_checksum_validation; +use uucore::checksum::{AlgoKind, ChecksumError}; use uucore::error::{UResult, strip_errno}; use uucore::format_usage; -use uucore::sum::{Digest, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Shake128, Shake256}; +use uucore::sum::Digest; use uucore::translate; const NAME: &str = "hashsum"; @@ -63,10 +61,10 @@ struct Options<'a> { /// the output length in bits or an Err if multiple hash algorithms are specified or if a /// required flag is missing. 
#[allow(clippy::cognitive_complexity)] -fn create_algorithm_from_flags(matches: &ArgMatches) -> UResult { - let mut alg: Option = None; +fn create_algorithm_from_flags(matches: &ArgMatches) -> UResult<(AlgoKind, Option)> { + let mut alg: Option<(AlgoKind, Option)> = None; - let mut set_or_err = |new_alg: HashAlgorithm| -> UResult<()> { + let mut set_or_err = |new_alg: (AlgoKind, Option)| -> UResult<()> { if alg.is_some() { return Err(ChecksumError::CombineMultipleAlgorithms.into()); } @@ -75,80 +73,57 @@ fn create_algorithm_from_flags(matches: &ArgMatches) -> UResult { }; if matches.get_flag("md5") { - set_or_err(detect_algo("md5sum", None)?)?; + set_or_err((AlgoKind::Md5, None))?; } if matches.get_flag("sha1") { - set_or_err(detect_algo("sha1sum", None)?)?; + set_or_err((AlgoKind::Sha1, None))?; } if matches.get_flag("sha224") { - set_or_err(detect_algo("sha224sum", None)?)?; + set_or_err((AlgoKind::Sha224, None))?; } if matches.get_flag("sha256") { - set_or_err(detect_algo("sha256sum", None)?)?; + set_or_err((AlgoKind::Sha256, None))?; } if matches.get_flag("sha384") { - set_or_err(detect_algo("sha384sum", None)?)?; + set_or_err((AlgoKind::Sha384, None))?; } if matches.get_flag("sha512") { - set_or_err(detect_algo("sha512sum", None)?)?; + set_or_err((AlgoKind::Sha512, None))?; } if matches.get_flag("b2sum") { - set_or_err(detect_algo("b2sum", None)?)?; + set_or_err((AlgoKind::Blake2b, None))?; } if matches.get_flag("b3sum") { - set_or_err(detect_algo("b3sum", None)?)?; + set_or_err((AlgoKind::Blake3, None))?; } if matches.get_flag("sha3") { match matches.get_one::("bits") { - Some(bits) => set_or_err(create_sha3(*bits)?)?, + Some(bits @ (224 | 256 | 384 | 512)) => set_or_err((AlgoKind::Sha3, Some(*bits)))?, + Some(bits) => return Err(ChecksumError::InvalidLengthForSha(bits.to_string()).into()), None => return Err(ChecksumError::LengthRequired("SHA3".into()).into()), } } if matches.get_flag("sha3-224") { - set_or_err(HashAlgorithm { - name: "SHA3-224", - 
create_fn: Box::new(|| Box::new(Sha3_224::new())), - bits: 224, - })?; + set_or_err((AlgoKind::Sha3, Some(224)))?; } if matches.get_flag("sha3-256") { - set_or_err(HashAlgorithm { - name: "SHA3-256", - create_fn: Box::new(|| Box::new(Sha3_256::new())), - bits: 256, - })?; + set_or_err((AlgoKind::Sha3, Some(256)))?; } if matches.get_flag("sha3-384") { - set_or_err(HashAlgorithm { - name: "SHA3-384", - create_fn: Box::new(|| Box::new(Sha3_384::new())), - bits: 384, - })?; + set_or_err((AlgoKind::Sha3, Some(384)))?; } if matches.get_flag("sha3-512") { - set_or_err(HashAlgorithm { - name: "SHA3-512", - create_fn: Box::new(|| Box::new(Sha3_512::new())), - bits: 512, - })?; + set_or_err((AlgoKind::Sha3, Some(512)))?; } if matches.get_flag("shake128") { match matches.get_one::("bits") { - Some(bits) => set_or_err(HashAlgorithm { - name: "SHAKE128", - create_fn: Box::new(|| Box::new(Shake128::new())), - bits: *bits, - })?, + Some(bits) => set_or_err((AlgoKind::Shake128, Some(*bits)))?, None => return Err(ChecksumError::LengthRequired("SHAKE128".into()).into()), } } if matches.get_flag("shake256") { match matches.get_one::("bits") { - Some(bits) => set_or_err(HashAlgorithm { - name: "SHAKE256", - create_fn: Box::new(|| Box::new(Shake256::new())), - bits: *bits, - })?, + Some(bits) => set_or_err((AlgoKind::Shake256, Some(*bits)))?, None => return Err(ChecksumError::LengthRequired("SHAKE256".into()).into()), } } @@ -198,10 +173,10 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { None => None, }; - let algo = if is_hashsum_bin { + let (algo_kind, length) = if is_hashsum_bin { create_algorithm_from_flags(&matches)? } else { - detect_algo(&binary_name, length)? 
+ (AlgoKind::from_bin_name(&binary_name)?, length) }; let binary = if matches.get_flag("binary") { @@ -255,12 +230,7 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { }; // Execute the checksum validation - return perform_checksum_validation( - input.iter().copied(), - Some(algo.name), - Some(algo.bits), - opts, - ); + return perform_checksum_validation(input.iter().copied(), Some(algo_kind), length, opts); } else if quiet { return Err(ChecksumError::QuietNotCheck.into()); } else if strict { @@ -273,6 +243,8 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { .unwrap_or(&false); let zero = matches.get_flag("zero"); + let algo = detect_algo(algo_kind, length)?; + let opts = Options { algoname: algo.name, digest: (algo.create_fn)(), diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 6c67b68ac81..e71d8135cba 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -60,11 +60,13 @@ pub const SUPPORTED_ALGORITHMS: [&str; 17] = [ ALGORITHM_OPTIONS_SHA3, ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_SM3, - // Extra algorithms that are not valid `cksum --algorithm` + // Legacy aliases for -a sha2 -l xxx ALGORITHM_OPTIONS_SHA224, ALGORITHM_OPTIONS_SHA256, ALGORITHM_OPTIONS_SHA384, ALGORITHM_OPTIONS_SHA512, + // Extra algorithms that are not valid `cksum --algorithm` as per GNU. + // TODO: Should we keep them or drop them to align our support with GNU ? ALGORITHM_OPTIONS_BLAKE3, ALGORITHM_OPTIONS_SHAKE128, ALGORITHM_OPTIONS_SHAKE256, @@ -77,6 +79,139 @@ pub const LEGACY_ALGORITHMS: [&str; 4] = [ ALGORITHM_OPTIONS_CRC32B, ]; +/// Represents an algorithm kind. In some cases, it is not sufficient by itself +/// to know which algorithm to use exactly, because it lacks a digest length, +/// which is why [`SizedAlgoKind`] exists. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AlgoKind { + Sysv, + Bsd, + Crc, + Crc32b, + Md5, + Sm3, + Sha1, + Sha2, + Sha3, + Blake2b, + + // Available in cksum for backward compatibility + Sha224, + Sha256, + Sha384, + Sha512, + + // Not available in cksum + Shake128, + Shake256, + Blake3, +} + +impl AlgoKind { + /// Parses an [`AlgoKind`] from a string, only accepting valid cksum + /// `--algorithm` values. + pub fn from_cksum(algo: impl AsRef) -> UResult { + use AlgoKind::*; + Ok(match algo.as_ref() { + ALGORITHM_OPTIONS_SYSV => Sysv, + ALGORITHM_OPTIONS_BSD => Bsd, + ALGORITHM_OPTIONS_CRC => Crc, + ALGORITHM_OPTIONS_CRC32B => Crc32b, + ALGORITHM_OPTIONS_MD5 => Md5, + ALGORITHM_OPTIONS_SHA1 => Sha1, + ALGORITHM_OPTIONS_SHA2 => Sha2, + ALGORITHM_OPTIONS_SHA3 => Sha3, + ALGORITHM_OPTIONS_BLAKE2B => Blake2b, + ALGORITHM_OPTIONS_SM3 => Sm3, + + // For backward compatibility + ALGORITHM_OPTIONS_SHA224 => Sha224, + ALGORITHM_OPTIONS_SHA256 => Sha256, + ALGORITHM_OPTIONS_SHA384 => Sha384, + ALGORITHM_OPTIONS_SHA512 => Sha512, + _ => return Err(ChecksumError::UnknownAlgorithm(algo.as_ref().to_string()).into()), + }) + } + + /// Parses an algo kind from a string, accepting standalone binary names. + pub fn from_bin_name(algo: impl AsRef) -> UResult { + use AlgoKind::*; + Ok(match algo.as_ref() { + "md5sum" => Md5, + "sha1sum" => Sha1, + "sha224sum" => Sha224, + "sha256sum" => Sha256, + "sha384sum" => Sha384, + "sha512sum" => Sha512, + "sha3sum" => Sha3, + "b2sum" => Blake2b, + + _ => return Err(ChecksumError::UnknownAlgorithm(algo.as_ref().to_string()).into()), + }) + } + + /// Returns a string corresponding to the algorithm kind. + pub fn to_uppercase(self) -> &'static str { + use AlgoKind::*; + match self { + // Legacy algorithms + Sysv => "SYSV", + Bsd => "BSD", + Crc => "CRC", + Crc32b => "CRC32B", + + Md5 => "MD5", + Sm3 => "SM3", + Sha1 => "SHA1", + Sha2 => "SHA2", + Sha3 => "SHA3", + Blake2b => "BLAKE2b", // Note the lowercase b in the end here. 
+ + // For backward compatibility + Sha224 => "SHA224", + Sha256 => "SHA256", + Sha384 => "SHA384", + Sha512 => "SHA512", + + Shake128 => "SHAKE128", + Shake256 => "SHAKE256", + Blake3 => "BLAKE3", + } + } + + /// Returns a string corresponding to the algorithm option in cksum `-a` + pub fn to_lowercase(self) -> &'static str { + use AlgoKind::*; + match self { + Sysv => "sysv", + Bsd => "bsd", + Crc => "crc", + Crc32b => "crc32b", + Md5 => "md5", + Sm3 => "sm3", + Sha1 => "sha1", + Sha2 => "sha2", + Sha3 => "sha3", + Blake2b => "blake2b", + + // For backward compatibility + Sha224 => "sha224", + Sha256 => "sha256", + Sha384 => "sha384", + Sha512 => "sha512", + + Shake128 => "shake128", + Shake256 => "shake256", + Blake3 => "blake3", + } + } + + pub fn is_legacy(self) -> bool { + use AlgoKind::*; + matches!(self, Sysv | Bsd | Crc | Crc32b) + } +} + pub struct HashAlgorithm { pub name: &'static str, pub create_fn: Box Box>, @@ -402,43 +537,43 @@ fn print_file_report( } } -pub fn detect_algo(algo: &str, length: Option) -> UResult { +pub fn detect_algo(algo: AlgoKind, length: Option) -> UResult { match algo { - ALGORITHM_OPTIONS_SYSV => Ok(HashAlgorithm { + AlgoKind::Sysv => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SYSV, create_fn: Box::new(|| Box::new(SysV::new())), bits: 512, }), - ALGORITHM_OPTIONS_BSD => Ok(HashAlgorithm { + AlgoKind::Bsd => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_BSD, create_fn: Box::new(|| Box::new(Bsd::new())), bits: 1024, }), - ALGORITHM_OPTIONS_CRC => Ok(HashAlgorithm { + AlgoKind::Crc => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_CRC, create_fn: Box::new(|| Box::new(Crc::new())), bits: 256, }), - ALGORITHM_OPTIONS_CRC32B => Ok(HashAlgorithm { + AlgoKind::Crc32b => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_CRC32B, create_fn: Box::new(|| Box::new(CRC32B::new())), bits: 32, }), - ALGORITHM_OPTIONS_MD5 | "md5sum" => Ok(HashAlgorithm { + AlgoKind::Md5 => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_MD5, create_fn: Box::new(|| 
Box::new(Md5::new())), bits: 128, }), - ALGORITHM_OPTIONS_SHA1 | "sha1sum" => Ok(HashAlgorithm { + AlgoKind::Sha1 => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHA1, create_fn: Box::new(|| Box::new(Sha1::new())), bits: 160, }), - ALGORITHM_OPTIONS_SHA224 | "sha224sum" => Ok(create_sha2(224)?), - ALGORITHM_OPTIONS_SHA256 | "sha256sum" => Ok(create_sha2(256)?), - ALGORITHM_OPTIONS_SHA384 | "sha384sum" => Ok(create_sha2(384)?), - ALGORITHM_OPTIONS_SHA512 | "sha512sum" => Ok(create_sha2(512)?), - ALGORITHM_OPTIONS_BLAKE2B | "b2sum" => { + AlgoKind::Sha224 => Ok(create_sha2(224)?), + AlgoKind::Sha256 => Ok(create_sha2(256)?), + AlgoKind::Sha384 => Ok(create_sha2(384)?), + AlgoKind::Sha512 => Ok(create_sha2(512)?), + AlgoKind::Blake2b => { // Set default length to 512 if None let bits = length.unwrap_or(512); if bits == 512 { @@ -455,48 +590,50 @@ pub fn detect_algo(algo: &str, length: Option) -> UResult }) } } - ALGORITHM_OPTIONS_BLAKE3 | "b3sum" => Ok(HashAlgorithm { + AlgoKind::Blake3 => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_BLAKE3, create_fn: Box::new(|| Box::new(Blake3::new())), bits: 256, }), - ALGORITHM_OPTIONS_SM3 => Ok(HashAlgorithm { + AlgoKind::Sm3 => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SM3, create_fn: Box::new(|| Box::new(Sm3::new())), bits: 512, }), - algo @ (ALGORITHM_OPTIONS_SHAKE128 | "shake128sum") => { - let bits = length.ok_or(ChecksumError::LengthRequired(algo.to_ascii_uppercase()))?; + AlgoKind::Shake128 => { + let bits = length.ok_or(ChecksumError::LengthRequired( + algo.to_uppercase().to_string(), + ))?; Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHAKE128, create_fn: Box::new(|| Box::new(Shake128::new())), bits, }) } - algo @ (ALGORITHM_OPTIONS_SHAKE256 | "shake256sum") => { - let bits = length.ok_or(ChecksumError::LengthRequired(algo.to_ascii_uppercase()))?; + AlgoKind::Shake256 => { + let bits = length.ok_or(ChecksumError::LengthRequired( + algo.to_uppercase().to_string(), + ))?; Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHAKE256, 
create_fn: Box::new(|| Box::new(Shake256::new())), bits, }) } - algo @ ALGORITHM_OPTIONS_SHA2 => { - let bits = validate_sha2_sha3_length(algo, length)?; - create_sha2(bits) - } - algo @ ALGORITHM_OPTIONS_SHA3 => { - let bits = validate_sha2_sha3_length(algo, length)?; - create_sha3(bits) - } - - // TODO: `hashsum` specific, to remove once hashsum is removed. - algo @ ("sha3-224" | "sha3-256" | "sha3-384" | "sha3-512") => { - let bits: usize = algo.strip_prefix("sha3-").unwrap().parse().unwrap(); - create_sha3(bits) + AlgoKind::Sha2 => { + let len = validate_sha2_sha3_length(algo, length)?; + create_sha2(len) } - - algo => Err(ChecksumError::UnknownAlgorithm(algo.into()).into()), + AlgoKind::Sha3 => { + let len = validate_sha2_sha3_length(algo, length)?; + create_sha3(len) + } // TODO: `hashsum` specific, to remove once hashsum is removed. + // algo @ ("sha3-224" | "sha3-256" | "sha3-384" | "sha3-512") => { + // let bits: usize = algo.strip_prefix("sha3-").unwrap().parse().unwrap(); + // create_sha3(bits) + // } + + // algo => Err(ChecksumError::UnknownAlgorithm(algo.into()).into()), } } @@ -843,11 +980,14 @@ fn get_input_file(filename: &OsStr) -> UResult> { /// Gets the algorithm name and length from the `LineInfo` if the algo-based format is matched. 
fn identify_algo_name_and_length( line_info: &LineInfo, - algo_name_input: Option<&str>, + algo_name_input: Option, last_algo: &mut Option, -) -> Result<(String, Option), LineCheckError> { +) -> Result<(AlgoKind, Option), LineCheckError> { let algo_from_line = line_info.algo_name.clone().unwrap_or_default(); - let line_algo = algo_from_line.to_lowercase(); + let Ok(line_algo) = AlgoKind::from_cksum(algo_from_line.to_lowercase()) else { + // Unknown algorithm + return Err(LineCheckError::ImproperlyFormatted); + }; *last_algo = Some(algo_from_line); // check if we are called with XXXsum (example: md5sum) but we detected a @@ -855,31 +995,21 @@ fn identify_algo_name_and_length( // // Also handle the case cksum -s sm3 but the file contains other formats if let Some(algo_name_input) = algo_name_input { - match (algo_name_input, line_algo.as_str()) { + match (algo_name_input, line_algo) { (l, r) if l == r => (), // Edge case for SHA2, which matches SHA(224|256|384|512) ( - ALGORITHM_OPTIONS_SHA2, - ALGORITHM_OPTIONS_SHA224 - | ALGORITHM_OPTIONS_SHA256 - | ALGORITHM_OPTIONS_SHA384 - | ALGORITHM_OPTIONS_SHA512, + AlgoKind::Sha2, + AlgoKind::Sha224 | AlgoKind::Sha256 | AlgoKind::Sha384 | AlgoKind::Sha512, ) => (), _ => return Err(LineCheckError::ImproperlyFormatted), } } - if !SUPPORTED_ALGORITHMS.contains(&line_algo.as_str()) { - // Not supported algo, leave early - return Err(LineCheckError::ImproperlyFormatted); - } - let bytes = if let Some(bitlen) = line_info.algo_bit_len { - match line_algo.as_str() { - ALGORITHM_OPTIONS_BLAKE2B if bitlen % 8 == 0 => Some(bitlen / 8), - ALGORITHM_OPTIONS_SHA2 | ALGORITHM_OPTIONS_SHA3 - if [224, 256, 384, 512].contains(&bitlen) => - { + match line_algo { + AlgoKind::Blake2b if bitlen % 8 == 0 => Some(bitlen / 8), + AlgoKind::Sha2 | AlgoKind::Sha3 if [224, 256, 384, 512].contains(&bitlen) => { Some(bitlen) } // Either @@ -892,7 +1022,7 @@ fn identify_algo_name_and_length( // the given length is wrong because it's not a multiple of 8. 
_ => return Err(LineCheckError::ImproperlyFormatted), } - } else if line_algo == ALGORITHM_OPTIONS_BLAKE2B { + } else if line_algo == AlgoKind::Blake2b { // Default length with BLAKE2b, Some(64) } else { @@ -943,26 +1073,26 @@ fn compute_and_check_digest_from_file( /// Check a digest checksum with non-algo based pre-treatment. fn process_algo_based_line( line_info: &LineInfo, - cli_algo_name: Option<&str>, + cli_algo_kind: Option, opts: ChecksumOptions, last_algo: &mut Option, ) -> Result<(), LineCheckError> { let filename_to_check = line_info.filename.as_slice(); - let (algo_name, algo_byte_len) = - identify_algo_name_and_length(line_info, cli_algo_name, last_algo)?; + let (algo_kind, algo_byte_len) = + identify_algo_name_and_length(line_info, cli_algo_kind, last_algo)?; // If the digest bitlen is known, we can check the format of the expected // checksum with it. - let digest_char_length_hint = match (algo_name.as_str(), algo_byte_len) { - (ALGORITHM_OPTIONS_BLAKE2B, Some(bytelen)) => Some(bytelen * 2), + let digest_char_length_hint = match (algo_kind, algo_byte_len) { + (AlgoKind::Blake2b, Some(bytelen)) => Some(bytelen * 2), _ => None, }; let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint) .ok_or(LineCheckError::ImproperlyFormatted)?; - let algo = detect_algo(&algo_name, algo_byte_len)?; + let algo = detect_algo(algo_kind, algo_byte_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } @@ -971,7 +1101,7 @@ fn process_algo_based_line( fn process_non_algo_based_line( line_number: usize, line_info: &LineInfo, - cli_algo_name: &str, + cli_algo_kind: AlgoKind, cli_algo_length: Option, opts: ChecksumOptions, ) -> Result<(), LineCheckError> { @@ -989,24 +1119,21 @@ fn process_non_algo_based_line( // When a specific algorithm name is input, use it and use the provided // bits except when dealing with blake2b, sha2 and sha3, where we will // detect the length. 
- let (algo_name, algo_byte_len) = match cli_algo_name { - ALGORITHM_OPTIONS_BLAKE2B => { + let (algo_kind, algo_byte_len) = match cli_algo_kind { + AlgoKind::Blake2b => { // division by 2 converts the length of the Blake2b checksum from // hexadecimal characters to bytes, as each byte is represented by // two hexadecimal characters. - ( - ALGORITHM_OPTIONS_BLAKE2B.to_string(), - Some(expected_checksum.len() / 2), - ) + (AlgoKind::Blake2b, Some(expected_checksum.len() / 2)) } - algo @ (ALGORITHM_OPTIONS_SHA2 | ALGORITHM_OPTIONS_SHA3) => { + algo @ (AlgoKind::Sha2 | AlgoKind::Sha3) => { // multiplication by 4 to get the number of bits - (algo.to_string(), Some(expected_checksum.len() * 4)) + (algo, Some(expected_checksum.len() * 4)) } - _ => (cli_algo_name.to_lowercase(), cli_algo_length), + _ => (cli_algo_kind, cli_algo_length), }; - let algo = detect_algo(&algo_name, algo_byte_len)?; + let algo = detect_algo(algo_kind, algo_byte_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } @@ -1020,7 +1147,7 @@ fn process_non_algo_based_line( fn process_checksum_line( line: &OsStr, i: usize, - cli_algo_name: Option<&str>, + cli_algo_name: Option, cli_algo_length: Option, opts: ChecksumOptions, cached_line_format: &mut Option, @@ -1053,7 +1180,7 @@ fn process_checksum_line( fn process_checksum_file( filename_input: &OsStr, - cli_algo_name: Option<&str>, + cli_algo_kind: Option, cli_algo_length: Option, opts: ChecksumOptions, ) -> Result<(), FileCheckError> { @@ -1090,7 +1217,7 @@ fn process_checksum_file( let line_result = process_checksum_line( line, i, - cli_algo_name, + cli_algo_kind, cli_algo_length, opts, &mut cached_line_format, @@ -1114,12 +1241,12 @@ fn process_checksum_file( res.bad_format += 1; if opts.verbose.at_least_warning() { - let algo = if let Some(algo_name_input) = cli_algo_name { - Cow::Owned(algo_name_input.to_uppercase()) + let algo = if let Some(algo_name_input) = cli_algo_kind { + 
algo_name_input.to_uppercase() } else if let Some(algo) = &last_algo { - Cow::Borrowed(algo.as_str()) + algo.as_str() } else { - Cow::Borrowed("Unknown algorithm") + "Unknown algorithm" }; eprintln!( "{}: {}: {}: improperly formatted {algo} checksum line", @@ -1183,7 +1310,7 @@ fn process_checksum_file( /// Do the checksum validation (can be strict or not) pub fn perform_checksum_validation<'a, I>( files: I, - algo_name_input: Option<&str>, + algo_kind: Option, length_input: Option, opts: ChecksumOptions, ) -> UResult<()> @@ -1195,7 +1322,7 @@ where // if cksum has several input files, it will print the result for each file for filename_input in files { use FileCheckError::*; - match process_checksum_file(filename_input, algo_name_input, length_input, opts) { + match process_checksum_file(filename_input, algo_kind, length_input, opts) { Err(UError(e)) => return Err(e), Err(Failed | CantOpenChecksumFile) => failed = true, Ok(_) => (), @@ -1279,18 +1406,18 @@ pub fn calculate_blake2b_length_str(length: &str) -> UResult> { } } -pub fn validate_sha2_sha3_length(algo_name: &str, length: Option) -> UResult { +pub fn validate_sha2_sha3_length(algo_name: AlgoKind, length: Option) -> UResult { match length { Some(len @ (224 | 256 | 384 | 512)) => Ok(len), Some(len) => { show_error!("{}", ChecksumError::InvalidLength(len.to_string())); - Err(ChecksumError::InvalidLengthForSha(algo_name.to_ascii_uppercase()).into()) + Err(ChecksumError::InvalidLengthForSha(algo_name.to_uppercase().into()).into()) } - None => Err(ChecksumError::LengthRequiredForSha(algo_name.into()).into()), + None => Err(ChecksumError::LengthRequiredForSha(algo_name.to_lowercase().into()).into()), } } -pub fn sanitize_sha2_sha3_length_str(algo_name: &str, length: &str) -> UResult { +pub fn sanitize_sha2_sha3_length_str(algo_kind: AlgoKind, length: &str) -> UResult { // There is a difference in the errors sent when the length is not a number // vs. its an invalid number. 
// @@ -1302,7 +1429,7 @@ pub fn sanitize_sha2_sha3_length_str(algo_name: &str, length: &str) -> UResult { show_error!("{}", ChecksumError::InvalidLength(length.into())); - return Err(ChecksumError::InvalidLengthForSha(algo_name.to_ascii_uppercase()).into()); + return Err(ChecksumError::InvalidLengthForSha(algo_kind.to_uppercase().into()).into()); } Err(_) => return Err(ChecksumError::InvalidLength(length.into()).into()), }; @@ -1311,7 +1438,7 @@ pub fn sanitize_sha2_sha3_length_str(algo_name: &str, length: &str) -> UResult Date: Mon, 3 Nov 2025 23:25:22 +0100 Subject: [PATCH 04/10] checksum: Introduce `SizedAlgoKind` to improve representation and cleanup --- src/uu/cksum/src/cksum.rs | 42 ++--- src/uu/hashsum/src/hashsum.rs | 26 +-- src/uucore/src/lib/features/checksum.rs | 211 ++++++++++++++++++------ 3 files changed, 183 insertions(+), 96 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index cdd9de33f85..8aef74ba514 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -13,11 +13,9 @@ use std::io::{BufReader, Read, Write, stdin, stdout}; use std::iter; use std::path::Path; use uucore::checksum::{ - ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_BSD, ALGORITHM_OPTIONS_CRC, - ALGORITHM_OPTIONS_CRC32B, ALGORITHM_OPTIONS_SYSV, AlgoKind, ChecksumError, ChecksumOptions, - ChecksumVerbose, HashAlgorithm, LEGACY_ALGORITHMS, SUPPORTED_ALGORITHMS, - calculate_blake2b_length_str, detect_algo, digest_reader, perform_checksum_validation, - sanitize_sha2_sha3_length_str, + AlgoKind, ChecksumError, ChecksumOptions, ChecksumVerbose, HashAlgorithm, SUPPORTED_ALGORITHMS, + SizedAlgoKind, calculate_blake2b_length_str, detect_algo, digest_reader, + perform_checksum_validation, sanitize_sha2_sha3_length_str, }; use uucore::translate; @@ -31,10 +29,9 @@ use uucore::{ }; struct Options { - algo_name: &'static str, + algo_kind: SizedAlgoKind, digest: Box, output_bits: usize, - length: Option, output_format: OutputFormat, line_ending: 
LineEnding, } @@ -108,16 +105,16 @@ fn print_legacy_checksum( sum: &str, size: usize, ) -> UResult<()> { - debug_assert!(LEGACY_ALGORITHMS.contains(&options.algo_name)); + debug_assert!(options.algo_kind.is_legacy()); // Print the sum - match options.algo_name { - ALGORITHM_OPTIONS_SYSV => print!( + match options.algo_kind { + SizedAlgoKind::Sysv => print!( "{} {}", sum.parse::().unwrap(), size.div_ceil(options.output_bits), ), - ALGORITHM_OPTIONS_BSD => { + SizedAlgoKind::Bsd => { // The BSD checksum output is 5 digit integer let bsd_width = 5; print!( @@ -126,7 +123,7 @@ fn print_legacy_checksum( size.div_ceil(options.output_bits), ); } - ALGORITHM_OPTIONS_CRC | ALGORITHM_OPTIONS_CRC32B => { + SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { print!("{sum} {size}"); } _ => unreachable!("Not a legacy algorithm"), @@ -143,15 +140,7 @@ fn print_legacy_checksum( fn print_tagged_checksum(options: &Options, filename: &OsStr, sum: &String) -> UResult<()> { // Print algo name and opening parenthesis. - print!( - "{} (", - match (options.algo_name, options.length) { - // Multiply the length by 8, as we want to print the length in bits. 
- (ALGORITHM_OPTIONS_BLAKE2B, Some(l)) => format!("BLAKE2b-{}", l * 8), - (ALGORITHM_OPTIONS_BLAKE2B, None) => "BLAKE2b".into(), - (name, _) => name.to_ascii_uppercase(), - } - ); + print!("{} (", options.algo_kind.to_tag()); // Print filename let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); @@ -235,11 +224,11 @@ where match options.output_format { OutputFormat::Raw => { - let bytes = match options.algo_name { - ALGORITHM_OPTIONS_CRC | ALGORITHM_OPTIONS_CRC32B => { + let bytes = match options.algo_kind { + SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { sum_hex.parse::().unwrap().to_be_bytes().to_vec() } - ALGORITHM_OPTIONS_SYSV | ALGORITHM_OPTIONS_BSD => { + SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => { sum_hex.parse::().unwrap().to_be_bytes().to_vec() } _ => hex::decode(sum_hex).unwrap(), @@ -343,7 +332,7 @@ fn figure_out_output_format( } // Then, if the algo is legacy, takes precedence over the rest - if LEGACY_ALGORITHMS.contains(&algo.name) { + if algo.kind.is_legacy() { return OutputFormat::Legacy; } @@ -465,10 +454,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { ); let opts = Options { - algo_name: algo.name, + algo_kind: algo.kind, digest: (algo.create_fn)(), output_bits: algo.bits, - length, output_format, line_ending, }; diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 4e020625275..dfd3caf7cbf 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -15,7 +15,6 @@ use std::io::{BufReader, Read, stdin}; use std::iter; use std::num::ParseIntError; use std::path::Path; -use uucore::checksum::ChecksumOptions; use uucore::checksum::ChecksumVerbose; use uucore::checksum::calculate_blake2b_length; use uucore::checksum::detect_algo; @@ -23,6 +22,7 @@ use uucore::checksum::digest_reader; use uucore::checksum::escape_filename; use uucore::checksum::perform_checksum_validation; use uucore::checksum::{AlgoKind, ChecksumError}; +use uucore::checksum::{ChecksumOptions, 
SizedAlgoKind}; use uucore::error::{UResult, strip_errno}; use uucore::format_usage; use uucore::sum::Digest; @@ -33,7 +33,7 @@ const NAME: &str = "hashsum"; const READ_BUFFER_SIZE: usize = 32 * 1024; struct Options<'a> { - algoname: &'static str, + algo: SizedAlgoKind, digest: Box, binary: bool, binary_name: &'a str, @@ -246,7 +246,7 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { let algo = detect_algo(algo_kind, length)?; let opts = Options { - algoname: algo.name, + algo: algo.kind, digest: (algo.create_fn)(), output_bits: algo.bits, binary, @@ -549,22 +549,10 @@ where let (escaped_filename, prefix) = escape_filename(filename); if options.tag { - if options.algoname == "blake2b" { - if options.digest.output_bits() == 512 { - println!("BLAKE2b ({escaped_filename}) = {sum}"); - } else { - // special case for BLAKE2b with non-default output length - println!( - "BLAKE2b-{} ({escaped_filename}) = {sum}", - options.digest.output_bits() - ); - } - } else { - println!( - "{prefix}{} ({escaped_filename}) = {sum}", - options.algoname.to_ascii_uppercase() - ); - } + println!( + "{prefix}{} ({escaped_filename}) = {sum}", + options.algo.to_tag() + ); } else if options.nonames { println!("{sum}"); } else if options.zero { diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index e71d8135cba..3437a0a6e70 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -72,13 +72,6 @@ pub const SUPPORTED_ALGORITHMS: [&str; 17] = [ ALGORITHM_OPTIONS_SHAKE256, ]; -pub const LEGACY_ALGORITHMS: [&str; 4] = [ - ALGORITHM_OPTIONS_SYSV, - ALGORITHM_OPTIONS_BSD, - ALGORITHM_OPTIONS_CRC, - ALGORITHM_OPTIONS_CRC32B, -]; - /// Represents an algorithm kind. In some cases, it is not sufficient by itself /// to know which algorithm to use exactly, because it lacks a digest length, /// which is why [`SizedAlgoKind`] exists. 
@@ -212,8 +205,127 @@ impl AlgoKind { } } +/// Holds a length for a SHA2 or SHA3 algorithm kind. +#[derive(Debug, Clone, Copy)] +pub enum ShaLength { + Len224, + Len256, + Len384, + Len512, +} + +impl ShaLength { + pub fn as_usize(self) -> usize { + match self { + Self::Len224 => 224, + Self::Len256 => 256, + Self::Len384 => 384, + Self::Len512 => 512, + } + } +} + +impl TryFrom for ShaLength { + type Error = ChecksumError; + + fn try_from(value: usize) -> Result { + use ShaLength::*; + match value { + 224 => Ok(Len224), + 256 => Ok(Len256), + 384 => Ok(Len384), + 512 => Ok(Len512), + _ => Err(ChecksumError::InvalidLengthForSha(value.to_string())), + } + } +} + +/// Represents an actual determined algorithm. +#[derive(Debug, Clone, Copy)] +pub enum SizedAlgoKind { + Sysv, + Bsd, + Crc, + Crc32b, + Md5, + Sm3, + Sha1, + Blake3, + Sha2(ShaLength), + Sha3(ShaLength), + Blake2b(Option), + Shake128(usize), + Shake256(usize), +} + +impl SizedAlgoKind { + pub fn from_unsized(kind: AlgoKind, length: Option) -> UResult { + use AlgoKind as ak; + match (kind, length) { + ( + ak::Sysv + | ak::Bsd + | ak::Crc + | ak::Crc32b + | ak::Md5 + | ak::Sm3 + | ak::Sha1 + | ak::Blake3 + | ak::Sha224 + | ak::Sha256 + | ak::Sha384 + | ak::Sha512, + Some(_), + ) => Err(ChecksumError::LengthOnlyForBlake2bSha2Sha3.into()), + + (ak::Sysv, _) => Ok(Self::Sysv), + (ak::Bsd, _) => Ok(Self::Bsd), + (ak::Crc, _) => Ok(Self::Crc), + (ak::Crc32b, _) => Ok(Self::Crc32b), + (ak::Md5, _) => Ok(Self::Md5), + (ak::Sm3, _) => Ok(Self::Sm3), + (ak::Sha1, _) => Ok(Self::Sha1), + (ak::Blake3, _) => Ok(Self::Blake3), + + (ak::Shake128, Some(l)) => Ok(Self::Shake128(l)), + (ak::Shake256, Some(l)) => Ok(Self::Shake256(l)), + (ak::Sha2, Some(l)) => Ok(Self::Sha2(ShaLength::try_from(l)?)), + (ak::Sha3, Some(l)) => Ok(Self::Sha3(ShaLength::try_from(l)?)), + (ak::Blake2b, Some(l)) => Ok(Self::Blake2b(calculate_blake2b_length(l)?)), + + (ak::Sha224, None) => Ok(Self::Sha2(ShaLength::Len224)), + (ak::Sha256, None) =>
Ok(Self::Sha2(ShaLength::Len256)), + (ak::Sha384, None) => Ok(Self::Sha2(ShaLength::Len384)), + (ak::Sha512, None) => Ok(Self::Sha2(ShaLength::Len512)), + (_, None) => Err(ChecksumError::LengthRequired(kind.to_uppercase().into()).into()), + } + } + + pub fn to_tag(&self) -> String { + use SizedAlgoKind::*; + match self { + Md5 => "MD5".into(), + Sm3 => "SM3".into(), + Sha1 => "SHA1".into(), + Blake3 => "BLAKE3".into(), + Sha2(len) => format!("SHA{}", len.as_usize()), + Sha3(len) => format!("SHA3-{}", len.as_usize()), + Blake2b(Some(len)) => format!("BLAKE2b-{}", len * 8), + Blake2b(None) => "BLAKE2b".into(), + Shake128(_) => "SHAKE128".into(), + Shake256(_) => "SHAKE256".into(), + Sysv | Bsd | Crc | Crc32b => panic!("Should not be used for tagging"), + } + } + + pub fn is_legacy(&self) -> bool { + use SizedAlgoKind::*; + matches!(self, Sysv | Bsd | Crc | Crc32b) + } +} + pub struct HashAlgorithm { - pub name: &'static str, + pub kind: SizedAlgoKind, pub create_fn: Box Box>, pub bits: usize, } @@ -399,57 +511,53 @@ impl UError for ChecksumError { /// /// Returns a `UResult` with an `HashAlgorithm` or an `Err` if an unsupported /// output size is provided. 
-pub fn create_sha3(bits: usize) -> UResult { - match bits { - 224 => Ok(HashAlgorithm { - name: "SHA3-224", +pub fn create_sha3(len: ShaLength) -> UResult { + match len { + ShaLength::Len224 => Ok(HashAlgorithm { + kind: SizedAlgoKind::Sha3(ShaLength::Len224), create_fn: Box::new(|| Box::new(Sha3_224::new())), bits: 224, }), - 256 => Ok(HashAlgorithm { - name: "SHA3-256", + ShaLength::Len256 => Ok(HashAlgorithm { + kind: SizedAlgoKind::Sha3(ShaLength::Len256), create_fn: Box::new(|| Box::new(Sha3_256::new())), bits: 256, }), - 384 => Ok(HashAlgorithm { - name: "SHA3-384", + ShaLength::Len384 => Ok(HashAlgorithm { + kind: SizedAlgoKind::Sha3(ShaLength::Len384), create_fn: Box::new(|| Box::new(Sha3_384::new())), bits: 384, }), - 512 => Ok(HashAlgorithm { - name: "SHA3-512", + ShaLength::Len512 => Ok(HashAlgorithm { + kind: SizedAlgoKind::Sha3(ShaLength::Len512), create_fn: Box::new(|| Box::new(Sha3_512::new())), bits: 512, }), - - _ => Err(ChecksumError::InvalidLengthForSha("SHA3".into()).into()), } } -pub fn create_sha2(bits: usize) -> UResult { - match bits { - 224 => Ok(HashAlgorithm { - name: "SHA224", +pub fn create_sha2(len: ShaLength) -> UResult { + match len { + ShaLength::Len224 => Ok(HashAlgorithm { + kind: SizedAlgoKind::Sha2(ShaLength::Len224), create_fn: Box::new(|| Box::new(Sha224::new())), bits: 224, }), - 256 => Ok(HashAlgorithm { - name: "SHA256", + ShaLength::Len256 => Ok(HashAlgorithm { + kind: SizedAlgoKind::Sha2(ShaLength::Len256), create_fn: Box::new(|| Box::new(Sha256::new())), bits: 256, }), - 384 => Ok(HashAlgorithm { - name: "SHA384", + ShaLength::Len384 => Ok(HashAlgorithm { + kind: SizedAlgoKind::Sha2(ShaLength::Len384), create_fn: Box::new(|| Box::new(Sha384::new())), bits: 384, }), - 512 => Ok(HashAlgorithm { - name: "SHA512", + ShaLength::Len512 => Ok(HashAlgorithm { + kind: SizedAlgoKind::Sha2(ShaLength::Len512), create_fn: Box::new(|| Box::new(Sha512::new())), bits: 512, }), - - _ => 
Err(ChecksumError::InvalidLengthForSha("SHA2".into()).into()), } } @@ -540,63 +648,63 @@ fn print_file_report( pub fn detect_algo(algo: AlgoKind, length: Option) -> UResult { match algo { AlgoKind::Sysv => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_SYSV, + kind: SizedAlgoKind::Sysv, create_fn: Box::new(|| Box::new(SysV::new())), bits: 512, }), AlgoKind::Bsd => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_BSD, + kind: SizedAlgoKind::Bsd, create_fn: Box::new(|| Box::new(Bsd::new())), bits: 1024, }), AlgoKind::Crc => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_CRC, + kind: SizedAlgoKind::Crc, create_fn: Box::new(|| Box::new(Crc::new())), bits: 256, }), AlgoKind::Crc32b => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_CRC32B, + kind: SizedAlgoKind::Crc32b, create_fn: Box::new(|| Box::new(CRC32B::new())), bits: 32, }), AlgoKind::Md5 => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_MD5, + kind: SizedAlgoKind::Md5, create_fn: Box::new(|| Box::new(Md5::new())), bits: 128, }), AlgoKind::Sha1 => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_SHA1, + kind: SizedAlgoKind::Sha1, create_fn: Box::new(|| Box::new(Sha1::new())), bits: 160, }), - AlgoKind::Sha224 => Ok(create_sha2(224)?), - AlgoKind::Sha256 => Ok(create_sha2(256)?), - AlgoKind::Sha384 => Ok(create_sha2(384)?), - AlgoKind::Sha512 => Ok(create_sha2(512)?), + AlgoKind::Sha224 => Ok(create_sha2(ShaLength::Len224)?), + AlgoKind::Sha256 => Ok(create_sha2(ShaLength::Len256)?), + AlgoKind::Sha384 => Ok(create_sha2(ShaLength::Len384)?), + AlgoKind::Sha512 => Ok(create_sha2(ShaLength::Len512)?), AlgoKind::Blake2b => { // Set default length to 512 if None let bits = length.unwrap_or(512); if bits == 512 { Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_BLAKE2B, + kind: SizedAlgoKind::Blake2b(None), create_fn: Box::new(move || Box::new(Blake2b::new())), bits: 512, }) } else { Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_BLAKE2B, + kind: SizedAlgoKind::Blake2b(Some(bits)), create_fn: Box::new(move || 
Box::new(Blake2b::with_output_bytes(bits))), bits, }) } } AlgoKind::Blake3 => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_BLAKE3, + kind: SizedAlgoKind::Blake3, create_fn: Box::new(|| Box::new(Blake3::new())), bits: 256, }), AlgoKind::Sm3 => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_SM3, + kind: SizedAlgoKind::Sm3, create_fn: Box::new(|| Box::new(Sm3::new())), bits: 512, }), @@ -605,7 +713,7 @@ pub fn detect_algo(algo: AlgoKind, length: Option) -> UResult) -> UResult UResult> { } } -pub fn validate_sha2_sha3_length(algo_name: AlgoKind, length: Option) -> UResult { +pub fn validate_sha2_sha3_length(algo_name: AlgoKind, length: Option) -> UResult { match length { - Some(len @ (224 | 256 | 384 | 512)) => Ok(len), + Some(224) => Ok(ShaLength::Len224), + Some(256) => Ok(ShaLength::Len256), + Some(384) => Ok(ShaLength::Len384), + Some(512) => Ok(ShaLength::Len512), Some(len) => { show_error!("{}", ChecksumError::InvalidLength(len.to_string())); Err(ChecksumError::InvalidLengthForSha(algo_name.to_uppercase().into()).into()) From 923baae8536ed14350a2bad7f924b95e11a7ae18 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Tue, 4 Nov 2025 00:46:10 +0100 Subject: [PATCH 05/10] checksum: Always get bit length from the SizedAlgoKind --- src/uu/cksum/src/cksum.rs | 18 ++++---- src/uu/hashsum/src/hashsum.rs | 6 +-- src/uucore/src/lib/features/checksum.rs | 58 ++++++++++++------------- 3 files changed, 39 insertions(+), 43 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 8aef74ba514..46ccad41e4d 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore (ToDO) fname, algo +// spell-checker:ignore (ToDO) fname, algo, bitlen use clap::builder::ValueParser; use clap::{Arg, ArgAction, Command}; @@ -31,7 +31,6 @@ use uucore::{ struct Options { algo_kind: SizedAlgoKind, digest: Box, - output_bits: usize, output_format: OutputFormat, line_ending: LineEnding, } @@ -112,7 +111,7 @@ fn print_legacy_checksum( SizedAlgoKind::Sysv => print!( "{} {}", sum.parse::().unwrap(), - size.div_ceil(options.output_bits), + size.div_ceil(options.algo_kind.bitlen()), ), SizedAlgoKind::Bsd => { // The BSD checksum output is 5 digit integer @@ -120,7 +119,7 @@ fn print_legacy_checksum( print!( "{:0bsd_width$} {:bsd_width$}", sum.parse::().unwrap(), - size.div_ceil(options.output_bits), + size.div_ceil(options.algo_kind.bitlen()), ); } SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { @@ -209,9 +208,13 @@ where Box::new(file_buf) as Box }); - let (sum_hex, sz) = - digest_reader(&mut options.digest, &mut file, false, options.output_bits) - .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; + let (sum_hex, sz) = digest_reader( + &mut options.digest, + &mut file, + false, + options.algo_kind.bitlen(), + ) + .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; // Encodes the sum if df is Base64, leaves as-is otherwise. let encode_sum = |sum: String, df: DigestFormat| { @@ -456,7 +459,6 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let opts = Options { algo_kind: algo.kind, digest: (algo.create_fn)(), - output_bits: algo.bits, output_format, line_ending, }; diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index dfd3caf7cbf..6eebf651dc6 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore (ToDO) algo, algoname, regexes, nread, nonames +// spell-checker:ignore (ToDO) algo, algoname, bitlen, regexes, nread, nonames use clap::ArgAction; use clap::builder::ValueParser; @@ -44,7 +44,6 @@ struct Options<'a> { //quiet: bool, //strict: bool, //warn: bool, - output_bits: usize, zero: bool, //ignore_missing: bool, } @@ -248,7 +247,6 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { let opts = Options { algo: algo.kind, digest: (algo.create_fn)(), - output_bits: algo.bits, binary, binary_name: &binary_name, tag: matches.get_flag("tag"), @@ -532,7 +530,7 @@ where &mut options.digest, &mut file, options.binary, - options.output_bits, + options.algo.bitlen(), ) { Ok((sum, _)) => sum, Err(e) => { diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 3437a0a6e70..b008dd98649 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -301,7 +301,7 @@ impl SizedAlgoKind { } } - pub fn to_tag(&self) -> String { + pub fn to_tag(self) -> String { use SizedAlgoKind::*; match self { Md5 => "MD5".into(), @@ -318,6 +318,24 @@ impl SizedAlgoKind { } } + pub fn bitlen(&self) -> usize { + use SizedAlgoKind::*; + match self { + Sysv => 512, + Bsd => 1024, + Crc => 256, + Crc32b => 32, + Md5 => 128, + Sm3 => 512, + Sha1 => 160, + Blake3 => 256, + Sha2(len) => len.as_usize(), + Sha3(len) => len.as_usize(), + Blake2b(len) => len.unwrap_or(512), + Shake128(len) => *len, + Shake256(len) => *len, + } + } pub fn is_legacy(&self) -> bool { use SizedAlgoKind::*; matches!(self, Sysv | Bsd | Crc | Crc32b) @@ -327,7 +345,6 @@ impl SizedAlgoKind { pub struct HashAlgorithm { pub kind: SizedAlgoKind, pub create_fn: Box Box>, - pub bits: usize, } /// This structure holds the count of checksum test lines' outcomes. 
@@ -516,22 +533,18 @@ pub fn create_sha3(len: ShaLength) -> UResult { ShaLength::Len224 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sha3(ShaLength::Len224), create_fn: Box::new(|| Box::new(Sha3_224::new())), - bits: 224, }), ShaLength::Len256 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sha3(ShaLength::Len256), create_fn: Box::new(|| Box::new(Sha3_256::new())), - bits: 256, }), ShaLength::Len384 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sha3(ShaLength::Len384), create_fn: Box::new(|| Box::new(Sha3_384::new())), - bits: 384, }), ShaLength::Len512 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sha3(ShaLength::Len512), create_fn: Box::new(|| Box::new(Sha3_512::new())), - bits: 512, }), } } @@ -541,22 +554,18 @@ pub fn create_sha2(len: ShaLength) -> UResult { ShaLength::Len224 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sha2(ShaLength::Len224), create_fn: Box::new(|| Box::new(Sha224::new())), - bits: 224, }), ShaLength::Len256 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sha2(ShaLength::Len256), create_fn: Box::new(|| Box::new(Sha256::new())), - bits: 256, }), ShaLength::Len384 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sha2(ShaLength::Len384), create_fn: Box::new(|| Box::new(Sha384::new())), - bits: 384, }), ShaLength::Len512 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sha2(ShaLength::Len512), create_fn: Box::new(|| Box::new(Sha512::new())), - bits: 512, }), } } @@ -650,32 +659,26 @@ pub fn detect_algo(algo: AlgoKind, length: Option) -> UResult Ok(HashAlgorithm { kind: SizedAlgoKind::Sysv, create_fn: Box::new(|| Box::new(SysV::new())), - bits: 512, }), AlgoKind::Bsd => Ok(HashAlgorithm { kind: SizedAlgoKind::Bsd, create_fn: Box::new(|| Box::new(Bsd::new())), - bits: 1024, }), AlgoKind::Crc => Ok(HashAlgorithm { kind: SizedAlgoKind::Crc, create_fn: Box::new(|| Box::new(Crc::new())), - bits: 256, }), AlgoKind::Crc32b => Ok(HashAlgorithm { kind: SizedAlgoKind::Crc32b, create_fn: Box::new(|| Box::new(CRC32B::new())), - bits: 32, }), AlgoKind::Md5 => Ok(HashAlgorithm { kind: 
SizedAlgoKind::Md5, create_fn: Box::new(|| Box::new(Md5::new())), - bits: 128, }), AlgoKind::Sha1 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sha1, create_fn: Box::new(|| Box::new(Sha1::new())), - bits: 160, }), AlgoKind::Sha224 => Ok(create_sha2(ShaLength::Len224)?), AlgoKind::Sha256 => Ok(create_sha2(ShaLength::Len256)?), @@ -688,25 +691,21 @@ pub fn detect_algo(algo: AlgoKind, length: Option) -> UResult Ok(HashAlgorithm { kind: SizedAlgoKind::Blake3, create_fn: Box::new(|| Box::new(Blake3::new())), - bits: 256, }), AlgoKind::Sm3 => Ok(HashAlgorithm { kind: SizedAlgoKind::Sm3, create_fn: Box::new(|| Box::new(Sm3::new())), - bits: 512, }), AlgoKind::Shake128 => { let bits = length.ok_or(ChecksumError::LengthRequired( @@ -715,7 +714,6 @@ pub fn detect_algo(algo: AlgoKind, length: Option) -> UResult { @@ -725,7 +723,6 @@ pub fn detect_algo(algo: AlgoKind, length: Option) -> UResult { @@ -735,13 +732,7 @@ pub fn detect_algo(algo: AlgoKind, length: Option) -> UResult { let len = validate_sha2_sha3_length(algo, length)?; create_sha3(len) - } // TODO: `hashsum` specific, to remove once hashsum is removed. 
- // algo @ ("sha3-224" | "sha3-256" | "sha3-384" | "sha3-512") => { - // let bits: usize = algo.strip_prefix("sha3-").unwrap().parse().unwrap(); - // create_sha3(bits) - // } - - // algo => Err(ChecksumError::UnknownAlgorithm(algo.into()).into()), + } } } @@ -1158,8 +1149,13 @@ fn compute_and_check_digest_from_file( // Read the file and calculate the checksum let create_fn = &mut algo.create_fn; let mut digest = create_fn(); - let (calculated_checksum, _) = - digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap(); + let (calculated_checksum, _) = digest_reader( + &mut digest, + &mut file_reader, + opts.binary, + algo.kind.bitlen(), + ) + .unwrap(); // Do the checksum validation let checksum_correct = expected_checksum == calculated_checksum; From 68dc6fd1470b3515890b8fc7c5950042f6b5f731 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Tue, 4 Nov 2025 02:18:36 +0100 Subject: [PATCH 06/10] checksum: Get rid of detect_algo and child functions --- src/uu/cksum/src/cksum.rs | 18 +- src/uu/hashsum/src/hashsum.rs | 7 +- src/uucore/src/lib/features/checksum.rs | 302 ++++-------------------- 3 files changed, 63 insertions(+), 264 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 46ccad41e4d..b80139f67ae 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -13,9 +13,9 @@ use std::io::{BufReader, Read, Write, stdin, stdout}; use std::iter; use std::path::Path; use uucore::checksum::{ - AlgoKind, ChecksumError, ChecksumOptions, ChecksumVerbose, HashAlgorithm, SUPPORTED_ALGORITHMS, - SizedAlgoKind, calculate_blake2b_length_str, detect_algo, digest_reader, - perform_checksum_validation, sanitize_sha2_sha3_length_str, + AlgoKind, ChecksumError, ChecksumOptions, ChecksumVerbose, SUPPORTED_ALGORITHMS, SizedAlgoKind, + calculate_blake2b_length_str, digest_reader, perform_checksum_validation, + sanitize_sha2_sha3_length_str, }; use uucore::translate; @@ -323,7 +323,7 @@ fn handle_tag_text_binary_flags>( 
/// Use already-processed arguments to decide the output format. fn figure_out_output_format( - algo: &HashAlgorithm, + algo: SizedAlgoKind, tag: bool, binary: bool, raw: bool, @@ -335,7 +335,7 @@ fn figure_out_output_format( } // Then, if the algo is legacy, takes precedence over the rest - if algo.kind.is_legacy() { + if algo.is_legacy() { return OutputFormat::Legacy; } @@ -445,11 +445,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let (tag, binary) = handle_tag_text_binary_flags(std::env::args_os())?; - let algo = detect_algo(algo_kind, length)?; + let algo = SizedAlgoKind::from_unsized(algo_kind, length)?; let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO)); let output_format = figure_out_output_format( - &algo, + algo, tag, binary, matches.get_flag(options::RAW), @@ -457,8 +457,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { ); let opts = Options { - algo_kind: algo.kind, - digest: (algo.create_fn)(), + algo_kind: algo, + digest: algo.create_digest(), output_format, line_ending, }; diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 6eebf651dc6..c9f600c32f8 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -17,7 +17,6 @@ use std::num::ParseIntError; use std::path::Path; use uucore::checksum::ChecksumVerbose; use uucore::checksum::calculate_blake2b_length; -use uucore::checksum::detect_algo; use uucore::checksum::digest_reader; use uucore::checksum::escape_filename; use uucore::checksum::perform_checksum_validation; @@ -242,11 +241,11 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { .unwrap_or(&false); let zero = matches.get_flag("zero"); - let algo = detect_algo(algo_kind, length)?; + let algo = SizedAlgoKind::from_unsized(algo_kind, length)?; let opts = Options { - algo: algo.kind, - digest: (algo.create_fn)(), + algo, + digest: algo.create_digest(), binary, binary_name: &binary_name, tag: matches.get_flag("tag"), diff --git 
a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index b008dd98649..aed5b911103 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -253,15 +253,16 @@ pub enum SizedAlgoKind { Blake3, Sha2(ShaLength), Sha3(ShaLength), + // Note: we store Blake2b's length as BYTES. Blake2b(Option), Shake128(usize), Shake256(usize), } impl SizedAlgoKind { - pub fn from_unsized(kind: AlgoKind, length: Option) -> UResult { + pub fn from_unsized(kind: AlgoKind, byte_length: Option) -> UResult { use AlgoKind as ak; - match (kind, length) { + match (kind, byte_length) { ( ak::Sysv | ak::Bsd @@ -291,7 +292,13 @@ impl SizedAlgoKind { (ak::Shake256, Some(l)) => Ok(Self::Shake256(l)), (ak::Sha2, Some(l)) => Ok(Self::Sha2(ShaLength::try_from(l)?)), (ak::Sha3, Some(l)) => Ok(Self::Sha3(ShaLength::try_from(l)?)), - (ak::Blake2b, Some(l)) => Ok(Self::Blake2b(calculate_blake2b_length(l)?)), + (algo @ (ak::Sha2 | ak::Sha3), None) => { + Err(ChecksumError::LengthRequiredForSha(algo.to_lowercase().into()).into()) + } + // [`calculate_blake2b_length`] expects a length in bits but we + // have a length in bytes. 
+ (ak::Blake2b, Some(l)) => Ok(Self::Blake2b(calculate_blake2b_length(8 * l)?)), + (ak::Blake2b, None) => Ok(Self::Blake2b(None)), (ak::Sha224, None) => Ok(Self::Sha2(ShaLength::Len224)), (ak::Sha256, None) => Ok(Self::Sha2(ShaLength::Len256)), @@ -310,7 +317,7 @@ impl SizedAlgoKind { Blake3 => "BLAKE3".into(), Sha2(len) => format!("SHA{}", len.as_usize()), Sha3(len) => format!("SHA3-{}", len.as_usize()), - Blake2b(Some(len)) => format!("BLAKE2b-{}", len * 8), + Blake2b(Some(byte_len)) => format!("BLAKE2b-{}", byte_len * 8), Blake2b(None) => "BLAKE2b".into(), Shake128(_) => "SHAKE128".into(), Shake256(_) => "SHAKE256".into(), @@ -318,6 +325,32 @@ impl SizedAlgoKind { } } + pub fn create_digest(&self) -> Box { + use ShaLength::*; + match self { + Self::Sysv => Box::new(SysV::new()), + Self::Bsd => Box::new(Bsd::new()), + Self::Crc => Box::new(Crc::new()), + Self::Crc32b => Box::new(CRC32B::new()), + Self::Md5 => Box::new(Md5::new()), + Self::Sm3 => Box::new(Sm3::new()), + Self::Sha1 => Box::new(Sha1::new()), + Self::Blake3 => Box::new(Blake3::new()), + Self::Sha2(Len224) => Box::new(Sha224::new()), + Self::Sha2(Len256) => Box::new(Sha256::new()), + Self::Sha2(Len384) => Box::new(Sha384::new()), + Self::Sha2(Len512) => Box::new(Sha512::new()), + Self::Sha3(Len224) => Box::new(Sha3_224::new()), + Self::Sha3(Len256) => Box::new(Sha3_256::new()), + Self::Sha3(Len384) => Box::new(Sha3_384::new()), + Self::Sha3(Len512) => Box::new(Sha3_512::new()), + Self::Blake2b(Some(byte_len)) => Box::new(Blake2b::with_output_bytes(*byte_len)), + Self::Blake2b(None) => Box::new(Blake2b::new()), + Self::Shake128(_) => Box::new(Shake128::new()), + Self::Shake256(_) => Box::new(Shake256::new()), + } + } + pub fn bitlen(&self) -> usize { use SizedAlgoKind::*; match self { @@ -342,11 +375,6 @@ impl SizedAlgoKind { } } -pub struct HashAlgorithm { - pub kind: SizedAlgoKind, - pub create_fn: Box Box>, -} - /// This structure holds the count of checksum test lines' outcomes. 
#[derive(Default)] struct ChecksumResult { @@ -522,54 +550,6 @@ impl UError for ChecksumError { } } -/// Creates a SHA3 hasher instance based on the specified bits argument. -/// -/// # Returns -/// -/// Returns a `UResult` with an `HashAlgorithm` or an `Err` if an unsupported -/// output size is provided. -pub fn create_sha3(len: ShaLength) -> UResult { - match len { - ShaLength::Len224 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sha3(ShaLength::Len224), - create_fn: Box::new(|| Box::new(Sha3_224::new())), - }), - ShaLength::Len256 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sha3(ShaLength::Len256), - create_fn: Box::new(|| Box::new(Sha3_256::new())), - }), - ShaLength::Len384 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sha3(ShaLength::Len384), - create_fn: Box::new(|| Box::new(Sha3_384::new())), - }), - ShaLength::Len512 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sha3(ShaLength::Len512), - create_fn: Box::new(|| Box::new(Sha3_512::new())), - }), - } -} - -pub fn create_sha2(len: ShaLength) -> UResult { - match len { - ShaLength::Len224 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sha2(ShaLength::Len224), - create_fn: Box::new(|| Box::new(Sha224::new())), - }), - ShaLength::Len256 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sha2(ShaLength::Len256), - create_fn: Box::new(|| Box::new(Sha256::new())), - }), - ShaLength::Len384 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sha2(ShaLength::Len384), - create_fn: Box::new(|| Box::new(Sha384::new())), - }), - ShaLength::Len512 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sha2(ShaLength::Len512), - create_fn: Box::new(|| Box::new(Sha512::new())), - }), - } -} - #[allow(clippy::comparison_chain)] fn print_cksum_report(res: &ChecksumResult) { if res.bad_format == 1 { @@ -654,88 +634,6 @@ fn print_file_report( } } -pub fn detect_algo(algo: AlgoKind, length: Option) -> UResult { - match algo { - AlgoKind::Sysv => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sysv, - create_fn: Box::new(|| Box::new(SysV::new())), - }), - 
AlgoKind::Bsd => Ok(HashAlgorithm { - kind: SizedAlgoKind::Bsd, - create_fn: Box::new(|| Box::new(Bsd::new())), - }), - AlgoKind::Crc => Ok(HashAlgorithm { - kind: SizedAlgoKind::Crc, - create_fn: Box::new(|| Box::new(Crc::new())), - }), - AlgoKind::Crc32b => Ok(HashAlgorithm { - kind: SizedAlgoKind::Crc32b, - create_fn: Box::new(|| Box::new(CRC32B::new())), - }), - AlgoKind::Md5 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Md5, - create_fn: Box::new(|| Box::new(Md5::new())), - }), - AlgoKind::Sha1 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sha1, - create_fn: Box::new(|| Box::new(Sha1::new())), - }), - AlgoKind::Sha224 => Ok(create_sha2(ShaLength::Len224)?), - AlgoKind::Sha256 => Ok(create_sha2(ShaLength::Len256)?), - AlgoKind::Sha384 => Ok(create_sha2(ShaLength::Len384)?), - AlgoKind::Sha512 => Ok(create_sha2(ShaLength::Len512)?), - AlgoKind::Blake2b => { - // Set default length to 512 if None - let bits = length.unwrap_or(512); - if bits == 512 { - Ok(HashAlgorithm { - kind: SizedAlgoKind::Blake2b(None), - create_fn: Box::new(move || Box::new(Blake2b::new())), - }) - } else { - Ok(HashAlgorithm { - kind: SizedAlgoKind::Blake2b(Some(bits)), - create_fn: Box::new(move || Box::new(Blake2b::with_output_bytes(bits))), - }) - } - } - AlgoKind::Blake3 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Blake3, - create_fn: Box::new(|| Box::new(Blake3::new())), - }), - AlgoKind::Sm3 => Ok(HashAlgorithm { - kind: SizedAlgoKind::Sm3, - create_fn: Box::new(|| Box::new(Sm3::new())), - }), - AlgoKind::Shake128 => { - let bits = length.ok_or(ChecksumError::LengthRequired( - algo.to_uppercase().to_string(), - ))?; - Ok(HashAlgorithm { - kind: SizedAlgoKind::Shake128(bits), - create_fn: Box::new(|| Box::new(Shake128::new())), - }) - } - AlgoKind::Shake256 => { - let bits = length.ok_or(ChecksumError::LengthRequired( - algo.to_uppercase().to_string(), - ))?; - Ok(HashAlgorithm { - kind: SizedAlgoKind::Shake256(bits), - create_fn: Box::new(|| Box::new(Shake256::new())), - }) - } - 
AlgoKind::Sha2 => { - let len = validate_sha2_sha3_length(algo, length)?; - create_sha2(len) - } - AlgoKind::Sha3 => { - let len = validate_sha2_sha3_length(algo, length)?; - create_sha3(len) - } - } -} - #[derive(Debug, PartialEq, Eq, Clone, Copy)] enum LineFormat { AlgoBased, @@ -1136,7 +1034,7 @@ fn identify_algo_name_and_length( fn compute_and_check_digest_from_file( filename: &[u8], expected_checksum: &str, - mut algo: HashAlgorithm, + algo: SizedAlgoKind, opts: ChecksumOptions, ) -> Result<(), LineCheckError> { let (filename_to_check_unescaped, prefix) = unescape_filename(filename); @@ -1147,15 +1045,9 @@ fn compute_and_check_digest_from_file( let mut file_reader = BufReader::new(file_to_check); // Read the file and calculate the checksum - let create_fn = &mut algo.create_fn; - let mut digest = create_fn(); - let (calculated_checksum, _) = digest_reader( - &mut digest, - &mut file_reader, - opts.binary, - algo.kind.bitlen(), - ) - .unwrap(); + let mut digest = algo.create_digest(); + let (calculated_checksum, _) = + digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bitlen()).unwrap(); // Do the checksum validation let checksum_correct = expected_checksum == calculated_checksum; @@ -1196,7 +1088,7 @@ fn process_algo_based_line( let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint) .ok_or(LineCheckError::ImproperlyFormatted)?; - let algo = detect_algo(algo_kind, algo_byte_len)?; + let algo = SizedAlgoKind::from_unsized(algo_kind, algo_byte_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } @@ -1237,7 +1129,7 @@ fn process_non_algo_based_line( _ => (cli_algo_kind, cli_algo_length), }; - let algo = detect_algo(algo_kind, algo_byte_len)?; + let algo = SizedAlgoKind::from_unsized(algo_kind, algo_byte_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } @@ -1475,29 +1367,29 @@ pub fn digest_reader( } /// Calculates the length of 
the digest. -pub fn calculate_blake2b_length(length: usize) -> UResult> { - calculate_blake2b_length_str(length.to_string().as_str()) +pub fn calculate_blake2b_length(bit_length: usize) -> UResult> { + calculate_blake2b_length_str(bit_length.to_string().as_str()) } /// Calculates the length of the digest. -pub fn calculate_blake2b_length_str(length: &str) -> UResult> { +pub fn calculate_blake2b_length_str(bit_length: &str) -> UResult> { // Blake2b's length is parsed in an u64. - match length.parse::() { + match bit_length.parse::() { Ok(0) => Ok(None), // Error cases Ok(n) if n > 512 => { - show_error!("{}", ChecksumError::InvalidLength(length.into())); + show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) } Err(e) if *e.kind() == IntErrorKind::PosOverflow => { - show_error!("{}", ChecksumError::InvalidLength(length.into())); + show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) } - Err(_) => Err(ChecksumError::InvalidLength(length.into()).into()), + Err(_) => Err(ChecksumError::InvalidLength(bit_length.into()).into()), Ok(n) if n % 8 != 0 => { - show_error!("{}", ChecksumError::InvalidLength(length.into())); + show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); Err(ChecksumError::LengthNotMultipleOf8.into()) } @@ -1636,98 +1528,6 @@ mod tests { assert_eq!(calculate_blake2b_length(256).unwrap(), Some(32)); } - // #[test] - // fn test_detect_algo() { - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_SYSV, None).unwrap().name, - // ALGORITHM_OPTIONS_SYSV - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_BSD, None).unwrap().name, - // ALGORITHM_OPTIONS_BSD - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_CRC, None).unwrap().name, - // ALGORITHM_OPTIONS_CRC - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_MD5, None).unwrap().name, - // ALGORITHM_OPTIONS_MD5 - 
// ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_SHA1, None).unwrap().name, - // ALGORITHM_OPTIONS_SHA1 - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_SHA224, None).unwrap().name, - // ALGORITHM_OPTIONS_SHA224.to_ascii_uppercase() - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_SHA256, None).unwrap().name, - // ALGORITHM_OPTIONS_SHA256.to_ascii_uppercase() - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_SHA384, None).unwrap().name, - // ALGORITHM_OPTIONS_SHA384.to_ascii_uppercase() - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_SHA512, None).unwrap().name, - // ALGORITHM_OPTIONS_SHA512.to_ascii_uppercase() - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_BLAKE2B, None).unwrap().name, - // ALGORITHM_OPTIONS_BLAKE2B - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_BLAKE3, None).unwrap().name, - // ALGORITHM_OPTIONS_BLAKE3 - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_SM3, None).unwrap().name, - // ALGORITHM_OPTIONS_SM3 - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_SHAKE128, Some(128)) - // .unwrap() - // .name, - // ALGORITHM_OPTIONS_SHAKE128 - // ); - // assert_eq!( - // detect_algo(ALGORITHM_OPTIONS_SHAKE256, Some(256)) - // .unwrap() - // .name, - // ALGORITHM_OPTIONS_SHAKE256 - // ); - - // // Older versions of checksum used to detect the "sha3" prefix, but not - // // anymore. 
- // assert!(detect_algo("sha3_224", Some(224)).is_err()); - // assert!(detect_algo("sha3_256", Some(256)).is_err()); - // assert!(detect_algo("sha3_384", Some(384)).is_err()); - // assert!(detect_algo("sha3_512", Some(512)).is_err()); - - // let sha3_224 = detect_algo("sha3", Some(224)).unwrap(); - // assert_eq!(sha3_224.name, "SHA3-224"); - // assert_eq!(sha3_224.bits, 224); - // let sha3_256 = detect_algo("sha3", Some(256)).unwrap(); - // assert_eq!(sha3_256.name, "SHA3-256"); - // assert_eq!(sha3_256.bits, 256); - // let sha3_384 = detect_algo("sha3", Some(384)).unwrap(); - // assert_eq!(sha3_384.name, "SHA3-384"); - // assert_eq!(sha3_384.bits, 384); - // let sha3_512 = detect_algo("sha3", Some(512)).unwrap(); - // assert_eq!(sha3_512.name, "SHA3-512"); - // assert_eq!(sha3_512.bits, 512); - - // assert!(detect_algo("sha3", None).is_err()); - - // assert_eq!(detect_algo("sha2", Some(224)).unwrap().name, "SHA224"); - // assert_eq!(detect_algo("sha2", Some(256)).unwrap().name, "SHA256"); - // assert_eq!(detect_algo("sha2", Some(384)).unwrap().name, "SHA384"); - // assert_eq!(detect_algo("sha2", Some(512)).unwrap().name, "SHA512"); - - // assert!(detect_algo("sha2", None).is_err()); - // } #[test] fn test_algo_based_parser() { #[allow(clippy::type_complexity)] From 0b7289310d274bfe678a66a2e0cb253a82b9b6d6 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Thu, 6 Nov 2025 16:04:54 +0100 Subject: [PATCH 07/10] checksum: Create a sub-module for checksum validation --- src/uu/cksum/src/cksum.rs | 9 +- src/uu/hashsum/src/hashsum.rs | 27 +- src/uucore/src/lib/features/checksum/mod.rs | 614 +++++++++++++++ .../{checksum.rs => checksum/validate.rs} | 710 ++---------------- 4 files changed, 689 insertions(+), 671 deletions(-) create mode 100644 src/uucore/src/lib/features/checksum/mod.rs rename src/uucore/src/lib/features/{checksum.rs => checksum/validate.rs} (66%) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index b80139f67ae..a0a5c00df32 100644 --- 
a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -12,10 +12,10 @@ use std::fs::File; use std::io::{BufReader, Read, Write, stdin, stdout}; use std::iter; use std::path::Path; +use uucore::checksum::validate::{ChecksumOptions, ChecksumVerbose, perform_checksum_validation}; use uucore::checksum::{ - AlgoKind, ChecksumError, ChecksumOptions, ChecksumVerbose, SUPPORTED_ALGORITHMS, SizedAlgoKind, - calculate_blake2b_length_str, digest_reader, perform_checksum_validation, - sanitize_sha2_sha3_length_str, + AlgoKind, ChecksumError, SUPPORTED_ALGORITHMS, SizedAlgoKind, calculate_blake2b_length_str, + digest_reader, sanitize_sha2_sha3_length_str, }; use uucore::translate; @@ -428,8 +428,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Execute the checksum validation based on the presence of files or the use of stdin let verbose = ChecksumVerbose::new(status, quiet, warn); - let opts = ChecksumOptions { - binary: binary_flag, + let opts = ChecksumValidateOptions { ignore_missing, strict, verbose, diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index c9f600c32f8..61bd0f0ff6c 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -15,17 +15,17 @@ use std::io::{BufReader, Read, stdin}; use std::iter; use std::num::ParseIntError; use std::path::Path; -use uucore::checksum::ChecksumVerbose; -use uucore::checksum::calculate_blake2b_length; -use uucore::checksum::digest_reader; -use uucore::checksum::escape_filename; -use uucore::checksum::perform_checksum_validation; -use uucore::checksum::{AlgoKind, ChecksumError}; -use uucore::checksum::{ChecksumOptions, SizedAlgoKind}; + +use uucore::checksum::validate::{ + ChecksumValidateOptions, ChecksumVerbose, perform_checksum_validation, +}; +use uucore::checksum::{ + AlgoKind, ChecksumError, SizedAlgoKind, calculate_blake2b_length, digest_reader, + escape_filename, +}; use uucore::error::{UResult, strip_errno}; -use uucore::format_usage; use 
uucore::sum::Digest; -use uucore::translate; +use uucore::{format_usage, translate}; const NAME: &str = "hashsum"; // Using the same read buffer size as GNU @@ -200,16 +200,14 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { // on Windows, allow --binary/--text to be used with --check // and keep the behavior of defaulting to binary #[cfg(not(windows))] - let binary = { + { let text_flag = matches.get_flag("text"); let binary_flag = matches.get_flag("binary"); if binary_flag || text_flag { return Err(ChecksumError::BinaryTextConflict.into()); } - - false - }; + } // Execute the checksum validation based on the presence of files or the use of stdin // Determine the source of input: a list of files or stdin. @@ -220,8 +218,7 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { let verbose = ChecksumVerbose::new(status, quiet, warn); - let opts = ChecksumOptions { - binary, + let opts = ChecksumValidateOptions { ignore_missing, strict, verbose, diff --git a/src/uucore/src/lib/features/checksum/mod.rs b/src/uucore/src/lib/features/checksum/mod.rs new file mode 100644 index 00000000000..29af7a49131 --- /dev/null +++ b/src/uucore/src/lib/features/checksum/mod.rs @@ -0,0 +1,614 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+// spell-checker:ignore anotherfile invalidchecksum JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit rsplit + +use os_display::Quotable; +use std::{ + io::{self, Read}, + num::IntErrorKind, + path::Path, +}; + +use crate::{ + error::{UError, UResult}, + show_error, + sum::{ + Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256, + Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV, + }, +}; +use thiserror::Error; + +pub mod validate; + +pub const ALGORITHM_OPTIONS_SYSV: &str = "sysv"; +pub const ALGORITHM_OPTIONS_BSD: &str = "bsd"; +pub const ALGORITHM_OPTIONS_CRC: &str = "crc"; +pub const ALGORITHM_OPTIONS_CRC32B: &str = "crc32b"; +pub const ALGORITHM_OPTIONS_MD5: &str = "md5"; +pub const ALGORITHM_OPTIONS_SHA1: &str = "sha1"; +pub const ALGORITHM_OPTIONS_SHA2: &str = "sha2"; +pub const ALGORITHM_OPTIONS_SHA3: &str = "sha3"; + +pub const ALGORITHM_OPTIONS_SHA224: &str = "sha224"; +pub const ALGORITHM_OPTIONS_SHA256: &str = "sha256"; +pub const ALGORITHM_OPTIONS_SHA384: &str = "sha384"; +pub const ALGORITHM_OPTIONS_SHA512: &str = "sha512"; +pub const ALGORITHM_OPTIONS_BLAKE2B: &str = "blake2b"; +pub const ALGORITHM_OPTIONS_BLAKE3: &str = "blake3"; +pub const ALGORITHM_OPTIONS_SM3: &str = "sm3"; +pub const ALGORITHM_OPTIONS_SHAKE128: &str = "shake128"; +pub const ALGORITHM_OPTIONS_SHAKE256: &str = "shake256"; + +pub const SUPPORTED_ALGORITHMS: [&str; 17] = [ + ALGORITHM_OPTIONS_SYSV, + ALGORITHM_OPTIONS_BSD, + ALGORITHM_OPTIONS_CRC, + ALGORITHM_OPTIONS_CRC32B, + ALGORITHM_OPTIONS_MD5, + ALGORITHM_OPTIONS_SHA1, + ALGORITHM_OPTIONS_SHA2, + ALGORITHM_OPTIONS_SHA3, + ALGORITHM_OPTIONS_BLAKE2B, + ALGORITHM_OPTIONS_SM3, + // Legacy aliases for -a sha2 -l xxx + ALGORITHM_OPTIONS_SHA224, + ALGORITHM_OPTIONS_SHA256, + ALGORITHM_OPTIONS_SHA384, + ALGORITHM_OPTIONS_SHA512, + // Extra algorithms that are not valid `cksum --algorithm` as per GNU. 
+ // TODO: Should we keep them or drop them to align our support with GNU ? + ALGORITHM_OPTIONS_BLAKE3, + ALGORITHM_OPTIONS_SHAKE128, + ALGORITHM_OPTIONS_SHAKE256, +]; + +/// Represents an algorithm kind. In some cases, it is not sufficient by itself +/// to know which algorithm to use exactly, because it lacks a digest length, +/// which is why [`SizedAlgoKind`] exists. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AlgoKind { + Sysv, + Bsd, + Crc, + Crc32b, + Md5, + Sm3, + Sha1, + Sha2, + Sha3, + Blake2b, + + // Available in cksum for backward compatibility + Sha224, + Sha256, + Sha384, + Sha512, + + // Not available in cksum + Shake128, + Shake256, + Blake3, +} + +impl AlgoKind { + /// Parses an [`AlgoKind`] from a string, only accepting valid cksum + /// `--algorithm` values. + pub fn from_cksum(algo: impl AsRef) -> UResult { + use AlgoKind::*; + Ok(match algo.as_ref() { + ALGORITHM_OPTIONS_SYSV => Sysv, + ALGORITHM_OPTIONS_BSD => Bsd, + ALGORITHM_OPTIONS_CRC => Crc, + ALGORITHM_OPTIONS_CRC32B => Crc32b, + ALGORITHM_OPTIONS_MD5 => Md5, + ALGORITHM_OPTIONS_SHA1 => Sha1, + ALGORITHM_OPTIONS_SHA2 => Sha2, + ALGORITHM_OPTIONS_SHA3 => Sha3, + ALGORITHM_OPTIONS_BLAKE2B => Blake2b, + ALGORITHM_OPTIONS_SM3 => Sm3, + + // For backward compatibility + ALGORITHM_OPTIONS_SHA224 => Sha224, + ALGORITHM_OPTIONS_SHA256 => Sha256, + ALGORITHM_OPTIONS_SHA384 => Sha384, + ALGORITHM_OPTIONS_SHA512 => Sha512, + _ => return Err(ChecksumError::UnknownAlgorithm(algo.as_ref().to_string()).into()), + }) + } + + /// Parses an algo kind from a string, accepting standalone binary names. 
+ pub fn from_bin_name(algo: impl AsRef) -> UResult { + use AlgoKind::*; + Ok(match algo.as_ref() { + "md5sum" => Md5, + "sha1sum" => Sha1, + "sha224sum" => Sha224, + "sha256sum" => Sha256, + "sha384sum" => Sha384, + "sha512sum" => Sha512, + "sha3sum" => Sha3, + "b2sum" => Blake2b, + + _ => return Err(ChecksumError::UnknownAlgorithm(algo.as_ref().to_string()).into()), + }) + } + + /// Returns a string corresponding to the algorithm kind. + pub fn to_uppercase(self) -> &'static str { + use AlgoKind::*; + match self { + // Legacy algorithms + Sysv => "SYSV", + Bsd => "BSD", + Crc => "CRC", + Crc32b => "CRC32B", + + Md5 => "MD5", + Sm3 => "SM3", + Sha1 => "SHA1", + Sha2 => "SHA2", + Sha3 => "SHA3", + Blake2b => "BLAKE2b", // Note the lowercase b in the end here. + + // For backward compatibility + Sha224 => "SHA224", + Sha256 => "SHA256", + Sha384 => "SHA384", + Sha512 => "SHA512", + + Shake128 => "SHAKE128", + Shake256 => "SHAKE256", + Blake3 => "BLAKE3", + } + } + + /// Returns a string corresponding to the algorithm option in cksum `-a` + pub fn to_lowercase(self) -> &'static str { + use AlgoKind::*; + match self { + Sysv => "sysv", + Bsd => "bsd", + Crc => "crc", + Crc32b => "crc32b", + Md5 => "md5", + Sm3 => "sm3", + Sha1 => "sha1", + Sha2 => "sha2", + Sha3 => "sha3", + Blake2b => "blake2b", + + // For backward compatibility + Sha224 => "sha224", + Sha256 => "sha256", + Sha384 => "sha384", + Sha512 => "sha512", + + Shake128 => "shake128", + Shake256 => "shake256", + Blake3 => "blake3", + } + } + + pub fn is_legacy(self) -> bool { + use AlgoKind::*; + matches!(self, Sysv | Bsd | Crc | Crc32b) + } +} + +/// Holds a length for a SHA2 of SHA3 algorithm kind. 
+#[derive(Debug, Clone, Copy)] +pub enum ShaLength { + Len224, + Len256, + Len384, + Len512, +} + +impl ShaLength { + pub fn as_usize(self) -> usize { + match self { + Self::Len224 => 224, + Self::Len256 => 256, + Self::Len384 => 384, + Self::Len512 => 512, + } + } +} + +impl TryFrom for ShaLength { + type Error = ChecksumError; + + fn try_from(value: usize) -> Result { + use ShaLength::*; + match value { + 224 => Ok(Len224), + 256 => Ok(Len256), + 384 => Ok(Len384), + 512 => Ok(Len512), + _ => Err(ChecksumError::InvalidLengthForSha(value.to_string())), + } + } +} + +/// Represents an actual determined algorithm. +#[derive(Debug, Clone, Copy)] +pub enum SizedAlgoKind { + Sysv, + Bsd, + Crc, + Crc32b, + Md5, + Sm3, + Sha1, + Blake3, + Sha2(ShaLength), + Sha3(ShaLength), + // Note: we store Blake2b's length as BYTES. + Blake2b(Option), + Shake128(usize), + Shake256(usize), +} + +impl SizedAlgoKind { + pub fn from_unsized(kind: AlgoKind, byte_length: Option) -> UResult { + use AlgoKind as ak; + match (kind, byte_length) { + ( + ak::Sysv + | ak::Bsd + | ak::Crc + | ak::Crc32b + | ak::Md5 + | ak::Sm3 + | ak::Sha1 + | ak::Blake3 + | ak::Sha224 + | ak::Sha256 + | ak::Sha384 + | ak::Sha512, + Some(_), + ) => Err(ChecksumError::LengthOnlyForBlake2bSha2Sha3.into()), + + (ak::Sysv, _) => Ok(Self::Sysv), + (ak::Bsd, _) => Ok(Self::Bsd), + (ak::Crc, _) => Ok(Self::Crc), + (ak::Crc32b, _) => Ok(Self::Crc32b), + (ak::Md5, _) => Ok(Self::Md5), + (ak::Sm3, _) => Ok(Self::Sm3), + (ak::Sha1, _) => Ok(Self::Sha1), + (ak::Blake3, _) => Ok(Self::Blake3), + + (ak::Shake128, Some(l)) => Ok(Self::Shake128(l)), + (ak::Shake256, Some(l)) => Ok(Self::Shake256(l)), + (ak::Sha2, Some(l)) => Ok(Self::Sha2(ShaLength::try_from(l)?)), + (ak::Sha3, Some(l)) => Ok(Self::Sha3(ShaLength::try_from(l)?)), + (algo @ (ak::Sha2 | ak::Sha3), None) => { + Err(ChecksumError::LengthRequiredForSha(algo.to_lowercase().into()).into()) + } + // [`calculate_blake2b_length`] expects a length in bits but we + // have 
a length in bytes. + (ak::Blake2b, Some(l)) => Ok(Self::Blake2b(calculate_blake2b_length(8 * l)?)), + (ak::Blake2b, None) => Ok(Self::Blake2b(None)), + + (ak::Sha224, None) => Ok(Self::Sha2(ShaLength::Len224)), + (ak::Sha256, None) => Ok(Self::Sha2(ShaLength::Len256)), + (ak::Sha384, None) => Ok(Self::Sha2(ShaLength::Len384)), + (ak::Sha512, None) => Ok(Self::Sha2(ShaLength::Len512)), + (_, None) => Err(ChecksumError::LengthRequired(kind.to_uppercase().into()).into()), + } + } + + pub fn to_tag(self) -> String { + use SizedAlgoKind::*; + match self { + Md5 => "MD5".into(), + Sm3 => "SM3".into(), + Sha1 => "SHA1".into(), + Blake3 => "BLAKE3".into(), + Sha2(len) => format!("SHA{}", len.as_usize()), + Sha3(len) => format!("SHA3-{}", len.as_usize()), + Blake2b(Some(byte_len)) => format!("BLAKE2b-{}", byte_len * 8), + Blake2b(None) => "BLAKE2b".into(), + Shake128(_) => "SHAKE128".into(), + Shake256(_) => "SHAKE256".into(), + Sysv | Bsd | Crc | Crc32b => panic!("Should not be used for tagging"), + } + } + + pub fn create_digest(&self) -> Box { + use ShaLength::*; + match self { + Self::Sysv => Box::new(SysV::new()), + Self::Bsd => Box::new(Bsd::new()), + Self::Crc => Box::new(Crc::new()), + Self::Crc32b => Box::new(CRC32B::new()), + Self::Md5 => Box::new(Md5::new()), + Self::Sm3 => Box::new(Sm3::new()), + Self::Sha1 => Box::new(Sha1::new()), + Self::Blake3 => Box::new(Blake3::new()), + Self::Sha2(Len224) => Box::new(Sha224::new()), + Self::Sha2(Len256) => Box::new(Sha256::new()), + Self::Sha2(Len384) => Box::new(Sha384::new()), + Self::Sha2(Len512) => Box::new(Sha512::new()), + Self::Sha3(Len224) => Box::new(Sha3_224::new()), + Self::Sha3(Len256) => Box::new(Sha3_256::new()), + Self::Sha3(Len384) => Box::new(Sha3_384::new()), + Self::Sha3(Len512) => Box::new(Sha3_512::new()), + Self::Blake2b(Some(byte_len)) => Box::new(Blake2b::with_output_bytes(*byte_len)), + Self::Blake2b(None) => Box::new(Blake2b::new()), + Self::Shake128(_) => Box::new(Shake128::new()), + 
Self::Shake256(_) => Box::new(Shake256::new()), + } + } + + pub fn bitlen(&self) -> usize { + use SizedAlgoKind::*; + match self { + Sysv => 512, + Bsd => 1024, + Crc => 256, + Crc32b => 32, + Md5 => 128, + Sm3 => 512, + Sha1 => 160, + Blake3 => 256, + Sha2(len) => len.as_usize(), + Sha3(len) => len.as_usize(), + Blake2b(len) => len.unwrap_or(512), + Shake128(len) => *len, + Shake256(len) => *len, + } + } + pub fn is_legacy(&self) -> bool { + use SizedAlgoKind::*; + matches!(self, Sysv | Bsd | Crc | Crc32b) + } +} + +#[derive(Debug, Error)] +pub enum ChecksumError { + #[error("the --raw option is not supported with multiple files")] + RawMultipleFiles, + #[error("the --ignore-missing option is meaningful only when verifying checksums")] + IgnoreNotCheck, + #[error("the --strict option is meaningful only when verifying checksums")] + StrictNotCheck, + #[error("the --quiet option is meaningful only when verifying checksums")] + QuietNotCheck, + + // --length sanitization errors + #[error("--length required for {}", .0.quote())] + LengthRequired(String), + #[error("invalid length: {}", .0.quote())] + InvalidLength(String), + #[error("maximum digest length for {} is 512 bits", .0.quote())] + LengthTooBigForBlake(String), + #[error("length is not a multiple of 8")] + LengthNotMultipleOf8, + #[error("digest length for {} must be 224, 256, 384, or 512", .0.quote())] + InvalidLengthForSha(String), + #[error("--algorithm={0} requires specifying --length 224, 256, 384, or 512")] + LengthRequiredForSha(String), + #[error("--length is only supported with --algorithm blake2b, sha2, or sha3")] + LengthOnlyForBlake2bSha2Sha3, + + #[error("the --binary and --text options are meaningless when verifying checksums")] + BinaryTextConflict, + #[error("--text mode is only supported with --untagged")] + TextWithoutUntagged, + #[error("--check is not supported with --algorithm={{bsd,sysv,crc,crc32b}}")] + AlgorithmNotSupportedWithCheck, + #[error("You cannot combine multiple hash 
algorithms!")] + CombineMultipleAlgorithms, + #[error("Needs an algorithm to hash with.\nUse --help for more information.")] + NeedAlgorithmToHash, + #[error("unknown algorithm: {0}: clap should have prevented this case")] + UnknownAlgorithm(String), + #[error("")] + Io(#[from] io::Error), +} + +impl UError for ChecksumError { + fn code(&self) -> i32 { + 1 + } +} + +pub fn digest_reader( + digest: &mut Box, + reader: &mut T, + binary: bool, + output_bits: usize, +) -> io::Result<(String, usize)> { + digest.reset(); + + // Read bytes from `reader` and write those bytes to `digest`. + // + // If `binary` is `false` and the operating system is Windows, then + // `DigestWriter` replaces "\r\n" with "\n" before it writes the + // bytes into `digest`. Otherwise, it just inserts the bytes as-is. + // + // In order to support replacing "\r\n", we must call `finalize()` + // in order to support the possibility that the last character read + // from the reader was "\r". (This character gets buffered by + // `DigestWriter` and only written if the following character is + // "\n". But when "\r" is the last character read, we need to force + // it to be written.) + let mut digest_writer = DigestWriter::new(digest, binary); + let output_size = std::io::copy(reader, &mut digest_writer)? as usize; + digest_writer.finalize(); + + if digest.output_bits() > 0 { + Ok((digest.result_str(), output_size)) + } else { + // Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016) + let mut bytes = vec![0; output_bits.div_ceil(8)]; + digest.hash_finalize(&mut bytes); + Ok((hex::encode(bytes), output_size)) + } +} + +/// Calculates the length of the digest. +pub fn calculate_blake2b_length(bit_length: usize) -> UResult> { + calculate_blake2b_length_str(bit_length.to_string().as_str()) +} + +/// Calculates the length of the digest. +pub fn calculate_blake2b_length_str(bit_length: &str) -> UResult> { + // Blake2b's length is parsed in an u64. 
+ match bit_length.parse::() { + Ok(0) => Ok(None), + + // Error cases + Ok(n) if n > 512 => { + show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); + Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) + } + Err(e) if *e.kind() == IntErrorKind::PosOverflow => { + show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); + Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) + } + Err(_) => Err(ChecksumError::InvalidLength(bit_length.into()).into()), + + Ok(n) if n % 8 != 0 => { + show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); + Err(ChecksumError::LengthNotMultipleOf8.into()) + } + + // Valid cases + + // When length is 512, it is blake2b's default. So, don't show it + Ok(512) => Ok(None), + // Divide by 8, as our blake2b implementation expects bytes instead of bits. + Ok(n) => Ok(Some(n / 8)), + } +} + +pub fn validate_sha2_sha3_length(algo_name: AlgoKind, length: Option) -> UResult { + match length { + Some(224) => Ok(ShaLength::Len224), + Some(256) => Ok(ShaLength::Len256), + Some(384) => Ok(ShaLength::Len384), + Some(512) => Ok(ShaLength::Len512), + Some(len) => { + show_error!("{}", ChecksumError::InvalidLength(len.to_string())); + Err(ChecksumError::InvalidLengthForSha(algo_name.to_uppercase().into()).into()) + } + None => Err(ChecksumError::LengthRequiredForSha(algo_name.to_lowercase().into()).into()), + } +} + +pub fn sanitize_sha2_sha3_length_str(algo_kind: AlgoKind, length: &str) -> UResult { + // There is a difference in the errors sent when the length is not a number + // vs. its an invalid number. + // + // When inputting an invalid number, an extra error message it printed to + // remind of the accepted inputs. + let len = match length.parse::() { + Ok(l) => l, + // Note: Positive overflow while parsing counts as an invalid number, + // but a number still. 
+ Err(e) if *e.kind() == IntErrorKind::PosOverflow => { + show_error!("{}", ChecksumError::InvalidLength(length.into())); + return Err(ChecksumError::InvalidLengthForSha(algo_kind.to_uppercase().into()).into()); + } + Err(_) => return Err(ChecksumError::InvalidLength(length.into()).into()), + }; + + if [224, 256, 384, 512].contains(&len) { + Ok(len) + } else { + show_error!("{}", ChecksumError::InvalidLength(length.into())); + Err(ChecksumError::InvalidLengthForSha(algo_kind.to_uppercase().into()).into()) + } +} + +pub fn unescape_filename(filename: &[u8]) -> (Vec, &'static str) { + let mut unescaped = Vec::with_capacity(filename.len()); + let mut byte_iter = filename.iter().peekable(); + loop { + let Some(byte) = byte_iter.next() else { + break; + }; + if *byte == b'\\' { + match byte_iter.next() { + Some(b'\\') => unescaped.push(b'\\'), + Some(b'n') => unescaped.push(b'\n'), + Some(b'r') => unescaped.push(b'\r'), + Some(x) => { + unescaped.push(b'\\'); + unescaped.push(*x); + } + _ => {} + } + } else { + unescaped.push(*byte); + } + } + let prefix = if unescaped == filename { "" } else { "\\" }; + (unescaped, prefix) +} + +pub fn escape_filename(filename: &Path) -> (String, &'static str) { + let original = filename.as_os_str().to_string_lossy(); + let escaped = original + .replace('\\', "\\\\") + .replace('\n', "\\n") + .replace('\r', "\\r"); + let prefix = if escaped == original { "" } else { "\\" }; + (escaped, prefix) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_unescape_filename() { + let (unescaped, prefix) = unescape_filename(b"test\\nfile.txt"); + assert_eq!(unescaped, b"test\nfile.txt"); + assert_eq!(prefix, "\\"); + let (unescaped, prefix) = unescape_filename(b"test\\nfile.txt"); + assert_eq!(unescaped, b"test\nfile.txt"); + assert_eq!(prefix, "\\"); + + let (unescaped, prefix) = unescape_filename(b"test\\rfile.txt"); + assert_eq!(unescaped, b"test\rfile.txt"); + assert_eq!(prefix, "\\"); + + let (unescaped, prefix) = 
unescape_filename(b"test\\\\file.txt"); + assert_eq!(unescaped, b"test\\file.txt"); + assert_eq!(prefix, "\\"); + } + + #[test] + fn test_escape_filename() { + let (escaped, prefix) = escape_filename(Path::new("testfile.txt")); + assert_eq!(escaped, "testfile.txt"); + assert_eq!(prefix, ""); + + let (escaped, prefix) = escape_filename(Path::new("test\nfile.txt")); + assert_eq!(escaped, "test\\nfile.txt"); + assert_eq!(prefix, "\\"); + + let (escaped, prefix) = escape_filename(Path::new("test\rfile.txt")); + assert_eq!(escaped, "test\\rfile.txt"); + assert_eq!(prefix, "\\"); + + let (escaped, prefix) = escape_filename(Path::new("test\\file.txt")); + assert_eq!(escaped, "test\\\\file.txt"); + assert_eq!(prefix, "\\"); + } + + #[test] + fn test_calculate_blake2b_length() { + assert_eq!(calculate_blake2b_length(0).unwrap(), None); + assert!(calculate_blake2b_length(10).is_err()); + assert!(calculate_blake2b_length(520).is_err()); + assert_eq!(calculate_blake2b_length(512).unwrap(), None); + assert_eq!(calculate_blake2b_length(256).unwrap(), Some(32)); + } +} diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum/validate.rs similarity index 66% rename from src/uucore/src/lib/features/checksum.rs rename to src/uucore/src/lib/features/checksum/validate.rs index aed5b911103..a1d851a05b6 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum/validate.rs @@ -2,377 +2,72 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore anotherfile invalidchecksum JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit rsplit + +// spell-checker:ignore rsplit hexdigit bitlen bytelen invalidchecksum xffname + +use std::borrow::Cow; +use std::ffi::OsStr; +use std::fmt::Display; +use std::fs::File; +use std::io::{self, BufReader, Read, Write, stdin}; use data_encoding::BASE64; use os_display::Quotable; -use std::{ - borrow::Cow, - ffi::OsStr, - fmt::Display, - fs::File, - io::{self, BufReader, Read, Write, stdin}, - num::IntErrorKind, - path::Path, - str, -}; +use crate::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, digest_reader, unescape_filename}; +use crate::error::{FromIo, UError, UResult, USimpleError}; +use crate::quoting_style::{QuotingStyle, locale_aware_escape_name}; use crate::{ - error::{FromIo, UError, UResult, USimpleError}, - os_str_as_bytes, os_str_from_bytes, - quoting_style::{QuotingStyle, locale_aware_escape_name}, - read_os_string_lines, show, show_error, show_warning_caps, - sum::{ - Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256, - Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV, - }, + os_str_as_bytes, os_str_from_bytes, read_os_string_lines, show, show_error, show_warning_caps, util_name, }; -use thiserror::Error; - -pub const ALGORITHM_OPTIONS_SYSV: &str = "sysv"; -pub const ALGORITHM_OPTIONS_BSD: &str = "bsd"; -pub const ALGORITHM_OPTIONS_CRC: &str = "crc"; -pub const ALGORITHM_OPTIONS_CRC32B: &str = "crc32b"; -pub const ALGORITHM_OPTIONS_MD5: &str = "md5"; -pub const ALGORITHM_OPTIONS_SHA1: &str = "sha1"; -pub const ALGORITHM_OPTIONS_SHA2: &str = "sha2"; -pub const ALGORITHM_OPTIONS_SHA3: &str = "sha3"; - -pub const ALGORITHM_OPTIONS_SHA224: &str = "sha224"; -pub const ALGORITHM_OPTIONS_SHA256: &str = "sha256"; -pub const ALGORITHM_OPTIONS_SHA384: &str = "sha384"; -pub const ALGORITHM_OPTIONS_SHA512: &str = "sha512"; -pub const ALGORITHM_OPTIONS_BLAKE2B: &str = 
"blake2b"; -pub const ALGORITHM_OPTIONS_BLAKE3: &str = "blake3"; -pub const ALGORITHM_OPTIONS_SM3: &str = "sm3"; -pub const ALGORITHM_OPTIONS_SHAKE128: &str = "shake128"; -pub const ALGORITHM_OPTIONS_SHAKE256: &str = "shake256"; - -pub const SUPPORTED_ALGORITHMS: [&str; 17] = [ - ALGORITHM_OPTIONS_SYSV, - ALGORITHM_OPTIONS_BSD, - ALGORITHM_OPTIONS_CRC, - ALGORITHM_OPTIONS_CRC32B, - ALGORITHM_OPTIONS_MD5, - ALGORITHM_OPTIONS_SHA1, - ALGORITHM_OPTIONS_SHA2, - ALGORITHM_OPTIONS_SHA3, - ALGORITHM_OPTIONS_BLAKE2B, - ALGORITHM_OPTIONS_SM3, - // Legacy aliases for -a sha2 -l xxx - ALGORITHM_OPTIONS_SHA224, - ALGORITHM_OPTIONS_SHA256, - ALGORITHM_OPTIONS_SHA384, - ALGORITHM_OPTIONS_SHA512, - // Extra algorithms that are not valid `cksum --algorithm` as per GNU. - // TODO: Should we keep them or drop them to align our support with GNU ? - ALGORITHM_OPTIONS_BLAKE3, - ALGORITHM_OPTIONS_SHAKE128, - ALGORITHM_OPTIONS_SHAKE256, -]; - -/// Represents an algorithm kind. In some cases, it is not sufficient by itself -/// to know which algorithm to use exactly, because it lacks a digest length, -/// which is why [`SizedAlgoKind`] exists. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum AlgoKind { - Sysv, - Bsd, - Crc, - Crc32b, - Md5, - Sm3, - Sha1, - Sha2, - Sha3, - Blake2b, - - // Available in cksum for backward compatibility - Sha224, - Sha256, - Sha384, - Sha512, - - // Not available in cksum - Shake128, - Shake256, - Blake3, -} - -impl AlgoKind { - /// Parses an [`AlgoKind`] from a string, only accepting valid cksum - /// `--algorithm` values. 
- pub fn from_cksum(algo: impl AsRef) -> UResult { - use AlgoKind::*; - Ok(match algo.as_ref() { - ALGORITHM_OPTIONS_SYSV => Sysv, - ALGORITHM_OPTIONS_BSD => Bsd, - ALGORITHM_OPTIONS_CRC => Crc, - ALGORITHM_OPTIONS_CRC32B => Crc32b, - ALGORITHM_OPTIONS_MD5 => Md5, - ALGORITHM_OPTIONS_SHA1 => Sha1, - ALGORITHM_OPTIONS_SHA2 => Sha2, - ALGORITHM_OPTIONS_SHA3 => Sha3, - ALGORITHM_OPTIONS_BLAKE2B => Blake2b, - ALGORITHM_OPTIONS_SM3 => Sm3, - - // For backward compatibility - ALGORITHM_OPTIONS_SHA224 => Sha224, - ALGORITHM_OPTIONS_SHA256 => Sha256, - ALGORITHM_OPTIONS_SHA384 => Sha384, - ALGORITHM_OPTIONS_SHA512 => Sha512, - _ => return Err(ChecksumError::UnknownAlgorithm(algo.as_ref().to_string()).into()), - }) - } - - /// Parses an algo kind from a string, accepting standalone binary names. - pub fn from_bin_name(algo: impl AsRef) -> UResult { - use AlgoKind::*; - Ok(match algo.as_ref() { - "md5sum" => Md5, - "sha1sum" => Sha1, - "sha224sum" => Sha224, - "sha256sum" => Sha256, - "sha384sum" => Sha384, - "sha512sum" => Sha512, - "sha3sum" => Sha3, - "b2sum" => Blake2b, - - _ => return Err(ChecksumError::UnknownAlgorithm(algo.as_ref().to_string()).into()), - }) - } - - /// Returns a string corresponding to the algorithm kind. - pub fn to_uppercase(self) -> &'static str { - use AlgoKind::*; - match self { - // Legacy algorithms - Sysv => "SYSV", - Bsd => "BSD", - Crc => "CRC", - Crc32b => "CRC32B", - - Md5 => "MD5", - Sm3 => "SM3", - Sha1 => "SHA1", - Sha2 => "SHA2", - Sha3 => "SHA3", - Blake2b => "BLAKE2b", // Note the lowercase b in the end here. 
- - // For backward compatibility - Sha224 => "SHA224", - Sha256 => "SHA256", - Sha384 => "SHA384", - Sha512 => "SHA512", - - Shake128 => "SHAKE128", - Shake256 => "SHAKE256", - Blake3 => "BLAKE3", - } - } - /// Returns a string corresponding to the algorithm option in cksum `-a` - pub fn to_lowercase(self) -> &'static str { - use AlgoKind::*; - match self { - Sysv => "sysv", - Bsd => "bsd", - Crc => "crc", - Crc32b => "crc32b", - Md5 => "md5", - Sm3 => "sm3", - Sha1 => "sha1", - Sha2 => "sha2", - Sha3 => "sha3", - Blake2b => "blake2b", - - // For backward compatibility - Sha224 => "sha224", - Sha256 => "sha256", - Sha384 => "sha384", - Sha512 => "sha512", - - Shake128 => "shake128", - Shake256 => "shake256", - Blake3 => "blake3", - } - } - - pub fn is_legacy(self) -> bool { - use AlgoKind::*; - matches!(self, Sysv | Bsd | Crc | Crc32b) - } -} - -/// Holds a length for a SHA2 of SHA3 algorithm kind. -#[derive(Debug, Clone, Copy)] -pub enum ShaLength { - Len224, - Len256, - Len384, - Len512, +/// To what level should checksum validation print logging info. +#[derive(Debug, PartialEq, Eq, PartialOrd, Clone, Copy, Default)] +pub enum ChecksumVerbose { + Status, + Quiet, + #[default] + Normal, + Warning, } -impl ShaLength { - pub fn as_usize(self) -> usize { - match self { - Self::Len224 => 224, - Self::Len256 => 256, - Self::Len384 => 384, - Self::Len512 => 512, - } - } -} +impl ChecksumVerbose { + pub fn new(status: bool, quiet: bool, warn: bool) -> Self { + use ChecksumVerbose::*; -impl TryFrom for ShaLength { - type Error = ChecksumError; - - fn try_from(value: usize) -> Result { - use ShaLength::*; - match value { - 224 => Ok(Len224), - 256 => Ok(Len256), - 384 => Ok(Len384), - 512 => Ok(Len512), - _ => Err(ChecksumError::InvalidLengthForSha(value.to_string())), + // Assume only one of the three booleans will be enabled at once. + // This is ensured by clap's overriding arguments. 
+ match (status, quiet, warn) { + (true, _, _) => Status, + (_, true, _) => Quiet, + (_, _, true) => Warning, + _ => Normal, } } -} -/// Represents an actual determined algorithm. -#[derive(Debug, Clone, Copy)] -pub enum SizedAlgoKind { - Sysv, - Bsd, - Crc, - Crc32b, - Md5, - Sm3, - Sha1, - Blake3, - Sha2(ShaLength), - Sha3(ShaLength), - // Note: we store Blake2b's length as BYTES. - Blake2b(Option), - Shake128(usize), - Shake256(usize), -} - -impl SizedAlgoKind { - pub fn from_unsized(kind: AlgoKind, byte_length: Option) -> UResult { - use AlgoKind as ak; - match (kind, byte_length) { - ( - ak::Sysv - | ak::Bsd - | ak::Crc - | ak::Crc32b - | ak::Md5 - | ak::Sm3 - | ak::Sha1 - | ak::Blake3 - | ak::Sha224 - | ak::Sha256 - | ak::Sha384 - | ak::Sha512, - Some(_), - ) => Err(ChecksumError::LengthOnlyForBlake2bSha2Sha3.into()), - - (ak::Sysv, _) => Ok(Self::Sysv), - (ak::Bsd, _) => Ok(Self::Bsd), - (ak::Crc, _) => Ok(Self::Crc), - (ak::Crc32b, _) => Ok(Self::Crc32b), - (ak::Md5, _) => Ok(Self::Md5), - (ak::Sm3, _) => Ok(Self::Sm3), - (ak::Sha1, _) => Ok(Self::Sha1), - (ak::Blake3, _) => Ok(Self::Blake3), - - (ak::Shake128, Some(l)) => Ok(Self::Shake128(l)), - (ak::Shake256, Some(l)) => Ok(Self::Shake256(l)), - (ak::Sha2, Some(l)) => Ok(Self::Sha2(ShaLength::try_from(l)?)), - (ak::Sha3, Some(l)) => Ok(Self::Sha3(ShaLength::try_from(l)?)), - (algo @ (ak::Sha2 | ak::Sha3), None) => { - Err(ChecksumError::LengthRequiredForSha(algo.to_lowercase().into()).into()) - } - // [`calculate_blake2b_length`] expects a length in bits but we - // have a length in bytes. 
- (ak::Blake2b, Some(l)) => Ok(Self::Blake2b(calculate_blake2b_length(8 * l)?)), - (ak::Blake2b, None) => Ok(Self::Blake2b(None)), - - (ak::Sha224, None) => Ok(Self::Sha2(ShaLength::Len224)), - (ak::Sha256, None) => Ok(Self::Sha2(ShaLength::Len256)), - (ak::Sha384, None) => Ok(Self::Sha2(ShaLength::Len384)), - (ak::Sha512, None) => Ok(Self::Sha2(ShaLength::Len512)), - (_, None) => Err(ChecksumError::LengthRequired(kind.to_uppercase().into()).into()), - } + #[inline] + pub fn over_status(self) -> bool { + self > Self::Status } - pub fn to_tag(self) -> String { - use SizedAlgoKind::*; - match self { - Md5 => "MD5".into(), - Sm3 => "SM3".into(), - Sha1 => "SHA1".into(), - Blake3 => "BLAKE3".into(), - Sha2(len) => format!("SHA{}", len.as_usize()), - Sha3(len) => format!("SHA3-{}", len.as_usize()), - Blake2b(Some(byte_len)) => format!("BLAKE2b-{}", byte_len * 8), - Blake2b(None) => "BLAKE2b".into(), - Shake128(_) => "SHAKE128".into(), - Shake256(_) => "SHAKE256".into(), - Sysv | Bsd | Crc | Crc32b => panic!("Should not be used for tagging"), - } + #[inline] + pub fn over_quiet(self) -> bool { + self > Self::Quiet } - pub fn create_digest(&self) -> Box { - use ShaLength::*; - match self { - Self::Sysv => Box::new(SysV::new()), - Self::Bsd => Box::new(Bsd::new()), - Self::Crc => Box::new(Crc::new()), - Self::Crc32b => Box::new(CRC32B::new()), - Self::Md5 => Box::new(Md5::new()), - Self::Sm3 => Box::new(Sm3::new()), - Self::Sha1 => Box::new(Sha1::new()), - Self::Blake3 => Box::new(Blake3::new()), - Self::Sha2(Len224) => Box::new(Sha224::new()), - Self::Sha2(Len256) => Box::new(Sha256::new()), - Self::Sha2(Len384) => Box::new(Sha384::new()), - Self::Sha2(Len512) => Box::new(Sha512::new()), - Self::Sha3(Len224) => Box::new(Sha3_224::new()), - Self::Sha3(Len256) => Box::new(Sha3_256::new()), - Self::Sha3(Len384) => Box::new(Sha3_384::new()), - Self::Sha3(Len512) => Box::new(Sha3_512::new()), - Self::Blake2b(Some(byte_len)) => Box::new(Blake2b::with_output_bytes(*byte_len)), - 
Self::Blake2b(None) => Box::new(Blake2b::new()), - Self::Shake128(_) => Box::new(Shake128::new()), - Self::Shake256(_) => Box::new(Shake256::new()), - } + #[inline] + pub fn at_least_warning(self) -> bool { + self >= Self::Warning } +} - pub fn bitlen(&self) -> usize { - use SizedAlgoKind::*; - match self { - Sysv => 512, - Bsd => 1024, - Crc => 256, - Crc32b => 32, - Md5 => 128, - Sm3 => 512, - Sha1 => 160, - Blake3 => 256, - Sha2(len) => len.as_usize(), - Sha3(len) => len.as_usize(), - Blake2b(len) => len.unwrap_or(512), - Shake128(len) => *len, - Shake256(len) => *len, - } - } - pub fn is_legacy(&self) -> bool { - use SizedAlgoKind::*; - matches!(self, Sysv | Bsd | Crc | Crc32b) - } +/// This struct regroups CLI flags. +#[derive(Debug, Default, Clone, Copy)] +pub struct ChecksumValidateOptions { + pub ignore_missing: bool, + pub strict: bool, + pub verbose: ChecksumVerbose, } /// This structure holds the count of checksum test lines' outcomes. @@ -453,103 +148,6 @@ impl From for FileCheckError { } } -#[derive(Debug, PartialEq, Eq, PartialOrd, Clone, Copy, Default)] -pub enum ChecksumVerbose { - Status, - Quiet, - #[default] - Normal, - Warning, -} - -impl ChecksumVerbose { - pub fn new(status: bool, quiet: bool, warn: bool) -> Self { - use ChecksumVerbose::*; - - // Assume only one of the three booleans will be enabled at once. - // This is ensured by clap's overriding arguments. - match (status, quiet, warn) { - (true, _, _) => Status, - (_, true, _) => Quiet, - (_, _, true) => Warning, - _ => Normal, - } - } - - #[inline] - pub fn over_status(self) -> bool { - self > Self::Status - } - - #[inline] - pub fn over_quiet(self) -> bool { - self > Self::Quiet - } - - #[inline] - pub fn at_least_warning(self) -> bool { - self >= Self::Warning - } -} - -/// This struct regroups CLI flags. 
-#[derive(Debug, Default, Clone, Copy)] -pub struct ChecksumOptions { - pub binary: bool, - pub ignore_missing: bool, - pub strict: bool, - pub verbose: ChecksumVerbose, -} - -#[derive(Debug, Error)] -pub enum ChecksumError { - #[error("the --raw option is not supported with multiple files")] - RawMultipleFiles, - #[error("the --ignore-missing option is meaningful only when verifying checksums")] - IgnoreNotCheck, - #[error("the --strict option is meaningful only when verifying checksums")] - StrictNotCheck, - #[error("the --quiet option is meaningful only when verifying checksums")] - QuietNotCheck, - - // --length sanitization errors - #[error("--length required for {}", .0.quote())] - LengthRequired(String), - #[error("invalid length: {}", .0.quote())] - InvalidLength(String), - #[error("maximum digest length for {} is 512 bits", .0.quote())] - LengthTooBigForBlake(String), - #[error("length is not a multiple of 8")] - LengthNotMultipleOf8, - #[error("digest length for {} must be 224, 256, 384, or 512", .0.quote())] - InvalidLengthForSha(String), - #[error("--algorithm={0} requires specifying --length 224, 256, 384, or 512")] - LengthRequiredForSha(String), - #[error("--length is only supported with --algorithm blake2b, sha2, or sha3")] - LengthOnlyForBlake2bSha2Sha3, - - #[error("the --binary and --text options are meaningless when verifying checksums")] - BinaryTextConflict, - #[error("--text mode is only supported with --untagged")] - TextWithoutUntagged, - #[error("--check is not supported with --algorithm={{bsd,sysv,crc,crc32b}}")] - AlgorithmNotSupportedWithCheck, - #[error("You cannot combine multiple hash algorithms!")] - CombineMultipleAlgorithms, - #[error("Needs an algorithm to hash with.\nUse --help for more information.")] - NeedAlgorithmToHash, - #[error("unknown algorithm: {0}: clap should have prevented this case")] - UnknownAlgorithm(String), - #[error("")] - Io(#[from] io::Error), -} - -impl UError for ChecksumError { - fn code(&self) -> i32 { 
- 1 - } -} - #[allow(clippy::comparison_chain)] fn print_cksum_report(res: &ChecksumResult) { if res.bad_format == 1 { @@ -899,16 +497,16 @@ fn get_expected_digest_as_hex_string( /// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-". fn get_file_to_check( filename: &OsStr, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, ) -> Result, LineCheckError> { - let filename_bytes = os_str_as_bytes(filename).expect("UTF-8 error"); + let filename_bytes = os_str_as_bytes(filename).map_err(|e| LineCheckError::UError(e.into()))?; if filename == "-" { - Ok(Box::new(stdin())) // Use stdin if "-" is specified in the checksum file + Ok(Box::new(io::stdin())) // Use stdin if "-" is specified in the checksum file } else { let failed_open = || { print_file_report( - std::io::stdout(), + io::stdout(), filename_bytes, FileChecksumResult::CantOpen, "", @@ -1035,7 +633,7 @@ fn compute_and_check_digest_from_file( filename: &[u8], expected_checksum: &str, algo: SizedAlgoKind, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, ) -> Result<(), LineCheckError> { let (filename_to_check_unescaped, prefix) = unescape_filename(filename); let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?; @@ -1047,7 +645,7 @@ fn compute_and_check_digest_from_file( // Read the file and calculate the checksum let mut digest = algo.create_digest(); let (calculated_checksum, _) = - digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bitlen()).unwrap(); + digest_reader(&mut digest, &mut file_reader, false, algo.bitlen()).unwrap(); // Do the checksum validation let checksum_correct = expected_checksum == calculated_checksum; @@ -1070,7 +668,7 @@ fn compute_and_check_digest_from_file( fn process_algo_based_line( line_info: &LineInfo, cli_algo_kind: Option, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, last_algo: &mut Option, ) -> Result<(), LineCheckError> { let filename_to_check = 
line_info.filename.as_slice(); @@ -1099,7 +697,7 @@ fn process_non_algo_based_line( line_info: &LineInfo, cli_algo_kind: AlgoKind, cli_algo_length: Option, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, ) -> Result<(), LineCheckError> { let mut filename_to_check = line_info.filename.as_slice(); if filename_to_check.starts_with(b"*") @@ -1145,7 +743,7 @@ fn process_checksum_line( i: usize, cli_algo_name: Option, cli_algo_length: Option, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, cached_line_format: &mut Option, last_algo: &mut Option, ) -> Result<(), LineCheckError> { @@ -1178,7 +776,7 @@ fn process_checksum_file( filename_input: &OsStr, cli_algo_kind: Option, cli_algo_length: Option, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, ) -> Result<(), FileCheckError> { let mut res = ChecksumResult::default(); @@ -1308,7 +906,7 @@ pub fn perform_checksum_validation<'a, I>( files: I, algo_kind: Option, length_input: Option, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, ) -> UResult<()> where I: Iterator, @@ -1332,201 +930,11 @@ where } } -pub fn digest_reader( - digest: &mut Box, - reader: &mut T, - binary: bool, - output_bits: usize, -) -> io::Result<(String, usize)> { - digest.reset(); - - // Read bytes from `reader` and write those bytes to `digest`. - // - // If `binary` is `false` and the operating system is Windows, then - // `DigestWriter` replaces "\r\n" with "\n" before it writes the - // bytes into `digest`. Otherwise, it just inserts the bytes as-is. - // - // In order to support replacing "\r\n", we must call `finalize()` - // in order to support the possibility that the last character read - // from the reader was "\r". (This character gets buffered by - // `DigestWriter` and only written if the following character is - // "\n". But when "\r" is the last character read, we need to force - // it to be written.) 
- let mut digest_writer = DigestWriter::new(digest, binary); - let output_size = std::io::copy(reader, &mut digest_writer)? as usize; - digest_writer.finalize(); - - if digest.output_bits() > 0 { - Ok((digest.result_str(), output_size)) - } else { - // Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016) - let mut bytes = vec![0; output_bits.div_ceil(8)]; - digest.hash_finalize(&mut bytes); - Ok((hex::encode(bytes), output_size)) - } -} - -/// Calculates the length of the digest. -pub fn calculate_blake2b_length(bit_length: usize) -> UResult> { - calculate_blake2b_length_str(bit_length.to_string().as_str()) -} - -/// Calculates the length of the digest. -pub fn calculate_blake2b_length_str(bit_length: &str) -> UResult> { - // Blake2b's length is parsed in an u64. - match bit_length.parse::() { - Ok(0) => Ok(None), - - // Error cases - Ok(n) if n > 512 => { - show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); - Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) - } - Err(e) if *e.kind() == IntErrorKind::PosOverflow => { - show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); - Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) - } - Err(_) => Err(ChecksumError::InvalidLength(bit_length.into()).into()), - - Ok(n) if n % 8 != 0 => { - show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); - Err(ChecksumError::LengthNotMultipleOf8.into()) - } - - // Valid cases - - // When length is 512, it is blake2b's default. So, don't show it - Ok(512) => Ok(None), - // Divide by 8, as our blake2b implementation expects bytes instead of bits. 
- Ok(n) => Ok(Some(n / 8)), - } -} - -pub fn validate_sha2_sha3_length(algo_name: AlgoKind, length: Option) -> UResult { - match length { - Some(224) => Ok(ShaLength::Len224), - Some(256) => Ok(ShaLength::Len256), - Some(384) => Ok(ShaLength::Len384), - Some(512) => Ok(ShaLength::Len512), - Some(len) => { - show_error!("{}", ChecksumError::InvalidLength(len.to_string())); - Err(ChecksumError::InvalidLengthForSha(algo_name.to_uppercase().into()).into()) - } - None => Err(ChecksumError::LengthRequiredForSha(algo_name.to_lowercase().into()).into()), - } -} - -pub fn sanitize_sha2_sha3_length_str(algo_kind: AlgoKind, length: &str) -> UResult { - // There is a difference in the errors sent when the length is not a number - // vs. its an invalid number. - // - // When inputting an invalid number, an extra error message it printed to - // remind of the accepted inputs. - let len = match length.parse::() { - Ok(l) => l, - // Note: Positive overflow while parsing counts as an invalid number, - // but a number still. 
- Err(e) if *e.kind() == IntErrorKind::PosOverflow => { - show_error!("{}", ChecksumError::InvalidLength(length.into())); - return Err(ChecksumError::InvalidLengthForSha(algo_kind.to_uppercase().into()).into()); - } - Err(_) => return Err(ChecksumError::InvalidLength(length.into()).into()), - }; - - if [224, 256, 384, 512].contains(&len) { - Ok(len) - } else { - show_error!("{}", ChecksumError::InvalidLength(length.into())); - Err(ChecksumError::InvalidLengthForSha(algo_kind.to_uppercase().into()).into()) - } -} - -pub fn unescape_filename(filename: &[u8]) -> (Vec, &'static str) { - let mut unescaped = Vec::with_capacity(filename.len()); - let mut byte_iter = filename.iter().peekable(); - loop { - let Some(byte) = byte_iter.next() else { - break; - }; - if *byte == b'\\' { - match byte_iter.next() { - Some(b'\\') => unescaped.push(b'\\'), - Some(b'n') => unescaped.push(b'\n'), - Some(b'r') => unescaped.push(b'\r'), - Some(x) => { - unescaped.push(b'\\'); - unescaped.push(*x); - } - _ => {} - } - } else { - unescaped.push(*byte); - } - } - let prefix = if unescaped == filename { "" } else { "\\" }; - (unescaped, prefix) -} - -pub fn escape_filename(filename: &Path) -> (String, &'static str) { - let original = filename.as_os_str().to_string_lossy(); - let escaped = original - .replace('\\', "\\\\") - .replace('\n', "\\n") - .replace('\r', "\\r"); - let prefix = if escaped == original { "" } else { "\\" }; - (escaped, prefix) -} - #[cfg(test)] mod tests { - use super::*; use std::ffi::OsString; - #[test] - fn test_unescape_filename() { - let (unescaped, prefix) = unescape_filename(b"test\\nfile.txt"); - assert_eq!(unescaped, b"test\nfile.txt"); - assert_eq!(prefix, "\\"); - let (unescaped, prefix) = unescape_filename(b"test\\nfile.txt"); - assert_eq!(unescaped, b"test\nfile.txt"); - assert_eq!(prefix, "\\"); - - let (unescaped, prefix) = unescape_filename(b"test\\rfile.txt"); - assert_eq!(unescaped, b"test\rfile.txt"); - assert_eq!(prefix, "\\"); - - let (unescaped, 
prefix) = unescape_filename(b"test\\\\file.txt"); - assert_eq!(unescaped, b"test\\file.txt"); - assert_eq!(prefix, "\\"); - } - - #[test] - fn test_escape_filename() { - let (escaped, prefix) = escape_filename(Path::new("testfile.txt")); - assert_eq!(escaped, "testfile.txt"); - assert_eq!(prefix, ""); - - let (escaped, prefix) = escape_filename(Path::new("test\nfile.txt")); - assert_eq!(escaped, "test\\nfile.txt"); - assert_eq!(prefix, "\\"); - - let (escaped, prefix) = escape_filename(Path::new("test\rfile.txt")); - assert_eq!(escaped, "test\\rfile.txt"); - assert_eq!(prefix, "\\"); - - let (escaped, prefix) = escape_filename(Path::new("test\\file.txt")); - assert_eq!(escaped, "test\\\\file.txt"); - assert_eq!(prefix, "\\"); - } - - #[test] - fn test_calculate_blake2b_length() { - assert_eq!(calculate_blake2b_length(0).unwrap(), None); - assert!(calculate_blake2b_length(10).is_err()); - assert!(calculate_blake2b_length(520).is_err()); - assert_eq!(calculate_blake2b_length(512).unwrap(), None); - assert_eq!(calculate_blake2b_length(256).unwrap(), Some(32)); - } + use super::*; #[test] fn test_algo_based_parser() { @@ -1769,7 +1177,7 @@ mod tests { #[test] fn test_print_file_report() { - let opts = ChecksumOptions::default(); + let opts = ChecksumValidateOptions::default(); let cases: &[(&[u8], FileChecksumResult, &str, &[u8])] = &[ (b"filename", FileChecksumResult::Ok, "", b"filename: OK\n"), From 3849d91d69f00652ec2e6cf51d29f931ebb34d72 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Thu, 6 Nov 2025 16:42:04 +0100 Subject: [PATCH 08/10] checksum: Move cksum computation to uucore::checksum --- src/uu/cksum/src/cksum.rs | 262 +----------------- .../src/lib/features/checksum/compute.rs | 246 ++++++++++++++++ src/uucore/src/lib/features/checksum/mod.rs | 1 + 3 files changed, 259 insertions(+), 250 deletions(-) create mode 100644 src/uucore/src/lib/features/checksum/compute.rs diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 
a0a5c00df32..0a246b8e9e3 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -8,257 +8,20 @@ use clap::builder::ValueParser; use clap::{Arg, ArgAction, Command}; use std::ffi::{OsStr, OsString}; -use std::fs::File; -use std::io::{BufReader, Read, Write, stdin, stdout}; use std::iter; -use std::path::Path; -use uucore::checksum::validate::{ChecksumOptions, ChecksumVerbose, perform_checksum_validation}; +use uucore::checksum::compute::{ + ChecksumComputeOptions, DigestFormat, OutputFormat, ReadingMode, perform_checksum_computation, +}; +use uucore::checksum::validate::{ + ChecksumValidateOptions, ChecksumVerbose, perform_checksum_validation, +}; use uucore::checksum::{ AlgoKind, ChecksumError, SUPPORTED_ALGORITHMS, SizedAlgoKind, calculate_blake2b_length_str, - digest_reader, sanitize_sha2_sha3_length_str, + sanitize_sha2_sha3_length_str, }; -use uucore::translate; - -use uucore::{ - encoding, - error::{FromIo, UResult, USimpleError}, - format_usage, - line_ending::LineEnding, - os_str_as_bytes, show, - sum::Digest, -}; - -struct Options { - algo_kind: SizedAlgoKind, - digest: Box, - output_format: OutputFormat, - line_ending: LineEnding, -} - -/// Reading mode used to compute digest. -/// -/// On most linux systems, this is irrelevant, as there is no distinction -/// between text and binary files. Refer to GNU's cksum documentation for more -/// information. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum ReadingMode { - Binary, - Text, -} - -impl ReadingMode { - #[inline] - fn as_char(&self) -> char { - match self { - Self::Binary => '*', - Self::Text => ' ', - } - } -} - -/// Whether to write the digest as hexadecimal or encoded in base64. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum DigestFormat { - Hexadecimal, - Base64, -} - -impl DigestFormat { - #[inline] - fn is_base64(&self) -> bool { - *self == Self::Base64 - } -} - -/// Holds the representation that shall be used for printing a checksum line -#[derive(Debug, PartialEq, Eq)] -enum OutputFormat { - /// Raw digest - Raw, - - /// Selected for older algorithms which had their custom formatting - /// - /// Default for crc, sysv, bsd - Legacy, - - /// `$ALGO_NAME ($FILENAME) = $DIGEST` - Tagged(DigestFormat), - - /// '$DIGEST $FLAG$FILENAME' - /// where 'flag' depends on the reading mode - /// - /// Default for standalone checksum utilities - Untagged(DigestFormat, ReadingMode), -} - -impl OutputFormat { - #[inline] - fn is_raw(&self) -> bool { - *self == Self::Raw - } -} - -fn print_legacy_checksum( - options: &Options, - filename: &OsStr, - sum: &str, - size: usize, -) -> UResult<()> { - debug_assert!(options.algo_kind.is_legacy()); - - // Print the sum - match options.algo_kind { - SizedAlgoKind::Sysv => print!( - "{} {}", - sum.parse::().unwrap(), - size.div_ceil(options.algo_kind.bitlen()), - ), - SizedAlgoKind::Bsd => { - // The BSD checksum output is 5 digit integer - let bsd_width = 5; - print!( - "{:0bsd_width$} {:bsd_width$}", - sum.parse::().unwrap(), - size.div_ceil(options.algo_kind.bitlen()), - ); - } - SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { - print!("{sum} {size}"); - } - _ => unreachable!("Not a legacy algorithm"), - } - - // Print the filename after a space if not stdin - if filename != "-" { - print!(" "); - let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); - } - - Ok(()) -} - -fn print_tagged_checksum(options: &Options, filename: &OsStr, sum: &String) -> UResult<()> { - // Print algo name and opening parenthesis. 
- print!("{} (", options.algo_kind.to_tag()); - - // Print filename - let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); - - // Print closing parenthesis and sum - print!(") = {sum}"); - - Ok(()) -} - -fn print_untagged_checksum( - filename: &OsStr, - sum: &String, - reading_mode: ReadingMode, -) -> UResult<()> { - // Print checksum and reading mode flag - print!("{sum} {}", reading_mode.as_char()); - - // Print filename - let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); - - Ok(()) -} - -/// Calculate checksum -/// -/// # Arguments -/// -/// * `options` - CLI options for the assigning checksum algorithm -/// * `files` - A iterator of [`OsStr`] which is a bunch of files that are using for calculating checksum -fn cksum<'a, I>(mut options: Options, files: I) -> UResult<()> -where - I: Iterator, -{ - let mut files = files.peekable(); - - while let Some(filename) = files.next() { - // Check that in raw mode, we are not provided with several files. - if options.output_format.is_raw() && files.peek().is_some() { - return Err(Box::new(ChecksumError::RawMultipleFiles)); - } - - let filepath = Path::new(filename); - let stdin_buf; - let file_buf; - if filepath.is_dir() { - show!(USimpleError::new( - 1, - translate!("cksum-error-is-directory", "file" => filepath.display()) - )); - continue; - } - - // Handle the file input - let mut file = BufReader::new(if filename == "-" { - stdin_buf = stdin(); - Box::new(stdin_buf) as Box - } else { - file_buf = match File::open(filepath) { - Ok(file) => file, - Err(err) => { - show!(err.map_err_context(|| filepath.to_string_lossy().to_string())); - continue; - } - }; - Box::new(file_buf) as Box - }); - - let (sum_hex, sz) = digest_reader( - &mut options.digest, - &mut file, - false, - options.algo_kind.bitlen(), - ) - .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; - - // Encodes the sum if df is Base64, leaves as-is otherwise. 
- let encode_sum = |sum: String, df: DigestFormat| { - if df.is_base64() { - encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap()) - } else { - sum - } - }; - - match options.output_format { - OutputFormat::Raw => { - let bytes = match options.algo_kind { - SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { - sum_hex.parse::().unwrap().to_be_bytes().to_vec() - } - SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => { - sum_hex.parse::().unwrap().to_be_bytes().to_vec() - } - _ => hex::decode(sum_hex).unwrap(), - }; - // Cannot handle multiple files anyway, output immediately. - stdout().write_all(&bytes)?; - return Ok(()); - } - OutputFormat::Legacy => { - print_legacy_checksum(&options, filename, &sum_hex, sz)?; - } - OutputFormat::Tagged(digest_format) => { - print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?; - } - OutputFormat::Untagged(digest_format, reading_mode) => { - print_untagged_checksum( - filename, - &encode_sum(sum_hex, digest_format), - reading_mode, - )?; - } - } - - print!("{}", options.line_ending); - } - Ok(()) -} +use uucore::error::UResult; +use uucore::line_ending::LineEnding; +use uucore::{format_usage, translate}; mod options { pub const ALGORITHM: &str = "algorithm"; @@ -455,14 +218,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { matches.get_flag(options::BASE64), ); - let opts = Options { + let opts = ChecksumComputeOptions { algo_kind: algo, - digest: algo.create_digest(), output_format, line_ending, }; - cksum(opts, files)?; + perform_checksum_computation(opts, files)?; Ok(()) } diff --git a/src/uucore/src/lib/features/checksum/compute.rs b/src/uucore/src/lib/features/checksum/compute.rs new file mode 100644 index 00000000000..015e9bb0f53 --- /dev/null +++ b/src/uucore/src/lib/features/checksum/compute.rs @@ -0,0 +1,246 @@ +use std::ffi::OsStr; +use std::fs::File; +use std::io::{self, BufReader, Read, Write}; +use std::path::Path; + +use crate::checksum::{ChecksumError, SizedAlgoKind, 
digest_reader}; +use crate::error::{FromIo, UResult, USimpleError}; +use crate::line_ending::LineEnding; +use crate::{encoding, os_str_as_bytes, show, translate}; + +pub struct ChecksumComputeOptions { + pub algo_kind: SizedAlgoKind, + pub output_format: OutputFormat, + pub line_ending: LineEnding, +} + +/// Reading mode used to compute digest. +/// +/// On most linux systems, this is irrelevant, as there is no distinction +/// between text and binary files. Refer to GNU's cksum documentation for more +/// information. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ReadingMode { + Binary, + Text, +} + +impl ReadingMode { + #[inline] + fn as_char(&self) -> char { + match self { + Self::Binary => '*', + Self::Text => ' ', + } + } +} + +/// Whether to write the digest as hexadecimal or encoded in base64. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DigestFormat { + Hexadecimal, + Base64, +} + +impl DigestFormat { + #[inline] + fn is_base64(&self) -> bool { + *self == Self::Base64 + } +} + +/// Holds the representation that shall be used for printing a checksum line +#[derive(Debug, PartialEq, Eq)] +pub enum OutputFormat { + /// Raw digest + Raw, + + /// Selected for older algorithms which had their custom formatting + /// + /// Default for crc, sysv, bsd + Legacy, + + /// `$ALGO_NAME ($FILENAME) = $DIGEST` + Tagged(DigestFormat), + + /// '$DIGEST $FLAG$FILENAME' + /// where 'flag' depends on the reading mode + /// + /// Default for standalone checksum utilities + Untagged(DigestFormat, ReadingMode), +} + +impl OutputFormat { + #[inline] + fn is_raw(&self) -> bool { + *self == Self::Raw + } +} + +fn print_legacy_checksum( + options: &ChecksumComputeOptions, + filename: &OsStr, + sum: &str, + size: usize, +) -> UResult<()> { + debug_assert!(options.algo_kind.is_legacy()); + + // Print the sum + match options.algo_kind { + SizedAlgoKind::Sysv => print!( + "{} {}", + sum.parse::().unwrap(), + size.div_ceil(options.algo_kind.bitlen()), + ), + 
SizedAlgoKind::Bsd => { + // The BSD checksum output is 5 digit integer + let bsd_width = 5; + print!( + "{:0bsd_width$} {:bsd_width$}", + sum.parse::().unwrap(), + size.div_ceil(options.algo_kind.bitlen()), + ); + } + SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { + print!("{sum} {size}"); + } + _ => unreachable!("Not a legacy algorithm"), + } + + // Print the filename after a space if not stdin + if filename != "-" { + print!(" "); + let _dropped_result = io::stdout().write_all(os_str_as_bytes(filename)?); + } + + Ok(()) +} + +fn print_tagged_checksum( + options: &ChecksumComputeOptions, + filename: &OsStr, + sum: &String, +) -> UResult<()> { + // Print algo name and opening parenthesis. + print!("{} (", options.algo_kind.to_tag()); + + // Print filename + let _dropped_result = io::stdout().write_all(os_str_as_bytes(filename)?); + + // Print closing parenthesis and sum + print!(") = {sum}"); + + Ok(()) +} + +fn print_untagged_checksum( + filename: &OsStr, + sum: &String, + reading_mode: ReadingMode, +) -> UResult<()> { + // Print checksum and reading mode flag + print!("{sum} {}", reading_mode.as_char()); + + // Print filename + let _dropped_result = io::stdout().write_all(os_str_as_bytes(filename)?); + + Ok(()) +} + +/// Calculate checksum +/// +/// # Arguments +/// +/// * `options` - CLI options for the assigning checksum algorithm +/// * `files` - A iterator of [`OsStr`] which is a bunch of files that are using for calculating checksum +pub fn perform_checksum_computation<'a, I>(options: ChecksumComputeOptions, files: I) -> UResult<()> +where + I: Iterator, +{ + let mut files = files.peekable(); + + while let Some(filename) = files.next() { + // Check that in raw mode, we are not provided with several files. 
+ if options.output_format.is_raw() && files.peek().is_some() { + return Err(Box::new(ChecksumError::RawMultipleFiles)); + } + + let filepath = Path::new(filename); + let stdin_buf; + let file_buf; + if filepath.is_dir() { + show!(USimpleError::new( + 1, + translate!("cksum-error-is-directory", "file" => filepath.display()) + )); + continue; + } + + // Handle the file input + let mut file = BufReader::new(if filename == "-" { + stdin_buf = io::stdin(); + Box::new(stdin_buf) as Box + } else { + file_buf = match File::open(filepath) { + Ok(file) => file, + Err(err) => { + show!(err.map_err_context(|| filepath.to_string_lossy().to_string())); + continue; + } + }; + Box::new(file_buf) as Box + }); + + let mut digest = options.algo_kind.create_digest(); + + let (sum_hex, sz) = digest_reader( + &mut digest, + &mut file, + false, + options.algo_kind.bitlen(), + ) + .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; + + // Encodes the sum if df is Base64, leaves as-is otherwise. + let encode_sum = |sum: String, df: DigestFormat| { + if df.is_base64() { + encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap()) + } else { + sum + } + }; + + match options.output_format { + OutputFormat::Raw => { + let bytes = match options.algo_kind { + SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { + sum_hex.parse::().unwrap().to_be_bytes().to_vec() + } + SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => { + sum_hex.parse::().unwrap().to_be_bytes().to_vec() + } + _ => hex::decode(sum_hex).unwrap(), + }; + // Cannot handle multiple files anyway, output immediately. 
+ io::stdout().write_all(&bytes)?; + return Ok(()); + } + OutputFormat::Legacy => { + print_legacy_checksum(&options, filename, &sum_hex, sz)?; + } + OutputFormat::Tagged(digest_format) => { + print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?; + } + OutputFormat::Untagged(digest_format, reading_mode) => { + print_untagged_checksum( + filename, + &encode_sum(sum_hex, digest_format), + reading_mode, + )?; + } + } + + print!("{}", options.line_ending); + } + Ok(()) +} diff --git a/src/uucore/src/lib/features/checksum/mod.rs b/src/uucore/src/lib/features/checksum/mod.rs index 29af7a49131..a3b7e53c92f 100644 --- a/src/uucore/src/lib/features/checksum/mod.rs +++ b/src/uucore/src/lib/features/checksum/mod.rs @@ -21,6 +21,7 @@ use crate::{ }; use thiserror::Error; +pub mod compute; pub mod validate; pub const ALGORITHM_OPTIONS_SYSV: &str = "sysv"; From c97ce1bb8e08cbe891027ae72343d6310bdd8f05 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Thu, 6 Nov 2025 18:15:34 +0100 Subject: [PATCH 09/10] checksum: Adapt checksum computation to hashsum --- src/uu/cksum/src/cksum.rs | 40 +---- src/uu/hashsum/Cargo.toml | 2 +- src/uu/hashsum/src/hashsum.rs | 154 ++++-------------- .../src/lib/features/checksum/compute.rs | 133 ++++++++++++--- src/uucore/src/lib/features/checksum/mod.rs | 38 ++--- 5 files changed, 164 insertions(+), 203 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 0a246b8e9e3..7ff0243ebdb 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -10,7 +10,7 @@ use clap::{Arg, ArgAction, Command}; use std::ffi::{OsStr, OsString}; use std::iter; use uucore::checksum::compute::{ - ChecksumComputeOptions, DigestFormat, OutputFormat, ReadingMode, perform_checksum_computation, + ChecksumComputeOptions, figure_out_output_format, perform_checksum_computation, }; use uucore::checksum::validate::{ ChecksumValidateOptions, ChecksumVerbose, perform_checksum_validation, @@ -84,43 +84,6 @@ fn 
handle_tag_text_binary_flags>( Ok((tag, binary)) } -/// Use already-processed arguments to decide the output format. -fn figure_out_output_format( - algo: SizedAlgoKind, - tag: bool, - binary: bool, - raw: bool, - base64: bool, -) -> OutputFormat { - // Raw output format takes precedence over anything else. - if raw { - return OutputFormat::Raw; - } - - // Then, if the algo is legacy, takes precedence over the rest - if algo.is_legacy() { - return OutputFormat::Legacy; - } - - let digest_format = if base64 { - DigestFormat::Base64 - } else { - DigestFormat::Hexadecimal - }; - - // After that, decide between tagged and untagged output - if tag { - OutputFormat::Tagged(digest_format) - } else { - let reading_mode = if binary { - ReadingMode::Binary - } else { - ReadingMode::Text - }; - OutputFormat::Untagged(digest_format, reading_mode) - } -} - /// Sanitize the `--length` argument depending on `--algorithm` and `--length`. fn maybe_sanitize_length( algo_cli: Option, @@ -222,6 +185,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { algo_kind: algo, output_format, line_ending, + no_names: false, }; perform_checksum_computation(opts, files)?; diff --git a/src/uu/hashsum/Cargo.toml b/src/uu/hashsum/Cargo.toml index 00eb152edb4..ec382870bdc 100644 --- a/src/uu/hashsum/Cargo.toml +++ b/src/uu/hashsum/Cargo.toml @@ -19,7 +19,7 @@ path = "src/hashsum.rs" [dependencies] clap = { workspace = true } -uucore = { workspace = true, features = ["checksum", "sum"] } +uucore = { workspace = true, features = ["checksum", "encoding", "sum"] } fluent = { workspace = true } [[bin]] diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 61bd0f0ff6c..ebe3ac03360 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -5,47 +5,26 @@ // spell-checker:ignore (ToDO) algo, algoname, bitlen, regexes, nread, nonames -use clap::ArgAction; -use clap::builder::ValueParser; -use clap::value_parser; -use clap::{Arg, ArgMatches, Command}; 
use std::ffi::{OsStr, OsString}; -use std::fs::File; -use std::io::{BufReader, Read, stdin}; use std::iter; use std::num::ParseIntError; use std::path::Path; +use clap::builder::ValueParser; +use clap::{Arg, ArgAction, ArgMatches, Command, value_parser}; + +use uucore::checksum::compute::{ + ChecksumComputeOptions, figure_out_output_format, perform_checksum_computation, +}; use uucore::checksum::validate::{ ChecksumValidateOptions, ChecksumVerbose, perform_checksum_validation, }; -use uucore::checksum::{ - AlgoKind, ChecksumError, SizedAlgoKind, calculate_blake2b_length, digest_reader, - escape_filename, -}; -use uucore::error::{UResult, strip_errno}; -use uucore::sum::Digest; +use uucore::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, calculate_blake2b_length}; +use uucore::error::UResult; +use uucore::line_ending::LineEnding; use uucore::{format_usage, translate}; const NAME: &str = "hashsum"; -// Using the same read buffer size as GNU -const READ_BUFFER_SIZE: usize = 32 * 1024; - -struct Options<'a> { - algo: SizedAlgoKind, - digest: Box, - binary: bool, - binary_name: &'a str, - //check: bool, - tag: bool, - nonames: bool, - //status: bool, - //quiet: bool, - //strict: bool, - //warn: bool, - zero: bool, - //ignore_missing: bool, -} /// Creates a hasher instance based on the command-line flags. 
/// @@ -186,9 +165,9 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { }; let check = matches.get_flag("check"); let status = matches.get_flag("status"); - let quiet = matches.get_flag("quiet") || status; + let quiet = matches.get_flag("quiet"); let strict = matches.get_flag("strict"); - let warn = matches.get_flag("warn") && !status; + let warn = matches.get_flag("warn"); let ignore_missing = matches.get_flag("ignore-missing"); if ignore_missing && !check { @@ -232,33 +211,36 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { return Err(ChecksumError::StrictNotCheck.into()); } - let nonames = *matches + let no_names = *matches .try_get_one("no-names") .unwrap_or(None) .unwrap_or(&false); - let zero = matches.get_flag("zero"); + let line_ending = LineEnding::from_zero_flag(matches.get_flag("zero")); let algo = SizedAlgoKind::from_unsized(algo_kind, length)?; - let opts = Options { - algo, - digest: algo.create_digest(), - binary, - binary_name: &binary_name, - tag: matches.get_flag("tag"), - nonames, - //status, - //quiet, - //warn, - zero, - //ignore_missing, + let opts = ChecksumComputeOptions { + algo_kind: algo, + output_format: figure_out_output_format( + algo, + matches.get_flag(options::TAG), + binary, + /* raw */ false, + /* base64: */ false, + ), + line_ending, + no_names, }; + let files = matches.get_many::(options::FILE).map_or_else( + // No files given, read from stdin. + || Box::new(iter::once(OsStr::new("-"))) as Box>, + // At least one file given, read from them. 
+ |files| Box::new(files.map(OsStr::new)) as Box>, + ); + // Show the hashsum of the input - match matches.get_many::(options::FILE) { - Some(files) => hashsum(opts, files.map(|f| f.as_os_str())), - None => hashsum(opts, iter::once(OsStr::new("-"))), - } + perform_checksum_computation(opts, files) } mod options { @@ -489,75 +471,3 @@ fn uu_app(binary_name: &str) -> (Command, bool) { (command, is_hashsum_bin) } - -#[allow(clippy::cognitive_complexity)] -fn hashsum<'a, I>(mut options: Options, files: I) -> UResult<()> -where - I: Iterator, -{ - let binary_marker = if options.binary { "*" } else { " " }; - let mut err_found = None; - for filename in files { - let filename = Path::new(filename); - - let mut file = BufReader::with_capacity( - READ_BUFFER_SIZE, - if filename == OsStr::new("-") { - Box::new(stdin()) as Box - } else { - let file_buf = match File::open(filename) { - Ok(f) => f, - Err(e) => { - eprintln!( - "{}: {}: {}", - options.binary_name, - filename.to_string_lossy(), - strip_errno(&e) - ); - err_found = Some(ChecksumError::Io(e)); - continue; - } - }; - Box::new(file_buf) as Box - }, - ); - - let sum = match digest_reader( - &mut options.digest, - &mut file, - options.binary, - options.algo.bitlen(), - ) { - Ok((sum, _)) => sum, - Err(e) => { - eprintln!( - "{}: {}: {}", - options.binary_name, - filename.to_string_lossy(), - strip_errno(&e) - ); - err_found = Some(ChecksumError::Io(e)); - continue; - } - }; - - let (escaped_filename, prefix) = escape_filename(filename); - if options.tag { - println!( - "{prefix}{} ({escaped_filename}) = {sum}", - options.algo.to_tag() - ); - } else if options.nonames { - println!("{sum}"); - } else if options.zero { - // with zero, we don't escape the filename - print!("{sum} {binary_marker}{}\0", filename.display()); - } else { - println!("{prefix}{sum} {binary_marker}{escaped_filename}"); - } - } - match err_found { - None => Ok(()), - Some(e) => Err(Box::new(e)), - } -} diff --git 
a/src/uucore/src/lib/features/checksum/compute.rs b/src/uucore/src/lib/features/checksum/compute.rs index 015e9bb0f53..426b2633e34 100644 --- a/src/uucore/src/lib/features/checksum/compute.rs +++ b/src/uucore/src/lib/features/checksum/compute.rs @@ -1,17 +1,36 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore bitlen + use std::ffi::OsStr; use std::fs::File; use std::io::{self, BufReader, Read, Write}; use std::path::Path; -use crate::checksum::{ChecksumError, SizedAlgoKind, digest_reader}; +use crate::checksum::{ChecksumError, SizedAlgoKind, digest_reader, escape_filename}; use crate::error::{FromIo, UResult, USimpleError}; use crate::line_ending::LineEnding; -use crate::{encoding, os_str_as_bytes, show, translate}; +use crate::{encoding, show, translate}; + +/// Use the same buffer size as GNU when reading a file to create a checksum +/// from it: 32 KiB. +const READ_BUFFER_SIZE: usize = 32 * 1024; pub struct ChecksumComputeOptions { + /// Which algorithm to use to compute the digest. pub algo_kind: SizedAlgoKind, + + /// Printing format to use for each checksum. pub output_format: OutputFormat, + + /// Whether to finish lines with '\n' or '\0'. pub line_ending: LineEnding, + + /// (non-GNU option) Do not print file names + pub no_names: bool, } /// Reading mode used to compute digest. @@ -77,6 +96,43 @@ impl OutputFormat { } } +/// Use already-processed arguments to decide the output format. +pub fn figure_out_output_format( + algo: SizedAlgoKind, + tag: bool, + binary: bool, + raw: bool, + base64: bool, +) -> OutputFormat { + // Raw output format takes precedence over anything else. 
+ if raw { + return OutputFormat::Raw; + } + + // Then, if the algo is legacy, takes precedence over the rest + if algo.is_legacy() { + return OutputFormat::Legacy; + } + + let digest_format = if base64 { + DigestFormat::Base64 + } else { + DigestFormat::Hexadecimal + }; + + // After that, decide between tagged and untagged output + if tag { + OutputFormat::Tagged(digest_format) + } else { + let reading_mode = if binary { + ReadingMode::Binary + } else { + ReadingMode::Text + }; + OutputFormat::Untagged(digest_format, reading_mode) + } +} + fn print_legacy_checksum( options: &ChecksumComputeOptions, filename: &OsStr, @@ -85,6 +141,14 @@ fn print_legacy_checksum( ) -> UResult<()> { debug_assert!(options.algo_kind.is_legacy()); + let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul { + (filename.to_string_lossy().to_string(), "") + } else { + escape_filename(filename) + }; + + print!("{prefix}"); + // Print the sum match options.algo_kind { SizedAlgoKind::Sysv => print!( @@ -108,9 +172,9 @@ fn print_legacy_checksum( } // Print the filename after a space if not stdin - if filename != "-" { + if escaped_filename != "-" { print!(" "); - let _dropped_result = io::stdout().write_all(os_str_as_bytes(filename)?); + let _dropped_result = io::stdout().write_all(escaped_filename.as_bytes()); } Ok(()) @@ -121,11 +185,17 @@ fn print_tagged_checksum( filename: &OsStr, sum: &String, ) -> UResult<()> { + let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul { + (filename.to_string_lossy().to_string(), "") + } else { + escape_filename(filename) + }; + // Print algo name and opening parenthesis. 
- print!("{} (", options.algo_kind.to_tag()); + print!("{prefix}{} (", options.algo_kind.to_tag()); // Print filename - let _dropped_result = io::stdout().write_all(os_str_as_bytes(filename)?); + let _dropped_result = io::stdout().write_all(escaped_filename.as_bytes()); // Print closing parenthesis and sum print!(") = {sum}"); @@ -134,15 +204,28 @@ fn print_tagged_checksum( } fn print_untagged_checksum( + options: &ChecksumComputeOptions, filename: &OsStr, sum: &String, reading_mode: ReadingMode, ) -> UResult<()> { + // early check for the "no-names" option + if options.no_names { + print!("{sum}"); + return Ok(()); + } + + let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul { + (filename.to_string_lossy().to_string(), "") + } else { + escape_filename(filename) + }; + // Print checksum and reading mode flag - print!("{sum} {}", reading_mode.as_char()); + print!("{prefix}{sum} {}", reading_mode.as_char()); // Print filename - let _dropped_result = io::stdout().write_all(os_str_as_bytes(filename)?); + let _dropped_result = io::stdout().write_all(escaped_filename.as_bytes()); Ok(()) } @@ -171,25 +254,30 @@ where if filepath.is_dir() { show!(USimpleError::new( 1, - translate!("cksum-error-is-directory", "file" => filepath.display()) + // TODO: Rework translation, which is broken since this code moved to uucore + // translate!("cksum-error-is-directory", "file" => filepath.display()) + format!("{}: Is a directory", filepath.display()) )); continue; } // Handle the file input - let mut file = BufReader::new(if filename == "-" { - stdin_buf = io::stdin(); - Box::new(stdin_buf) as Box - } else { - file_buf = match File::open(filepath) { - Ok(file) => file, - Err(err) => { - show!(err.map_err_context(|| filepath.to_string_lossy().to_string())); - continue; - } - }; - Box::new(file_buf) as Box - }); + let mut file = BufReader::with_capacity( + READ_BUFFER_SIZE, + if filename == "-" { + stdin_buf = io::stdin(); + Box::new(stdin_buf) as Box + } else { + 
file_buf = match File::open(filepath) { + Ok(file) => file, + Err(err) => { + show!(err.map_err_context(|| filepath.to_string_lossy().into())); + continue; + } + }; + Box::new(file_buf) as Box + }, + ); let mut digest = options.algo_kind.create_digest(); @@ -233,6 +321,7 @@ where } OutputFormat::Untagged(digest_format, reading_mode) => { print_untagged_checksum( + &options, filename, &encode_sum(sum_hex, digest_format), reading_mode, diff --git a/src/uucore/src/lib/features/checksum/mod.rs b/src/uucore/src/lib/features/checksum/mod.rs index a3b7e53c92f..87c8836fd7f 100644 --- a/src/uucore/src/lib/features/checksum/mod.rs +++ b/src/uucore/src/lib/features/checksum/mod.rs @@ -2,24 +2,22 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore anotherfile invalidchecksum JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit rsplit + +// spell-checker:ignore bitlen + +use std::ffi::OsStr; +use std::io::{self, Read}; +use std::num::IntErrorKind; use os_display::Quotable; -use std::{ - io::{self, Read}, - num::IntErrorKind, - path::Path, -}; +use thiserror::Error; -use crate::{ - error::{UError, UResult}, - show_error, - sum::{ - Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256, - Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV, - }, +use crate::error::{UError, UResult}; +use crate::show_error; +use crate::sum::{ + Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256, + Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV, }; -use thiserror::Error; pub mod compute; pub mod validate; @@ -553,8 +551,8 @@ pub fn unescape_filename(filename: &[u8]) -> (Vec, &'static str) { (unescaped, prefix) } -pub fn escape_filename(filename: &Path) -> (String, &'static str) { - let original = filename.as_os_str().to_string_lossy(); +pub fn 
escape_filename(filename: &OsStr) -> (String, &'static str) { + let original = filename.to_string_lossy(); let escaped = original .replace('\\', "\\\\") .replace('\n', "\\n") @@ -587,19 +585,19 @@ mod tests { #[test] fn test_escape_filename() { - let (escaped, prefix) = escape_filename(Path::new("testfile.txt")); + let (escaped, prefix) = escape_filename(OsStr::new("testfile.txt")); assert_eq!(escaped, "testfile.txt"); assert_eq!(prefix, ""); - let (escaped, prefix) = escape_filename(Path::new("test\nfile.txt")); + let (escaped, prefix) = escape_filename(OsStr::new("test\nfile.txt")); assert_eq!(escaped, "test\\nfile.txt"); assert_eq!(prefix, "\\"); - let (escaped, prefix) = escape_filename(Path::new("test\rfile.txt")); + let (escaped, prefix) = escape_filename(OsStr::new("test\rfile.txt")); assert_eq!(escaped, "test\\rfile.txt"); assert_eq!(prefix, "\\"); - let (escaped, prefix) = escape_filename(Path::new("test\\file.txt")); + let (escaped, prefix) = escape_filename(OsStr::new("test\\file.txt")); assert_eq!(escaped, "test\\\\file.txt"); assert_eq!(prefix, "\\"); } From 6d6a9917ad08d7ceaf3eead765790fa0860d3635 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Thu, 6 Nov 2025 20:18:39 +0100 Subject: [PATCH 10/10] checksum: fix binary flag on windows, ignore test --- src/uu/cksum/src/cksum.rs | 1 + src/uu/hashsum/src/hashsum.rs | 1 + src/uucore/src/lib/features/checksum/compute.rs | 5 ++++- src/uucore/src/lib/features/checksum/validate.rs | 12 ++++++++++-- tests/by-util/test_hashsum.rs | 13 ++++--------- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 7ff0243ebdb..499fc52c00d 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -185,6 +185,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { algo_kind: algo, output_format, line_ending, + binary: false, no_names: false, }; diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 
ebe3ac03360..d6258210f17 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -229,6 +229,7 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { /* base64: */ false, ), line_ending, + binary, no_names, }; diff --git a/src/uucore/src/lib/features/checksum/compute.rs b/src/uucore/src/lib/features/checksum/compute.rs index 426b2633e34..e91c54166d7 100644 --- a/src/uucore/src/lib/features/checksum/compute.rs +++ b/src/uucore/src/lib/features/checksum/compute.rs @@ -29,6 +29,9 @@ pub struct ChecksumComputeOptions { /// Whether to finish lines with '\n' or '\0'. pub line_ending: LineEnding, + /// On windows, open files as binary instead of text + pub binary: bool, + /// (non-GNU option) Do not print file names pub no_names: bool, } @@ -284,7 +287,7 @@ where let (sum_hex, sz) = digest_reader( &mut digest, &mut file, - false, + options.binary, options.algo_kind.bitlen(), ) .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; diff --git a/src/uucore/src/lib/features/checksum/validate.rs b/src/uucore/src/lib/features/checksum/validate.rs index a1d851a05b6..b68032925ec 100644 --- a/src/uucore/src/lib/features/checksum/validate.rs +++ b/src/uucore/src/lib/features/checksum/validate.rs @@ -644,8 +644,16 @@ fn compute_and_check_digest_from_file( // Read the file and calculate the checksum let mut digest = algo.create_digest(); - let (calculated_checksum, _) = - digest_reader(&mut digest, &mut file_reader, false, algo.bitlen()).unwrap(); + + // TODO: improve function signature to use ReadingMode instead of binary bool + // Set binary to false because --binary is not supported with --check + let (calculated_checksum, _) = digest_reader( + &mut digest, + &mut file_reader, + /* binary */ false, + algo.bitlen(), + ) + .unwrap(); // Do the checksum validation let checksum_correct = expected_checksum == calculated_checksum; diff --git a/tests/by-util/test_hashsum.rs b/tests/by-util/test_hashsum.rs index b2eb96879b7..beaf994e1e8 
100644 --- a/tests/by-util/test_hashsum.rs +++ b/tests/by-util/test_hashsum.rs @@ -107,17 +107,12 @@ macro_rules! test_digest { at.write("a", "file1\n"); at.write("c", "file3\n"); - #[cfg(unix)] - let file_not_found_str = "No such file or directory"; - #[cfg(not(unix))] - let file_not_found_str = "The system cannot find the file specified"; - ts.ucmd() .args(&[DIGEST_ARG, BITS_ARG, "a", "b", "c"]) .fails() .stdout_contains("a\n") .stdout_contains("c\n") - .stderr_contains(format!("b: {file_not_found_str}")); + .stderr_contains("b: No such file or directory"); } } )*) @@ -1097,11 +1092,11 @@ fn test_sha256_stdin_binary() { ); } +// This test is currently disabled on windows #[test] +#[cfg_attr(windows, ignore = "Discussion is in #9168")] fn test_check_sha256_binary() { - let ts = TestScenario::new(util_name!()); - - ts.ucmd() + new_ucmd!() .args(&[ "--sha256", "--bits=256",