diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index c7a3e969b4d..499fc52c00d 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -3,270 +3,25 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) fname, algo +// spell-checker:ignore (ToDO) fname, algo, bitlen use clap::builder::ValueParser; use clap::{Arg, ArgAction, Command}; use std::ffi::{OsStr, OsString}; -use std::fs::File; -use std::io::{BufReader, Read, Write, stdin, stdout}; use std::iter; -use std::path::Path; -use uucore::checksum::{ - ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_BSD, ALGORITHM_OPTIONS_CRC, - ALGORITHM_OPTIONS_CRC32B, ALGORITHM_OPTIONS_SHA2, ALGORITHM_OPTIONS_SHA3, - ALGORITHM_OPTIONS_SYSV, ChecksumError, ChecksumOptions, ChecksumVerbose, HashAlgorithm, - LEGACY_ALGORITHMS, SUPPORTED_ALGORITHMS, calculate_blake2b_length_str, detect_algo, - digest_reader, perform_checksum_validation, sanitize_sha2_sha3_length_str, +use uucore::checksum::compute::{ + ChecksumComputeOptions, figure_out_output_format, perform_checksum_computation, }; -use uucore::translate; - -use uucore::{ - encoding, - error::{FromIo, UResult, USimpleError}, - format_usage, - line_ending::LineEnding, - os_str_as_bytes, show, - sum::Digest, +use uucore::checksum::validate::{ + ChecksumValidateOptions, ChecksumVerbose, perform_checksum_validation, }; - -struct Options { - algo_name: &'static str, - digest: Box, - output_bits: usize, - length: Option, - output_format: OutputFormat, - line_ending: LineEnding, -} - -/// Reading mode used to compute digest. -/// -/// On most linux systems, this is irrelevant, as there is no distinction -/// between text and binary files. Refer to GNU's cksum documentation for more -/// information. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum ReadingMode { - Binary, - Text, -} - -impl ReadingMode { - #[inline] - fn as_char(&self) -> char { - match self { - Self::Binary => '*', - Self::Text => ' ', - } - } -} - -/// Whether to write the digest as hexadecimal or encoded in base64. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum DigestFormat { - Hexadecimal, - Base64, -} - -impl DigestFormat { - #[inline] - fn is_base64(&self) -> bool { - *self == Self::Base64 - } -} - -/// Holds the representation that shall be used for printing a checksum line -#[derive(Debug, PartialEq, Eq)] -enum OutputFormat { - /// Raw digest - Raw, - - /// Selected for older algorithms which had their custom formatting - /// - /// Default for crc, sysv, bsd - Legacy, - - /// `$ALGO_NAME ($FILENAME) = $DIGEST` - Tagged(DigestFormat), - - /// '$DIGEST $FLAG$FILENAME' - /// where 'flag' depends on the reading mode - /// - /// Default for standalone checksum utilities - Untagged(DigestFormat, ReadingMode), -} - -impl OutputFormat { - #[inline] - fn is_raw(&self) -> bool { - *self == Self::Raw - } -} - -fn print_legacy_checksum( - options: &Options, - filename: &OsStr, - sum: &str, - size: usize, -) -> UResult<()> { - debug_assert!(LEGACY_ALGORITHMS.contains(&options.algo_name)); - - // Print the sum - match options.algo_name { - ALGORITHM_OPTIONS_SYSV => print!( - "{} {}", - sum.parse::().unwrap(), - size.div_ceil(options.output_bits), - ), - ALGORITHM_OPTIONS_BSD => { - // The BSD checksum output is 5 digit integer - let bsd_width = 5; - print!( - "{:0bsd_width$} {:bsd_width$}", - sum.parse::().unwrap(), - size.div_ceil(options.output_bits), - ); - } - ALGORITHM_OPTIONS_CRC | ALGORITHM_OPTIONS_CRC32B => { - print!("{sum} {size}"); - } - _ => unreachable!("Not a legacy algorithm"), - } - - // Print the filename after a space if not stdin - if filename != "-" { - print!(" "); - let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); - } - - Ok(()) -} - -fn 
print_tagged_checksum(options: &Options, filename: &OsStr, sum: &String) -> UResult<()> { - // Print algo name and opening parenthesis. - print!( - "{} (", - match (options.algo_name, options.length) { - // Multiply the length by 8, as we want to print the length in bits. - (ALGORITHM_OPTIONS_BLAKE2B, Some(l)) => format!("BLAKE2b-{}", l * 8), - (ALGORITHM_OPTIONS_BLAKE2B, None) => "BLAKE2b".into(), - (name, _) => name.to_ascii_uppercase(), - } - ); - - // Print filename - let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); - - // Print closing parenthesis and sum - print!(") = {sum}"); - - Ok(()) -} - -fn print_untagged_checksum( - filename: &OsStr, - sum: &String, - reading_mode: ReadingMode, -) -> UResult<()> { - // Print checksum and reading mode flag - print!("{sum} {}", reading_mode.as_char()); - - // Print filename - let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); - - Ok(()) -} - -/// Calculate checksum -/// -/// # Arguments -/// -/// * `options` - CLI options for the assigning checksum algorithm -/// * `files` - A iterator of [`OsStr`] which is a bunch of files that are using for calculating checksum -fn cksum<'a, I>(mut options: Options, files: I) -> UResult<()> -where - I: Iterator, -{ - let mut files = files.peekable(); - - while let Some(filename) = files.next() { - // Check that in raw mode, we are not provided with several files. 
- if options.output_format.is_raw() && files.peek().is_some() { - return Err(Box::new(ChecksumError::RawMultipleFiles)); - } - - let filepath = Path::new(filename); - let stdin_buf; - let file_buf; - if filepath.is_dir() { - show!(USimpleError::new( - 1, - translate!("cksum-error-is-directory", "file" => filepath.display()) - )); - continue; - } - - // Handle the file input - let mut file = BufReader::new(if filename == "-" { - stdin_buf = stdin(); - Box::new(stdin_buf) as Box - } else { - file_buf = match File::open(filepath) { - Ok(file) => file, - Err(err) => { - show!(err.map_err_context(|| filepath.to_string_lossy().to_string())); - continue; - } - }; - Box::new(file_buf) as Box - }); - - let (sum_hex, sz) = - digest_reader(&mut options.digest, &mut file, false, options.output_bits) - .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; - - // Encodes the sum if df is Base64, leaves as-is otherwise. - let encode_sum = |sum: String, df: DigestFormat| { - if df.is_base64() { - encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap()) - } else { - sum - } - }; - - match options.output_format { - OutputFormat::Raw => { - let bytes = match options.algo_name { - ALGORITHM_OPTIONS_CRC | ALGORITHM_OPTIONS_CRC32B => { - sum_hex.parse::().unwrap().to_be_bytes().to_vec() - } - ALGORITHM_OPTIONS_SYSV | ALGORITHM_OPTIONS_BSD => { - sum_hex.parse::().unwrap().to_be_bytes().to_vec() - } - _ => hex::decode(sum_hex).unwrap(), - }; - // Cannot handle multiple files anyway, output immediately. 
- stdout().write_all(&bytes)?; - return Ok(()); - } - OutputFormat::Legacy => { - print_legacy_checksum(&options, filename, &sum_hex, sz)?; - } - OutputFormat::Tagged(digest_format) => { - print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?; - } - OutputFormat::Untagged(digest_format, reading_mode) => { - print_untagged_checksum( - filename, - &encode_sum(sum_hex, digest_format), - reading_mode, - )?; - } - } - - print!("{}", options.line_ending); - } - Ok(()) -} +use uucore::checksum::{ + AlgoKind, ChecksumError, SUPPORTED_ALGORITHMS, SizedAlgoKind, calculate_blake2b_length_str, + sanitize_sha2_sha3_length_str, +}; +use uucore::error::UResult; +use uucore::line_ending::LineEnding; +use uucore::{format_usage, translate}; mod options { pub const ALGORITHM: &str = "algorithm"; @@ -329,46 +84,9 @@ fn handle_tag_text_binary_flags>( Ok((tag, binary)) } -/// Use already-processed arguments to decide the output format. -fn figure_out_output_format( - algo: &HashAlgorithm, - tag: bool, - binary: bool, - raw: bool, - base64: bool, -) -> OutputFormat { - // Raw output format takes precedence over anything else. - if raw { - return OutputFormat::Raw; - } - - // Then, if the algo is legacy, takes precedence over the rest - if LEGACY_ALGORITHMS.contains(&algo.name) { - return OutputFormat::Legacy; - } - - let digest_format = if base64 { - DigestFormat::Base64 - } else { - DigestFormat::Hexadecimal - }; - - // After that, decide between tagged and untagged output - if tag { - OutputFormat::Tagged(digest_format) - } else { - let reading_mode = if binary { - ReadingMode::Binary - } else { - ReadingMode::Text - }; - OutputFormat::Untagged(digest_format, reading_mode) - } -} - /// Sanitize the `--length` argument depending on `--algorithm` and `--length`. 
fn maybe_sanitize_length( - algo_cli: Option<&str>, + algo_cli: Option, input_length: Option<&str>, ) -> UResult> { match (algo_cli, input_length) { @@ -376,12 +94,12 @@ fn maybe_sanitize_length( (_, None) => Ok(None), // For SHA2 and SHA3, if a length is provided, ensure it is correct. - (Some(algo @ (ALGORITHM_OPTIONS_SHA2 | ALGORITHM_OPTIONS_SHA3)), Some(s_len)) => { + (Some(algo @ (AlgoKind::Sha2 | AlgoKind::Sha3)), Some(s_len)) => { sanitize_sha2_sha3_length_str(algo, s_len).map(Some) } // For BLAKE2b, if a length is provided, validate it. - (Some(ALGORITHM_OPTIONS_BLAKE2B), Some(len)) => calculate_blake2b_length_str(len), + (Some(AlgoKind::Blake2b), Some(len)) => calculate_blake2b_length_str(len), // For any other provided algorithm, check if length is 0. // Otherwise, this is an error. @@ -398,7 +116,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let algo_cli = matches .get_one::(options::ALGORITHM) - .map(String::as_str); + .map(AlgoKind::from_cksum) + .transpose()?; let input_length = matches .get_one::(options::LENGTH) @@ -415,7 +134,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { if check { // cksum does not support '--check'ing legacy algorithms - if algo_cli.is_some_and(|algo_name| LEGACY_ALGORITHMS.contains(&algo_name)) { + if algo_cli.is_some_and(AlgoKind::is_legacy) { return Err(ChecksumError::AlgorithmNotSupportedWithCheck.into()); } @@ -435,8 +154,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Execute the checksum validation based on the presence of files or the use of stdin let verbose = ChecksumVerbose::new(status, quiet, warn); - let opts = ChecksumOptions { - binary: binary_flag, + let opts = ChecksumValidateOptions { ignore_missing, strict, verbose, @@ -448,31 +166,30 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Not --check // Set the default algorithm to CRC when not '--check'ing. 
- let algo_name = algo_cli.unwrap_or(ALGORITHM_OPTIONS_CRC); + let algo_kind = algo_cli.unwrap_or(AlgoKind::Crc); let (tag, binary) = handle_tag_text_binary_flags(std::env::args_os())?; - let algo = detect_algo(algo_name, length)?; + let algo = SizedAlgoKind::from_unsized(algo_kind, length)?; let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO)); let output_format = figure_out_output_format( - &algo, + algo, tag, binary, matches.get_flag(options::RAW), matches.get_flag(options::BASE64), ); - let opts = Options { - algo_name: algo.name, - digest: (algo.create_fn)(), - output_bits: algo.bits, - length, + let opts = ChecksumComputeOptions { + algo_kind: algo, output_format, line_ending, + binary: false, + no_names: false, }; - cksum(opts, files)?; + perform_checksum_computation(opts, files)?; Ok(()) } diff --git a/src/uu/hashsum/Cargo.toml b/src/uu/hashsum/Cargo.toml index 00eb152edb4..ec382870bdc 100644 --- a/src/uu/hashsum/Cargo.toml +++ b/src/uu/hashsum/Cargo.toml @@ -19,7 +19,7 @@ path = "src/hashsum.rs" [dependencies] clap = { workspace = true } -uucore = { workspace = true, features = ["checksum", "sum"] } +uucore = { workspace = true, features = ["checksum", "encoding", "sum"] } fluent = { workspace = true } [[bin]] diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 7edc916fb3f..d6258210f17 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -3,53 +3,28 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore (ToDO) algo, algoname, regexes, nread, nonames +// spell-checker:ignore (ToDO) algo, algoname, bitlen, regexes, nread, nonames -use clap::ArgAction; -use clap::builder::ValueParser; -use clap::value_parser; -use clap::{Arg, ArgMatches, Command}; use std::ffi::{OsStr, OsString}; -use std::fs::File; -use std::io::{BufReader, Read, stdin}; use std::iter; use std::num::ParseIntError; use std::path::Path; -use uucore::checksum::ChecksumError; -use uucore::checksum::ChecksumOptions; -use uucore::checksum::ChecksumVerbose; -use uucore::checksum::HashAlgorithm; -use uucore::checksum::calculate_blake2b_length; -use uucore::checksum::create_sha3; -use uucore::checksum::detect_algo; -use uucore::checksum::digest_reader; -use uucore::checksum::escape_filename; -use uucore::checksum::perform_checksum_validation; -use uucore::error::{UResult, strip_errno}; -use uucore::format_usage; -use uucore::sum::{Digest, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Shake128, Shake256}; -use uucore::translate; + +use clap::builder::ValueParser; +use clap::{Arg, ArgAction, ArgMatches, Command, value_parser}; + +use uucore::checksum::compute::{ + ChecksumComputeOptions, figure_out_output_format, perform_checksum_computation, +}; +use uucore::checksum::validate::{ + ChecksumValidateOptions, ChecksumVerbose, perform_checksum_validation, +}; +use uucore::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, calculate_blake2b_length}; +use uucore::error::UResult; +use uucore::line_ending::LineEnding; +use uucore::{format_usage, translate}; const NAME: &str = "hashsum"; -// Using the same read buffer size as GNU -const READ_BUFFER_SIZE: usize = 32 * 1024; - -struct Options<'a> { - algoname: &'static str, - digest: Box, - binary: bool, - binary_name: &'a str, - //check: bool, - tag: bool, - nonames: bool, - //status: bool, - //quiet: bool, - //strict: bool, - //warn: bool, - output_bits: usize, - zero: bool, - //ignore_missing: bool, -} /// Creates a hasher instance based on the 
command-line flags. /// @@ -63,10 +38,10 @@ struct Options<'a> { /// the output length in bits or an Err if multiple hash algorithms are specified or if a /// required flag is missing. #[allow(clippy::cognitive_complexity)] -fn create_algorithm_from_flags(matches: &ArgMatches) -> UResult { - let mut alg: Option = None; +fn create_algorithm_from_flags(matches: &ArgMatches) -> UResult<(AlgoKind, Option)> { + let mut alg: Option<(AlgoKind, Option)> = None; - let mut set_or_err = |new_alg: HashAlgorithm| -> UResult<()> { + let mut set_or_err = |new_alg: (AlgoKind, Option)| -> UResult<()> { if alg.is_some() { return Err(ChecksumError::CombineMultipleAlgorithms.into()); } @@ -75,80 +50,57 @@ fn create_algorithm_from_flags(matches: &ArgMatches) -> UResult { }; if matches.get_flag("md5") { - set_or_err(detect_algo("md5sum", None)?)?; + set_or_err((AlgoKind::Md5, None))?; } if matches.get_flag("sha1") { - set_or_err(detect_algo("sha1sum", None)?)?; + set_or_err((AlgoKind::Sha1, None))?; } if matches.get_flag("sha224") { - set_or_err(detect_algo("sha224sum", None)?)?; + set_or_err((AlgoKind::Sha224, None))?; } if matches.get_flag("sha256") { - set_or_err(detect_algo("sha256sum", None)?)?; + set_or_err((AlgoKind::Sha256, None))?; } if matches.get_flag("sha384") { - set_or_err(detect_algo("sha384sum", None)?)?; + set_or_err((AlgoKind::Sha384, None))?; } if matches.get_flag("sha512") { - set_or_err(detect_algo("sha512sum", None)?)?; + set_or_err((AlgoKind::Sha512, None))?; } if matches.get_flag("b2sum") { - set_or_err(detect_algo("b2sum", None)?)?; + set_or_err((AlgoKind::Blake2b, None))?; } if matches.get_flag("b3sum") { - set_or_err(detect_algo("b3sum", None)?)?; + set_or_err((AlgoKind::Blake3, None))?; } if matches.get_flag("sha3") { match matches.get_one::("bits") { - Some(bits) => set_or_err(create_sha3(*bits)?)?, + Some(bits @ (224 | 256 | 384 | 512)) => set_or_err((AlgoKind::Sha3, Some(*bits)))?, + Some(bits) => return 
Err(ChecksumError::InvalidLengthForSha(bits.to_string()).into()), None => return Err(ChecksumError::LengthRequired("SHA3".into()).into()), } } if matches.get_flag("sha3-224") { - set_or_err(HashAlgorithm { - name: "SHA3-224", - create_fn: Box::new(|| Box::new(Sha3_224::new())), - bits: 224, - })?; + set_or_err((AlgoKind::Sha3, Some(224)))?; } if matches.get_flag("sha3-256") { - set_or_err(HashAlgorithm { - name: "SHA3-256", - create_fn: Box::new(|| Box::new(Sha3_256::new())), - bits: 256, - })?; + set_or_err((AlgoKind::Sha3, Some(256)))?; } if matches.get_flag("sha3-384") { - set_or_err(HashAlgorithm { - name: "SHA3-384", - create_fn: Box::new(|| Box::new(Sha3_384::new())), - bits: 384, - })?; + set_or_err((AlgoKind::Sha3, Some(384)))?; } if matches.get_flag("sha3-512") { - set_or_err(HashAlgorithm { - name: "SHA3-512", - create_fn: Box::new(|| Box::new(Sha3_512::new())), - bits: 512, - })?; + set_or_err((AlgoKind::Sha3, Some(512)))?; } if matches.get_flag("shake128") { match matches.get_one::("bits") { - Some(bits) => set_or_err(HashAlgorithm { - name: "SHAKE128", - create_fn: Box::new(|| Box::new(Shake128::new())), - bits: *bits, - })?, + Some(bits) => set_or_err((AlgoKind::Shake128, Some(*bits)))?, None => return Err(ChecksumError::LengthRequired("SHAKE128".into()).into()), } } if matches.get_flag("shake256") { match matches.get_one::("bits") { - Some(bits) => set_or_err(HashAlgorithm { - name: "SHAKE256", - create_fn: Box::new(|| Box::new(Shake256::new())), - bits: *bits, - })?, + Some(bits) => set_or_err((AlgoKind::Shake256, Some(*bits)))?, None => return Err(ChecksumError::LengthRequired("SHAKE256".into()).into()), } } @@ -198,10 +150,10 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { None => None, }; - let algo = if is_hashsum_bin { + let (algo_kind, length) = if is_hashsum_bin { create_algorithm_from_flags(&matches)? } else { - detect_algo(&binary_name, length)? 
+ (AlgoKind::from_bin_name(&binary_name)?, length) }; let binary = if matches.get_flag("binary") { @@ -213,9 +165,9 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { }; let check = matches.get_flag("check"); let status = matches.get_flag("status"); - let quiet = matches.get_flag("quiet") || status; + let quiet = matches.get_flag("quiet"); let strict = matches.get_flag("strict"); - let warn = matches.get_flag("warn") && !status; + let warn = matches.get_flag("warn"); let ignore_missing = matches.get_flag("ignore-missing"); if ignore_missing && !check { @@ -227,16 +179,14 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { // on Windows, allow --binary/--text to be used with --check // and keep the behavior of defaulting to binary #[cfg(not(windows))] - let binary = { + { let text_flag = matches.get_flag("text"); let binary_flag = matches.get_flag("binary"); if binary_flag || text_flag { return Err(ChecksumError::BinaryTextConflict.into()); } - - false - }; + } // Execute the checksum validation based on the presence of files or the use of stdin // Determine the source of input: a list of files or stdin. 
@@ -247,52 +197,51 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { let verbose = ChecksumVerbose::new(status, quiet, warn); - let opts = ChecksumOptions { - binary, + let opts = ChecksumValidateOptions { ignore_missing, strict, verbose, }; // Execute the checksum validation - return perform_checksum_validation( - input.iter().copied(), - Some(algo.name), - Some(algo.bits), - opts, - ); + return perform_checksum_validation(input.iter().copied(), Some(algo_kind), length, opts); } else if quiet { return Err(ChecksumError::QuietNotCheck.into()); } else if strict { return Err(ChecksumError::StrictNotCheck.into()); } - let nonames = *matches + let no_names = *matches .try_get_one("no-names") .unwrap_or(None) .unwrap_or(&false); - let zero = matches.get_flag("zero"); + let line_ending = LineEnding::from_zero_flag(matches.get_flag("zero")); + + let algo = SizedAlgoKind::from_unsized(algo_kind, length)?; - let opts = Options { - algoname: algo.name, - digest: (algo.create_fn)(), - output_bits: algo.bits, + let opts = ChecksumComputeOptions { + algo_kind: algo, + output_format: figure_out_output_format( + algo, + matches.get_flag(options::TAG), + binary, + /* raw */ false, + /* base64: */ false, + ), + line_ending, binary, - binary_name: &binary_name, - tag: matches.get_flag("tag"), - nonames, - //status, - //quiet, - //warn, - zero, - //ignore_missing, + no_names, }; + let files = matches.get_many::(options::FILE).map_or_else( + // No files given, read from stdin. + || Box::new(iter::once(OsStr::new("-"))) as Box>, + // At least one file given, read from them. 
+ |files| Box::new(files.map(OsStr::new)) as Box>, + ); + // Show the hashsum of the input - match matches.get_many::(options::FILE) { - Some(files) => hashsum(opts, files.map(|f| f.as_os_str())), - None => hashsum(opts, iter::once(OsStr::new("-"))), - } + perform_checksum_computation(opts, files) } mod options { @@ -523,87 +472,3 @@ fn uu_app(binary_name: &str) -> (Command, bool) { (command, is_hashsum_bin) } - -#[allow(clippy::cognitive_complexity)] -fn hashsum<'a, I>(mut options: Options, files: I) -> UResult<()> -where - I: Iterator, -{ - let binary_marker = if options.binary { "*" } else { " " }; - let mut err_found = None; - for filename in files { - let filename = Path::new(filename); - - let mut file = BufReader::with_capacity( - READ_BUFFER_SIZE, - if filename == OsStr::new("-") { - Box::new(stdin()) as Box - } else { - let file_buf = match File::open(filename) { - Ok(f) => f, - Err(e) => { - eprintln!( - "{}: {}: {}", - options.binary_name, - filename.to_string_lossy(), - strip_errno(&e) - ); - err_found = Some(ChecksumError::Io(e)); - continue; - } - }; - Box::new(file_buf) as Box - }, - ); - - let sum = match digest_reader( - &mut options.digest, - &mut file, - options.binary, - options.output_bits, - ) { - Ok((sum, _)) => sum, - Err(e) => { - eprintln!( - "{}: {}: {}", - options.binary_name, - filename.to_string_lossy(), - strip_errno(&e) - ); - err_found = Some(ChecksumError::Io(e)); - continue; - } - }; - - let (escaped_filename, prefix) = escape_filename(filename); - if options.tag { - if options.algoname == "blake2b" { - if options.digest.output_bits() == 512 { - println!("BLAKE2b ({escaped_filename}) = {sum}"); - } else { - // special case for BLAKE2b with non-default output length - println!( - "BLAKE2b-{} ({escaped_filename}) = {sum}", - options.digest.output_bits() - ); - } - } else { - println!( - "{prefix}{} ({escaped_filename}) = {sum}", - options.algoname.to_ascii_uppercase() - ); - } - } else if options.nonames { - println!("{sum}"); - } 
else if options.zero { - // with zero, we don't escape the filename - print!("{sum} {binary_marker}{}\0", filename.display()); - } else { - println!("{prefix}{sum} {binary_marker}{escaped_filename}"); - } - } - match err_found { - None => Ok(()), - Some(e) => Err(Box::new(e)), - } -} diff --git a/src/uucore/src/lib/features/checksum/compute.rs b/src/uucore/src/lib/features/checksum/compute.rs new file mode 100644 index 00000000000..e91c54166d7 --- /dev/null +++ b/src/uucore/src/lib/features/checksum/compute.rs @@ -0,0 +1,338 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore bitlen + +use std::ffi::OsStr; +use std::fs::File; +use std::io::{self, BufReader, Read, Write}; +use std::path::Path; + +use crate::checksum::{ChecksumError, SizedAlgoKind, digest_reader, escape_filename}; +use crate::error::{FromIo, UResult, USimpleError}; +use crate::line_ending::LineEnding; +use crate::{encoding, show, translate}; + +/// Use the same buffer size as GNU when reading a file to create a checksum +/// from it: 32 KiB. +const READ_BUFFER_SIZE: usize = 32 * 1024; + +pub struct ChecksumComputeOptions { + /// Which algorithm to use to compute the digest. + pub algo_kind: SizedAlgoKind, + + /// Printing format to use for each checksum. + pub output_format: OutputFormat, + + /// Whether to finish lines with '\n' or '\0'. + pub line_ending: LineEnding, + + /// On windows, open files as binary instead of text + pub binary: bool, + + /// (non-GNU option) Do not print file names + pub no_names: bool, +} + +/// Reading mode used to compute digest. +/// +/// On most linux systems, this is irrelevant, as there is no distinction +/// between text and binary files. Refer to GNU's cksum documentation for more +/// information. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ReadingMode { + Binary, + Text, +} + +impl ReadingMode { + #[inline] + fn as_char(&self) -> char { + match self { + Self::Binary => '*', + Self::Text => ' ', + } + } +} + +/// Whether to write the digest as hexadecimal or encoded in base64. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DigestFormat { + Hexadecimal, + Base64, +} + +impl DigestFormat { + #[inline] + fn is_base64(&self) -> bool { + *self == Self::Base64 + } +} + +/// Holds the representation that shall be used for printing a checksum line +#[derive(Debug, PartialEq, Eq)] +pub enum OutputFormat { + /// Raw digest + Raw, + + /// Selected for older algorithms which had their custom formatting + /// + /// Default for crc, sysv, bsd + Legacy, + + /// `$ALGO_NAME ($FILENAME) = $DIGEST` + Tagged(DigestFormat), + + /// '$DIGEST $FLAG$FILENAME' + /// where 'flag' depends on the reading mode + /// + /// Default for standalone checksum utilities + Untagged(DigestFormat, ReadingMode), +} + +impl OutputFormat { + #[inline] + fn is_raw(&self) -> bool { + *self == Self::Raw + } +} + +/// Use already-processed arguments to decide the output format. +pub fn figure_out_output_format( + algo: SizedAlgoKind, + tag: bool, + binary: bool, + raw: bool, + base64: bool, +) -> OutputFormat { + // Raw output format takes precedence over anything else. 
+ if raw { + return OutputFormat::Raw; + } + + // Then, if the algo is legacy, takes precedence over the rest + if algo.is_legacy() { + return OutputFormat::Legacy; + } + + let digest_format = if base64 { + DigestFormat::Base64 + } else { + DigestFormat::Hexadecimal + }; + + // After that, decide between tagged and untagged output + if tag { + OutputFormat::Tagged(digest_format) + } else { + let reading_mode = if binary { + ReadingMode::Binary + } else { + ReadingMode::Text + }; + OutputFormat::Untagged(digest_format, reading_mode) + } +} + +fn print_legacy_checksum( + options: &ChecksumComputeOptions, + filename: &OsStr, + sum: &str, + size: usize, +) -> UResult<()> { + debug_assert!(options.algo_kind.is_legacy()); + + let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul { + (filename.to_string_lossy().to_string(), "") + } else { + escape_filename(filename) + }; + + print!("{prefix}"); + + // Print the sum + match options.algo_kind { + SizedAlgoKind::Sysv => print!( + "{} {}", + sum.parse::().unwrap(), + size.div_ceil(options.algo_kind.bitlen()), + ), + SizedAlgoKind::Bsd => { + // The BSD checksum output is 5 digit integer + let bsd_width = 5; + print!( + "{:0bsd_width$} {:bsd_width$}", + sum.parse::().unwrap(), + size.div_ceil(options.algo_kind.bitlen()), + ); + } + SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { + print!("{sum} {size}"); + } + _ => unreachable!("Not a legacy algorithm"), + } + + // Print the filename after a space if not stdin + if escaped_filename != "-" { + print!(" "); + let _dropped_result = io::stdout().write_all(escaped_filename.as_bytes()); + } + + Ok(()) +} + +fn print_tagged_checksum( + options: &ChecksumComputeOptions, + filename: &OsStr, + sum: &String, +) -> UResult<()> { + let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul { + (filename.to_string_lossy().to_string(), "") + } else { + escape_filename(filename) + }; + + // Print algo name and opening parenthesis. 
+ print!("{prefix}{} (", options.algo_kind.to_tag()); + + // Print filename + let _dropped_result = io::stdout().write_all(escaped_filename.as_bytes()); + + // Print closing parenthesis and sum + print!(") = {sum}"); + + Ok(()) +} + +fn print_untagged_checksum( + options: &ChecksumComputeOptions, + filename: &OsStr, + sum: &String, + reading_mode: ReadingMode, +) -> UResult<()> { + // early check for the "no-names" option + if options.no_names { + print!("{sum}"); + return Ok(()); + } + + let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul { + (filename.to_string_lossy().to_string(), "") + } else { + escape_filename(filename) + }; + + // Print checksum and reading mode flag + print!("{prefix}{sum} {}", reading_mode.as_char()); + + // Print filename + let _dropped_result = io::stdout().write_all(escaped_filename.as_bytes()); + + Ok(()) +} + +/// Calculate checksum +/// +/// # Arguments +/// +/// * `options` - CLI options for the assigning checksum algorithm +/// * `files` - A iterator of [`OsStr`] which is a bunch of files that are using for calculating checksum +pub fn perform_checksum_computation<'a, I>(options: ChecksumComputeOptions, files: I) -> UResult<()> +where + I: Iterator, +{ + let mut files = files.peekable(); + + while let Some(filename) = files.next() { + // Check that in raw mode, we are not provided with several files. 
+ if options.output_format.is_raw() && files.peek().is_some() { + return Err(Box::new(ChecksumError::RawMultipleFiles)); + } + + let filepath = Path::new(filename); + let stdin_buf; + let file_buf; + if filepath.is_dir() { + show!(USimpleError::new( + 1, + // TODO: Rework translation, which is broken since this code moved to uucore + // translate!("cksum-error-is-directory", "file" => filepath.display()) + format!("{}: Is a directory", filepath.display()) + )); + continue; + } + + // Handle the file input + let mut file = BufReader::with_capacity( + READ_BUFFER_SIZE, + if filename == "-" { + stdin_buf = io::stdin(); + Box::new(stdin_buf) as Box + } else { + file_buf = match File::open(filepath) { + Ok(file) => file, + Err(err) => { + show!(err.map_err_context(|| filepath.to_string_lossy().into())); + continue; + } + }; + Box::new(file_buf) as Box + }, + ); + + let mut digest = options.algo_kind.create_digest(); + + let (sum_hex, sz) = digest_reader( + &mut digest, + &mut file, + options.binary, + options.algo_kind.bitlen(), + ) + .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; + + // Encodes the sum if df is Base64, leaves as-is otherwise. + let encode_sum = |sum: String, df: DigestFormat| { + if df.is_base64() { + encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap()) + } else { + sum + } + }; + + match options.output_format { + OutputFormat::Raw => { + let bytes = match options.algo_kind { + SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { + sum_hex.parse::().unwrap().to_be_bytes().to_vec() + } + SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => { + sum_hex.parse::().unwrap().to_be_bytes().to_vec() + } + _ => hex::decode(sum_hex).unwrap(), + }; + // Cannot handle multiple files anyway, output immediately. 
+ io::stdout().write_all(&bytes)?; + return Ok(()); + } + OutputFormat::Legacy => { + print_legacy_checksum(&options, filename, &sum_hex, sz)?; + } + OutputFormat::Tagged(digest_format) => { + print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?; + } + OutputFormat::Untagged(digest_format, reading_mode) => { + print_untagged_checksum( + &options, + filename, + &encode_sum(sum_hex, digest_format), + reading_mode, + )?; + } + } + + print!("{}", options.line_ending); + } + Ok(()) +} diff --git a/src/uucore/src/lib/features/checksum/mod.rs b/src/uucore/src/lib/features/checksum/mod.rs new file mode 100644 index 00000000000..87c8836fd7f --- /dev/null +++ b/src/uucore/src/lib/features/checksum/mod.rs @@ -0,0 +1,613 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore bitlen + +use std::ffi::OsStr; +use std::io::{self, Read}; +use std::num::IntErrorKind; + +use os_display::Quotable; +use thiserror::Error; + +use crate::error::{UError, UResult}; +use crate::show_error; +use crate::sum::{ + Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256, + Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV, +}; + +pub mod compute; +pub mod validate; + +pub const ALGORITHM_OPTIONS_SYSV: &str = "sysv"; +pub const ALGORITHM_OPTIONS_BSD: &str = "bsd"; +pub const ALGORITHM_OPTIONS_CRC: &str = "crc"; +pub const ALGORITHM_OPTIONS_CRC32B: &str = "crc32b"; +pub const ALGORITHM_OPTIONS_MD5: &str = "md5"; +pub const ALGORITHM_OPTIONS_SHA1: &str = "sha1"; +pub const ALGORITHM_OPTIONS_SHA2: &str = "sha2"; +pub const ALGORITHM_OPTIONS_SHA3: &str = "sha3"; + +pub const ALGORITHM_OPTIONS_SHA224: &str = "sha224"; +pub const ALGORITHM_OPTIONS_SHA256: &str = "sha256"; +pub const ALGORITHM_OPTIONS_SHA384: &str = "sha384"; +pub const 
ALGORITHM_OPTIONS_SHA512: &str = "sha512"; +pub const ALGORITHM_OPTIONS_BLAKE2B: &str = "blake2b"; +pub const ALGORITHM_OPTIONS_BLAKE3: &str = "blake3"; +pub const ALGORITHM_OPTIONS_SM3: &str = "sm3"; +pub const ALGORITHM_OPTIONS_SHAKE128: &str = "shake128"; +pub const ALGORITHM_OPTIONS_SHAKE256: &str = "shake256"; + +pub const SUPPORTED_ALGORITHMS: [&str; 17] = [ + ALGORITHM_OPTIONS_SYSV, + ALGORITHM_OPTIONS_BSD, + ALGORITHM_OPTIONS_CRC, + ALGORITHM_OPTIONS_CRC32B, + ALGORITHM_OPTIONS_MD5, + ALGORITHM_OPTIONS_SHA1, + ALGORITHM_OPTIONS_SHA2, + ALGORITHM_OPTIONS_SHA3, + ALGORITHM_OPTIONS_BLAKE2B, + ALGORITHM_OPTIONS_SM3, + // Legacy aliases for -a sha2 -l xxx + ALGORITHM_OPTIONS_SHA224, + ALGORITHM_OPTIONS_SHA256, + ALGORITHM_OPTIONS_SHA384, + ALGORITHM_OPTIONS_SHA512, + // Extra algorithms that are not valid `cksum --algorithm` as per GNU. + // TODO: Should we keep them or drop them to align our support with GNU ? + ALGORITHM_OPTIONS_BLAKE3, + ALGORITHM_OPTIONS_SHAKE128, + ALGORITHM_OPTIONS_SHAKE256, +]; + +/// Represents an algorithm kind. In some cases, it is not sufficient by itself +/// to know which algorithm to use exactly, because it lacks a digest length, +/// which is why [`SizedAlgoKind`] exists. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AlgoKind { + Sysv, + Bsd, + Crc, + Crc32b, + Md5, + Sm3, + Sha1, + Sha2, + Sha3, + Blake2b, + + // Available in cksum for backward compatibility + Sha224, + Sha256, + Sha384, + Sha512, + + // Not available in cksum + Shake128, + Shake256, + Blake3, +} + +impl AlgoKind { + /// Parses an [`AlgoKind`] from a string, only accepting valid cksum + /// `--algorithm` values. 
+ pub fn from_cksum(algo: impl AsRef) -> UResult { + use AlgoKind::*; + Ok(match algo.as_ref() { + ALGORITHM_OPTIONS_SYSV => Sysv, + ALGORITHM_OPTIONS_BSD => Bsd, + ALGORITHM_OPTIONS_CRC => Crc, + ALGORITHM_OPTIONS_CRC32B => Crc32b, + ALGORITHM_OPTIONS_MD5 => Md5, + ALGORITHM_OPTIONS_SHA1 => Sha1, + ALGORITHM_OPTIONS_SHA2 => Sha2, + ALGORITHM_OPTIONS_SHA3 => Sha3, + ALGORITHM_OPTIONS_BLAKE2B => Blake2b, + ALGORITHM_OPTIONS_SM3 => Sm3, + + // For backward compatibility + ALGORITHM_OPTIONS_SHA224 => Sha224, + ALGORITHM_OPTIONS_SHA256 => Sha256, + ALGORITHM_OPTIONS_SHA384 => Sha384, + ALGORITHM_OPTIONS_SHA512 => Sha512, + _ => return Err(ChecksumError::UnknownAlgorithm(algo.as_ref().to_string()).into()), + }) + } + + /// Parses an algo kind from a string, accepting standalone binary names. + pub fn from_bin_name(algo: impl AsRef) -> UResult { + use AlgoKind::*; + Ok(match algo.as_ref() { + "md5sum" => Md5, + "sha1sum" => Sha1, + "sha224sum" => Sha224, + "sha256sum" => Sha256, + "sha384sum" => Sha384, + "sha512sum" => Sha512, + "sha3sum" => Sha3, + "b2sum" => Blake2b, + + _ => return Err(ChecksumError::UnknownAlgorithm(algo.as_ref().to_string()).into()), + }) + } + + /// Returns a string corresponding to the algorithm kind. + pub fn to_uppercase(self) -> &'static str { + use AlgoKind::*; + match self { + // Legacy algorithms + Sysv => "SYSV", + Bsd => "BSD", + Crc => "CRC", + Crc32b => "CRC32B", + + Md5 => "MD5", + Sm3 => "SM3", + Sha1 => "SHA1", + Sha2 => "SHA2", + Sha3 => "SHA3", + Blake2b => "BLAKE2b", // Note the lowercase b in the end here. 
+ + // For backward compatibility + Sha224 => "SHA224", + Sha256 => "SHA256", + Sha384 => "SHA384", + Sha512 => "SHA512", + + Shake128 => "SHAKE128", + Shake256 => "SHAKE256", + Blake3 => "BLAKE3", + } + } + + /// Returns a string corresponding to the algorithm option in cksum `-a` + pub fn to_lowercase(self) -> &'static str { + use AlgoKind::*; + match self { + Sysv => "sysv", + Bsd => "bsd", + Crc => "crc", + Crc32b => "crc32b", + Md5 => "md5", + Sm3 => "sm3", + Sha1 => "sha1", + Sha2 => "sha2", + Sha3 => "sha3", + Blake2b => "blake2b", + + // For backward compatibility + Sha224 => "sha224", + Sha256 => "sha256", + Sha384 => "sha384", + Sha512 => "sha512", + + Shake128 => "shake128", + Shake256 => "shake256", + Blake3 => "blake3", + } + } + + pub fn is_legacy(self) -> bool { + use AlgoKind::*; + matches!(self, Sysv | Bsd | Crc | Crc32b) + } +} + +/// Holds a length for a SHA2 of SHA3 algorithm kind. +#[derive(Debug, Clone, Copy)] +pub enum ShaLength { + Len224, + Len256, + Len384, + Len512, +} + +impl ShaLength { + pub fn as_usize(self) -> usize { + match self { + Self::Len224 => 224, + Self::Len256 => 256, + Self::Len384 => 384, + Self::Len512 => 512, + } + } +} + +impl TryFrom for ShaLength { + type Error = ChecksumError; + + fn try_from(value: usize) -> Result { + use ShaLength::*; + match value { + 224 => Ok(Len224), + 256 => Ok(Len256), + 384 => Ok(Len384), + 512 => Ok(Len512), + _ => Err(ChecksumError::InvalidLengthForSha(value.to_string())), + } + } +} + +/// Represents an actual determined algorithm. +#[derive(Debug, Clone, Copy)] +pub enum SizedAlgoKind { + Sysv, + Bsd, + Crc, + Crc32b, + Md5, + Sm3, + Sha1, + Blake3, + Sha2(ShaLength), + Sha3(ShaLength), + // Note: we store Blake2b's length as BYTES. 
+ Blake2b(Option), + Shake128(usize), + Shake256(usize), +} + +impl SizedAlgoKind { + pub fn from_unsized(kind: AlgoKind, byte_length: Option) -> UResult { + use AlgoKind as ak; + match (kind, byte_length) { + ( + ak::Sysv + | ak::Bsd + | ak::Crc + | ak::Crc32b + | ak::Md5 + | ak::Sm3 + | ak::Sha1 + | ak::Blake3 + | ak::Sha224 + | ak::Sha256 + | ak::Sha384 + | ak::Sha512, + Some(_), + ) => Err(ChecksumError::LengthOnlyForBlake2bSha2Sha3.into()), + + (ak::Sysv, _) => Ok(Self::Sysv), + (ak::Bsd, _) => Ok(Self::Bsd), + (ak::Crc, _) => Ok(Self::Crc), + (ak::Crc32b, _) => Ok(Self::Crc32b), + (ak::Md5, _) => Ok(Self::Md5), + (ak::Sm3, _) => Ok(Self::Sm3), + (ak::Sha1, _) => Ok(Self::Sha1), + (ak::Blake3, _) => Ok(Self::Blake3), + + (ak::Shake128, Some(l)) => Ok(Self::Shake128(l)), + (ak::Shake256, Some(l)) => Ok(Self::Shake256(l)), + (ak::Sha2, Some(l)) => Ok(Self::Sha2(ShaLength::try_from(l)?)), + (ak::Sha3, Some(l)) => Ok(Self::Sha3(ShaLength::try_from(l)?)), + (algo @ (ak::Sha2 | ak::Sha3), None) => { + Err(ChecksumError::LengthRequiredForSha(algo.to_lowercase().into()).into()) + } + // [`calculate_blake2b_length`] expects a length in bits but we + // have a length in bytes. 
+ (ak::Blake2b, Some(l)) => Ok(Self::Blake2b(calculate_blake2b_length(8 * l)?)), + (ak::Blake2b, None) => Ok(Self::Blake2b(None)), + + (ak::Sha224, None) => Ok(Self::Sha2(ShaLength::Len224)), + (ak::Sha256, None) => Ok(Self::Sha2(ShaLength::Len256)), + (ak::Sha384, None) => Ok(Self::Sha2(ShaLength::Len384)), + (ak::Sha512, None) => Ok(Self::Sha2(ShaLength::Len512)), + (_, None) => Err(ChecksumError::LengthRequired(kind.to_uppercase().into()).into()), + } + } + + pub fn to_tag(self) -> String { + use SizedAlgoKind::*; + match self { + Md5 => "MD5".into(), + Sm3 => "SM3".into(), + Sha1 => "SHA1".into(), + Blake3 => "BLAKE3".into(), + Sha2(len) => format!("SHA{}", len.as_usize()), + Sha3(len) => format!("SHA3-{}", len.as_usize()), + Blake2b(Some(byte_len)) => format!("BLAKE2b-{}", byte_len * 8), + Blake2b(None) => "BLAKE2b".into(), + Shake128(_) => "SHAKE128".into(), + Shake256(_) => "SHAKE256".into(), + Sysv | Bsd | Crc | Crc32b => panic!("Should not be used for tagging"), + } + } + + pub fn create_digest(&self) -> Box { + use ShaLength::*; + match self { + Self::Sysv => Box::new(SysV::new()), + Self::Bsd => Box::new(Bsd::new()), + Self::Crc => Box::new(Crc::new()), + Self::Crc32b => Box::new(CRC32B::new()), + Self::Md5 => Box::new(Md5::new()), + Self::Sm3 => Box::new(Sm3::new()), + Self::Sha1 => Box::new(Sha1::new()), + Self::Blake3 => Box::new(Blake3::new()), + Self::Sha2(Len224) => Box::new(Sha224::new()), + Self::Sha2(Len256) => Box::new(Sha256::new()), + Self::Sha2(Len384) => Box::new(Sha384::new()), + Self::Sha2(Len512) => Box::new(Sha512::new()), + Self::Sha3(Len224) => Box::new(Sha3_224::new()), + Self::Sha3(Len256) => Box::new(Sha3_256::new()), + Self::Sha3(Len384) => Box::new(Sha3_384::new()), + Self::Sha3(Len512) => Box::new(Sha3_512::new()), + Self::Blake2b(Some(byte_len)) => Box::new(Blake2b::with_output_bytes(*byte_len)), + Self::Blake2b(None) => Box::new(Blake2b::new()), + Self::Shake128(_) => Box::new(Shake128::new()), + Self::Shake256(_) => 
Box::new(Shake256::new()), + } + } + + pub fn bitlen(&self) -> usize { + use SizedAlgoKind::*; + match self { + Sysv => 512, + Bsd => 1024, + Crc => 256, + Crc32b => 32, + Md5 => 128, + Sm3 => 512, + Sha1 => 160, + Blake3 => 256, + Sha2(len) => len.as_usize(), + Sha3(len) => len.as_usize(), + Blake2b(len) => len.unwrap_or(512), + Shake128(len) => *len, + Shake256(len) => *len, + } + } + pub fn is_legacy(&self) -> bool { + use SizedAlgoKind::*; + matches!(self, Sysv | Bsd | Crc | Crc32b) + } +} + +#[derive(Debug, Error)] +pub enum ChecksumError { + #[error("the --raw option is not supported with multiple files")] + RawMultipleFiles, + #[error("the --ignore-missing option is meaningful only when verifying checksums")] + IgnoreNotCheck, + #[error("the --strict option is meaningful only when verifying checksums")] + StrictNotCheck, + #[error("the --quiet option is meaningful only when verifying checksums")] + QuietNotCheck, + + // --length sanitization errors + #[error("--length required for {}", .0.quote())] + LengthRequired(String), + #[error("invalid length: {}", .0.quote())] + InvalidLength(String), + #[error("maximum digest length for {} is 512 bits", .0.quote())] + LengthTooBigForBlake(String), + #[error("length is not a multiple of 8")] + LengthNotMultipleOf8, + #[error("digest length for {} must be 224, 256, 384, or 512", .0.quote())] + InvalidLengthForSha(String), + #[error("--algorithm={0} requires specifying --length 224, 256, 384, or 512")] + LengthRequiredForSha(String), + #[error("--length is only supported with --algorithm blake2b, sha2, or sha3")] + LengthOnlyForBlake2bSha2Sha3, + + #[error("the --binary and --text options are meaningless when verifying checksums")] + BinaryTextConflict, + #[error("--text mode is only supported with --untagged")] + TextWithoutUntagged, + #[error("--check is not supported with --algorithm={{bsd,sysv,crc,crc32b}}")] + AlgorithmNotSupportedWithCheck, + #[error("You cannot combine multiple hash algorithms!")] + 
CombineMultipleAlgorithms, + #[error("Needs an algorithm to hash with.\nUse --help for more information.")] + NeedAlgorithmToHash, + #[error("unknown algorithm: {0}: clap should have prevented this case")] + UnknownAlgorithm(String), + #[error("")] + Io(#[from] io::Error), +} + +impl UError for ChecksumError { + fn code(&self) -> i32 { + 1 + } +} + +pub fn digest_reader( + digest: &mut Box, + reader: &mut T, + binary: bool, + output_bits: usize, +) -> io::Result<(String, usize)> { + digest.reset(); + + // Read bytes from `reader` and write those bytes to `digest`. + // + // If `binary` is `false` and the operating system is Windows, then + // `DigestWriter` replaces "\r\n" with "\n" before it writes the + // bytes into `digest`. Otherwise, it just inserts the bytes as-is. + // + // In order to support replacing "\r\n", we must call `finalize()` + // in order to support the possibility that the last character read + // from the reader was "\r". (This character gets buffered by + // `DigestWriter` and only written if the following character is + // "\n". But when "\r" is the last character read, we need to force + // it to be written.) + let mut digest_writer = DigestWriter::new(digest, binary); + let output_size = std::io::copy(reader, &mut digest_writer)? as usize; + digest_writer.finalize(); + + if digest.output_bits() > 0 { + Ok((digest.result_str(), output_size)) + } else { + // Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016) + let mut bytes = vec![0; output_bits.div_ceil(8)]; + digest.hash_finalize(&mut bytes); + Ok((hex::encode(bytes), output_size)) + } +} + +/// Calculates the length of the digest. +pub fn calculate_blake2b_length(bit_length: usize) -> UResult> { + calculate_blake2b_length_str(bit_length.to_string().as_str()) +} + +/// Calculates the length of the digest. +pub fn calculate_blake2b_length_str(bit_length: &str) -> UResult> { + // Blake2b's length is parsed in an u64. 
+ match bit_length.parse::() { + Ok(0) => Ok(None), + + // Error cases + Ok(n) if n > 512 => { + show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); + Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) + } + Err(e) if *e.kind() == IntErrorKind::PosOverflow => { + show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); + Err(ChecksumError::LengthTooBigForBlake("BLAKE2b".into()).into()) + } + Err(_) => Err(ChecksumError::InvalidLength(bit_length.into()).into()), + + Ok(n) if n % 8 != 0 => { + show_error!("{}", ChecksumError::InvalidLength(bit_length.into())); + Err(ChecksumError::LengthNotMultipleOf8.into()) + } + + // Valid cases + + // When length is 512, it is blake2b's default. So, don't show it + Ok(512) => Ok(None), + // Divide by 8, as our blake2b implementation expects bytes instead of bits. + Ok(n) => Ok(Some(n / 8)), + } +} + +pub fn validate_sha2_sha3_length(algo_name: AlgoKind, length: Option) -> UResult { + match length { + Some(224) => Ok(ShaLength::Len224), + Some(256) => Ok(ShaLength::Len256), + Some(384) => Ok(ShaLength::Len384), + Some(512) => Ok(ShaLength::Len512), + Some(len) => { + show_error!("{}", ChecksumError::InvalidLength(len.to_string())); + Err(ChecksumError::InvalidLengthForSha(algo_name.to_uppercase().into()).into()) + } + None => Err(ChecksumError::LengthRequiredForSha(algo_name.to_lowercase().into()).into()), + } +} + +pub fn sanitize_sha2_sha3_length_str(algo_kind: AlgoKind, length: &str) -> UResult { + // There is a difference in the errors sent when the length is not a number + // vs. its an invalid number. + // + // When inputting an invalid number, an extra error message it printed to + // remind of the accepted inputs. + let len = match length.parse::() { + Ok(l) => l, + // Note: Positive overflow while parsing counts as an invalid number, + // but a number still. 
/// Reverses the escaping of a file name read from a checksum line.
///
/// A backslash followed by `\`, `n` or `r` becomes a backslash, a line feed
/// or a carriage return. A backslash followed by any other byte, or a lone
/// backslash at the very end, is kept unchanged.
///
/// Returns the unescaped bytes and a prefix: `"\\"` when unescaping changed
/// the name, `""` otherwise.
pub fn unescape_filename(filename: &[u8]) -> (Vec<u8>, &'static str) {
    let mut unescaped = Vec::with_capacity(filename.len());
    let mut bytes = filename.iter().copied();

    while let Some(byte) = bytes.next() {
        if byte != b'\\' {
            unescaped.push(byte);
            continue;
        }
        match bytes.next() {
            Some(b'\\') => unescaped.push(b'\\'),
            Some(b'n') => unescaped.push(b'\n'),
            Some(b'r') => unescaped.push(b'\r'),
            // Unknown escape: leave both bytes untouched.
            Some(other) => unescaped.extend_from_slice(&[b'\\', other]),
            // Trailing backslash: keep it rather than silently dropping a
            // byte of the file name.
            None => unescaped.push(b'\\'),
        }
    }

    let prefix = if unescaped == filename { "" } else { "\\" };
    (unescaped, prefix)
}

/// Escapes a file name for output in a checksum line.
///
/// Backslash, line feed and carriage return are written as `\\`, `\n` and
/// `\r`. The name is first converted with `to_string_lossy`.
///
/// Returns the escaped name and a prefix: `"\\"` when at least one character
/// was escaped, `""` otherwise.
pub fn escape_filename(filename: &OsStr) -> (String, &'static str) {
    let original = filename.to_string_lossy();
    let mut escaped = String::with_capacity(original.len());
    let mut changed = false;

    for ch in original.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\r' => "\\r",
            _ => {
                escaped.push(ch);
                continue;
            }
        };
        escaped.push_str(replacement);
        changed = true;
    }

    let prefix = if changed { "\\" } else { "" };
    (escaped, prefix)
}
unescape_filename(b"test\\\\file.txt"); + assert_eq!(unescaped, b"test\\file.txt"); + assert_eq!(prefix, "\\"); + } + + #[test] + fn test_escape_filename() { + let (escaped, prefix) = escape_filename(OsStr::new("testfile.txt")); + assert_eq!(escaped, "testfile.txt"); + assert_eq!(prefix, ""); + + let (escaped, prefix) = escape_filename(OsStr::new("test\nfile.txt")); + assert_eq!(escaped, "test\\nfile.txt"); + assert_eq!(prefix, "\\"); + + let (escaped, prefix) = escape_filename(OsStr::new("test\rfile.txt")); + assert_eq!(escaped, "test\\rfile.txt"); + assert_eq!(prefix, "\\"); + + let (escaped, prefix) = escape_filename(OsStr::new("test\\file.txt")); + assert_eq!(escaped, "test\\\\file.txt"); + assert_eq!(prefix, "\\"); + } + + #[test] + fn test_calculate_blake2b_length() { + assert_eq!(calculate_blake2b_length(0).unwrap(), None); + assert!(calculate_blake2b_length(10).is_err()); + assert!(calculate_blake2b_length(520).is_err()); + assert_eq!(calculate_blake2b_length(512).unwrap(), None); + assert_eq!(calculate_blake2b_length(256).unwrap(), Some(32)); + } +} diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum/validate.rs similarity index 64% rename from src/uucore/src/lib/features/checksum.rs rename to src/uucore/src/lib/features/checksum/validate.rs index 324dba7b3f8..b68032925ec 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum/validate.rs @@ -2,85 +2,72 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore anotherfile invalidchecksum JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit rsplit + +// spell-checker:ignore rsplit hexdigit bitlen bytelen invalidchecksum xffname + +use std::borrow::Cow; +use std::ffi::OsStr; +use std::fmt::Display; +use std::fs::File; +use std::io::{self, BufReader, Read, Write, stdin}; use data_encoding::BASE64; use os_display::Quotable; -use std::{ - borrow::Cow, - ffi::OsStr, - fmt::Display, - fs::File, - io::{self, BufReader, Read, Write, stdin}, - num::IntErrorKind, - path::Path, - str, -}; +use crate::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, digest_reader, unescape_filename}; +use crate::error::{FromIo, UError, UResult, USimpleError}; +use crate::quoting_style::{QuotingStyle, locale_aware_escape_name}; use crate::{ - error::{FromIo, UError, UResult, USimpleError}, - os_str_as_bytes, os_str_from_bytes, - quoting_style::{QuotingStyle, locale_aware_escape_name}, - read_os_string_lines, show, show_error, show_warning_caps, - sum::{ - Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256, - Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV, - }, + os_str_as_bytes, os_str_from_bytes, read_os_string_lines, show, show_error, show_warning_caps, util_name, }; -use thiserror::Error; - -pub const ALGORITHM_OPTIONS_SYSV: &str = "sysv"; -pub const ALGORITHM_OPTIONS_BSD: &str = "bsd"; -pub const ALGORITHM_OPTIONS_CRC: &str = "crc"; -pub const ALGORITHM_OPTIONS_CRC32B: &str = "crc32b"; -pub const ALGORITHM_OPTIONS_MD5: &str = "md5"; -pub const ALGORITHM_OPTIONS_SHA1: &str = "sha1"; -pub const ALGORITHM_OPTIONS_SHA2: &str = "sha2"; -pub const ALGORITHM_OPTIONS_SHA3: &str = "sha3"; - -pub const ALGORITHM_OPTIONS_SHA224: &str = "sha224"; -pub const ALGORITHM_OPTIONS_SHA256: &str = "sha256"; -pub const ALGORITHM_OPTIONS_SHA384: &str = "sha384"; -pub const ALGORITHM_OPTIONS_SHA512: &str = "sha512"; -pub const ALGORITHM_OPTIONS_BLAKE2B: &str = 
/// To what level should checksum validation print logging info.
///
/// The comparison helpers below rely on the declaration order of the
/// variants through the derived [`PartialOrd`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Clone, Copy, Default)]
pub enum ChecksumVerbose {
    Status,
    Quiet,
    #[default]
    Normal,
    Warning,
}

impl ChecksumVerbose {
    /// Picks a level from three flags, checked in the order `status`,
    /// `quiet`, `warn`; `Normal` when none is set.
    pub fn new(status: bool, quiet: bool, warn: bool) -> Self {
        // Assume only one of the three booleans will be enabled at once.
        // This is ensured by clap's overriding arguments.
        if status {
            Self::Status
        } else if quiet {
            Self::Quiet
        } else if warn {
            Self::Warning
        } else {
            Self::Normal
        }
    }

    /// `true` for every level strictly above `Status`.
    #[inline]
    pub fn over_status(self) -> bool {
        Self::Status < self
    }

    /// `true` for every level strictly above `Quiet`.
    #[inline]
    pub fn over_quiet(self) -> bool {
        Self::Quiet < self
    }

    /// `true` when the level is `Warning` or above.
    #[inline]
    pub fn at_least_warning(self) -> bool {
        Self::Warning <= self
    }
}

/// This struct regroups CLI flags.
#[derive(Debug, Default, Clone, Copy)]
pub struct ChecksumValidateOptions {
    // NOTE(review): presumably mirrors `--ignore-missing` — confirm with callers.
    pub ignore_missing: bool,
    // NOTE(review): presumably mirrors `--strict` — confirm with callers.
    pub strict: bool,
    /// Reporting level used during validation.
    pub verbose: ChecksumVerbose,
}
-#[derive(Debug, Default, Clone, Copy)] -pub struct ChecksumOptions { - pub binary: bool, - pub ignore_missing: bool, - pub strict: bool, - pub verbose: ChecksumVerbose, -} - -#[derive(Debug, Error)] -pub enum ChecksumError { - #[error("the --raw option is not supported with multiple files")] - RawMultipleFiles, - #[error("the --ignore-missing option is meaningful only when verifying checksums")] - IgnoreNotCheck, - #[error("the --strict option is meaningful only when verifying checksums")] - StrictNotCheck, - #[error("the --quiet option is meaningful only when verifying checksums")] - QuietNotCheck, - #[error("--length required for {}", .0.quote())] - LengthRequired(String), - #[error("invalid length: {}", .0.quote())] - InvalidLength(String), - #[error("digest length for {} must be 224, 256, 384, or 512", .0.quote())] - InvalidLengthForSha(String), - #[error("--algorithm={0} requires specifying --length 224, 256, 384, or 512")] - LengthRequiredForSha(String), - #[error("--length is only supported with --algorithm blake2b, sha2, or sha3")] - LengthOnlyForBlake2bSha2Sha3, - #[error("the --binary and --text options are meaningless when verifying checksums")] - BinaryTextConflict, - #[error("--text mode is only supported with --untagged")] - TextWithoutUntagged, - #[error("--check is not supported with --algorithm={{bsd,sysv,crc,crc32b}}")] - AlgorithmNotSupportedWithCheck, - #[error("You cannot combine multiple hash algorithms!")] - CombineMultipleAlgorithms, - #[error("Needs an algorithm to hash with.\nUse --help for more information.")] - NeedAlgorithmToHash, - #[error("unknown algorithm: {0}: clap should have prevented this case")] - UnknownAlgorithm(String), - #[error("")] - Io(#[from] io::Error), -} - -impl UError for ChecksumError { - fn code(&self) -> i32 { - 1 - } -} - -/// Creates a SHA3 hasher instance based on the specified bits argument. 
-/// -/// # Returns -/// -/// Returns a `UResult` with an `HashAlgorithm` or an `Err` if an unsupported -/// output size is provided. -pub fn create_sha3(bits: usize) -> UResult { - match bits { - 224 => Ok(HashAlgorithm { - name: "SHA3-224", - create_fn: Box::new(|| Box::new(Sha3_224::new())), - bits: 224, - }), - 256 => Ok(HashAlgorithm { - name: "SHA3-256", - create_fn: Box::new(|| Box::new(Sha3_256::new())), - bits: 256, - }), - 384 => Ok(HashAlgorithm { - name: "SHA3-384", - create_fn: Box::new(|| Box::new(Sha3_384::new())), - bits: 384, - }), - 512 => Ok(HashAlgorithm { - name: "SHA3-512", - create_fn: Box::new(|| Box::new(Sha3_512::new())), - bits: 512, - }), - - _ => Err(ChecksumError::InvalidLengthForSha("SHA3".into()).into()), - } -} - -pub fn create_sha2(bits: usize) -> UResult { - match bits { - 224 => Ok(HashAlgorithm { - name: "SHA224", - create_fn: Box::new(|| Box::new(Sha224::new())), - bits: 224, - }), - 256 => Ok(HashAlgorithm { - name: "SHA256", - create_fn: Box::new(|| Box::new(Sha256::new())), - bits: 256, - }), - 384 => Ok(HashAlgorithm { - name: "SHA384", - create_fn: Box::new(|| Box::new(Sha384::new())), - bits: 384, - }), - 512 => Ok(HashAlgorithm { - name: "SHA512", - create_fn: Box::new(|| Box::new(Sha512::new())), - bits: 512, - }), - - _ => Err(ChecksumError::InvalidLengthForSha("SHA2".into()).into()), - } -} - #[allow(clippy::comparison_chain)] fn print_cksum_report(res: &ChecksumResult) { if res.bad_format == 1 { @@ -395,104 +232,6 @@ fn print_file_report( } } -pub fn detect_algo(algo: &str, length: Option) -> UResult { - match algo { - ALGORITHM_OPTIONS_SYSV => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_SYSV, - create_fn: Box::new(|| Box::new(SysV::new())), - bits: 512, - }), - ALGORITHM_OPTIONS_BSD => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_BSD, - create_fn: Box::new(|| Box::new(Bsd::new())), - bits: 1024, - }), - ALGORITHM_OPTIONS_CRC => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_CRC, - create_fn: Box::new(|| 
Box::new(Crc::new())), - bits: 256, - }), - ALGORITHM_OPTIONS_CRC32B => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_CRC32B, - create_fn: Box::new(|| Box::new(CRC32B::new())), - bits: 32, - }), - ALGORITHM_OPTIONS_MD5 | "md5sum" => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_MD5, - create_fn: Box::new(|| Box::new(Md5::new())), - bits: 128, - }), - ALGORITHM_OPTIONS_SHA1 | "sha1sum" => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_SHA1, - create_fn: Box::new(|| Box::new(Sha1::new())), - bits: 160, - }), - ALGORITHM_OPTIONS_SHA224 | "sha224sum" => Ok(create_sha2(224)?), - ALGORITHM_OPTIONS_SHA256 | "sha256sum" => Ok(create_sha2(256)?), - ALGORITHM_OPTIONS_SHA384 | "sha384sum" => Ok(create_sha2(384)?), - ALGORITHM_OPTIONS_SHA512 | "sha512sum" => Ok(create_sha2(512)?), - ALGORITHM_OPTIONS_BLAKE2B | "b2sum" => { - // Set default length to 512 if None - let bits = length.unwrap_or(512); - if bits == 512 { - Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_BLAKE2B, - create_fn: Box::new(move || Box::new(Blake2b::new())), - bits: 512, - }) - } else { - Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_BLAKE2B, - create_fn: Box::new(move || Box::new(Blake2b::with_output_bytes(bits))), - bits, - }) - } - } - ALGORITHM_OPTIONS_BLAKE3 | "b3sum" => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_BLAKE3, - create_fn: Box::new(|| Box::new(Blake3::new())), - bits: 256, - }), - ALGORITHM_OPTIONS_SM3 => Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_SM3, - create_fn: Box::new(|| Box::new(Sm3::new())), - bits: 512, - }), - algo @ (ALGORITHM_OPTIONS_SHAKE128 | "shake128sum") => { - let bits = length.ok_or(ChecksumError::LengthRequired(algo.to_ascii_uppercase()))?; - Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_SHAKE128, - create_fn: Box::new(|| Box::new(Shake128::new())), - bits, - }) - } - algo @ (ALGORITHM_OPTIONS_SHAKE256 | "shake256sum") => { - let bits = length.ok_or(ChecksumError::LengthRequired(algo.to_ascii_uppercase()))?; - Ok(HashAlgorithm { - name: ALGORITHM_OPTIONS_SHAKE256, - 
create_fn: Box::new(|| Box::new(Shake256::new())), - bits, - }) - } - algo @ ALGORITHM_OPTIONS_SHA2 => { - let bits = validate_sha2_sha3_length(algo, length)?; - create_sha2(bits) - } - algo @ ALGORITHM_OPTIONS_SHA3 => { - let bits = validate_sha2_sha3_length(algo, length)?; - create_sha3(bits) - } - - // TODO: `hashsum` specific, to remove once hashsum is removed. - algo @ ("sha3-224" | "sha3-256" | "sha3-384" | "sha3-512") => { - let bits: usize = algo.strip_prefix("sha3-").unwrap().parse().unwrap(); - create_sha3(bits) - } - - algo => Err(ChecksumError::UnknownAlgorithm(algo.into()).into()), - } -} - #[derive(Debug, PartialEq, Eq, Clone, Copy)] enum LineFormat { AlgoBased, @@ -758,16 +497,16 @@ fn get_expected_digest_as_hex_string( /// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-". fn get_file_to_check( filename: &OsStr, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, ) -> Result, LineCheckError> { - let filename_bytes = os_str_as_bytes(filename).expect("UTF-8 error"); + let filename_bytes = os_str_as_bytes(filename).map_err(|e| LineCheckError::UError(e.into()))?; if filename == "-" { - Ok(Box::new(stdin())) // Use stdin if "-" is specified in the checksum file + Ok(Box::new(io::stdin())) // Use stdin if "-" is specified in the checksum file } else { let failed_open = || { print_file_report( - std::io::stdout(), + io::stdout(), filename_bytes, FileChecksumResult::CantOpen, "", @@ -836,11 +575,14 @@ fn get_input_file(filename: &OsStr) -> UResult> { /// Gets the algorithm name and length from the `LineInfo` if the algo-based format is matched. 
fn identify_algo_name_and_length( line_info: &LineInfo, - algo_name_input: Option<&str>, + algo_name_input: Option, last_algo: &mut Option, -) -> Result<(String, Option), LineCheckError> { +) -> Result<(AlgoKind, Option), LineCheckError> { let algo_from_line = line_info.algo_name.clone().unwrap_or_default(); - let line_algo = algo_from_line.to_lowercase(); + let Ok(line_algo) = AlgoKind::from_cksum(algo_from_line.to_lowercase()) else { + // Unknown algorithm + return Err(LineCheckError::ImproperlyFormatted); + }; *last_algo = Some(algo_from_line); // check if we are called with XXXsum (example: md5sum) but we detected a @@ -848,31 +590,21 @@ fn identify_algo_name_and_length( // // Also handle the case cksum -s sm3 but the file contains other formats if let Some(algo_name_input) = algo_name_input { - match (algo_name_input, line_algo.as_str()) { + match (algo_name_input, line_algo) { (l, r) if l == r => (), // Edge case for SHA2, which matches SHA(224|256|384|512) ( - ALGORITHM_OPTIONS_SHA2, - ALGORITHM_OPTIONS_SHA224 - | ALGORITHM_OPTIONS_SHA256 - | ALGORITHM_OPTIONS_SHA384 - | ALGORITHM_OPTIONS_SHA512, + AlgoKind::Sha2, + AlgoKind::Sha224 | AlgoKind::Sha256 | AlgoKind::Sha384 | AlgoKind::Sha512, ) => (), _ => return Err(LineCheckError::ImproperlyFormatted), } } - if !SUPPORTED_ALGORITHMS.contains(&line_algo.as_str()) { - // Not supported algo, leave early - return Err(LineCheckError::ImproperlyFormatted); - } - let bytes = if let Some(bitlen) = line_info.algo_bit_len { - match line_algo.as_str() { - ALGORITHM_OPTIONS_BLAKE2B if bitlen % 8 == 0 => Some(bitlen / 8), - ALGORITHM_OPTIONS_SHA2 | ALGORITHM_OPTIONS_SHA3 - if [224, 256, 384, 512].contains(&bitlen) => - { + match line_algo { + AlgoKind::Blake2b if bitlen % 8 == 0 => Some(bitlen / 8), + AlgoKind::Sha2 | AlgoKind::Sha3 if [224, 256, 384, 512].contains(&bitlen) => { Some(bitlen) } // Either @@ -885,7 +617,7 @@ fn identify_algo_name_and_length( // the given length is wrong because it's not a multiple of 8. 
_ => return Err(LineCheckError::ImproperlyFormatted), } - } else if line_algo == ALGORITHM_OPTIONS_BLAKE2B { + } else if line_algo == AlgoKind::Blake2b { // Default length with BLAKE2b, Some(64) } else { @@ -900,8 +632,8 @@ fn identify_algo_name_and_length( fn compute_and_check_digest_from_file( filename: &[u8], expected_checksum: &str, - mut algo: HashAlgorithm, - opts: ChecksumOptions, + algo: SizedAlgoKind, + opts: ChecksumValidateOptions, ) -> Result<(), LineCheckError> { let (filename_to_check_unescaped, prefix) = unescape_filename(filename); let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?; @@ -911,10 +643,17 @@ fn compute_and_check_digest_from_file( let mut file_reader = BufReader::new(file_to_check); // Read the file and calculate the checksum - let create_fn = &mut algo.create_fn; - let mut digest = create_fn(); - let (calculated_checksum, _) = - digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap(); + let mut digest = algo.create_digest(); + + // TODO: improve function signature to use ReadingMode instead of binary bool + // Set binary to false because --binary is not supported with --check + let (calculated_checksum, _) = digest_reader( + &mut digest, + &mut file_reader, + /* binary */ false, + algo.bitlen(), + ) + .unwrap(); // Do the checksum validation let checksum_correct = expected_checksum == calculated_checksum; @@ -936,26 +675,26 @@ fn compute_and_check_digest_from_file( /// Check a digest checksum with non-algo based pre-treatment. 
fn process_algo_based_line( line_info: &LineInfo, - cli_algo_name: Option<&str>, - opts: ChecksumOptions, + cli_algo_kind: Option, + opts: ChecksumValidateOptions, last_algo: &mut Option, ) -> Result<(), LineCheckError> { let filename_to_check = line_info.filename.as_slice(); - let (algo_name, algo_byte_len) = - identify_algo_name_and_length(line_info, cli_algo_name, last_algo)?; + let (algo_kind, algo_byte_len) = + identify_algo_name_and_length(line_info, cli_algo_kind, last_algo)?; // If the digest bitlen is known, we can check the format of the expected // checksum with it. - let digest_char_length_hint = match (algo_name.as_str(), algo_byte_len) { - (ALGORITHM_OPTIONS_BLAKE2B, Some(bytelen)) => Some(bytelen * 2), + let digest_char_length_hint = match (algo_kind, algo_byte_len) { + (AlgoKind::Blake2b, Some(bytelen)) => Some(bytelen * 2), _ => None, }; let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint) .ok_or(LineCheckError::ImproperlyFormatted)?; - let algo = detect_algo(&algo_name, algo_byte_len)?; + let algo = SizedAlgoKind::from_unsized(algo_kind, algo_byte_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } @@ -964,9 +703,9 @@ fn process_algo_based_line( fn process_non_algo_based_line( line_number: usize, line_info: &LineInfo, - cli_algo_name: &str, + cli_algo_kind: AlgoKind, cli_algo_length: Option, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, ) -> Result<(), LineCheckError> { let mut filename_to_check = line_info.filename.as_slice(); if filename_to_check.starts_with(b"*") @@ -982,24 +721,21 @@ fn process_non_algo_based_line( // When a specific algorithm name is input, use it and use the provided // bits except when dealing with blake2b, sha2 and sha3, where we will // detect the length. 
- let (algo_name, algo_byte_len) = match cli_algo_name { - ALGORITHM_OPTIONS_BLAKE2B => { + let (algo_kind, algo_byte_len) = match cli_algo_kind { + AlgoKind::Blake2b => { // division by 2 converts the length of the Blake2b checksum from // hexadecimal characters to bytes, as each byte is represented by // two hexadecimal characters. - ( - ALGORITHM_OPTIONS_BLAKE2B.to_string(), - Some(expected_checksum.len() / 2), - ) + (AlgoKind::Blake2b, Some(expected_checksum.len() / 2)) } - algo @ (ALGORITHM_OPTIONS_SHA2 | ALGORITHM_OPTIONS_SHA3) => { + algo @ (AlgoKind::Sha2 | AlgoKind::Sha3) => { // multiplication by 4 to get the number of bits - (algo.to_string(), Some(expected_checksum.len() * 4)) + (algo, Some(expected_checksum.len() * 4)) } - _ => (cli_algo_name.to_lowercase(), cli_algo_length), + _ => (cli_algo_kind, cli_algo_length), }; - let algo = detect_algo(&algo_name, algo_byte_len)?; + let algo = SizedAlgoKind::from_unsized(algo_kind, algo_byte_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } @@ -1013,9 +749,9 @@ fn process_non_algo_based_line( fn process_checksum_line( line: &OsStr, i: usize, - cli_algo_name: Option<&str>, + cli_algo_name: Option, cli_algo_length: Option, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, cached_line_format: &mut Option, last_algo: &mut Option, ) -> Result<(), LineCheckError> { @@ -1046,9 +782,9 @@ fn process_checksum_line( fn process_checksum_file( filename_input: &OsStr, - cli_algo_name: Option<&str>, + cli_algo_kind: Option, cli_algo_length: Option, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, ) -> Result<(), FileCheckError> { let mut res = ChecksumResult::default(); @@ -1083,7 +819,7 @@ fn process_checksum_file( let line_result = process_checksum_line( line, i, - cli_algo_name, + cli_algo_kind, cli_algo_length, opts, &mut cached_line_format, @@ -1107,12 +843,12 @@ fn process_checksum_file( res.bad_format += 1; if opts.verbose.at_least_warning() { - let algo = 
if let Some(algo_name_input) = cli_algo_name { - Cow::Owned(algo_name_input.to_uppercase()) + let algo = if let Some(algo_name_input) = cli_algo_kind { + algo_name_input.to_uppercase() } else if let Some(algo) = &last_algo { - Cow::Borrowed(algo.as_str()) + algo.as_str() } else { - Cow::Borrowed("Unknown algorithm") + "Unknown algorithm" }; eprintln!( "{}: {}: {}: improperly formatted {algo} checksum line", @@ -1176,9 +912,9 @@ fn process_checksum_file( /// Do the checksum validation (can be strict or not) pub fn perform_checksum_validation<'a, I>( files: I, - algo_name_input: Option<&str>, + algo_kind: Option, length_input: Option, - opts: ChecksumOptions, + opts: ChecksumValidateOptions, ) -> UResult<()> where I: Iterator, @@ -1188,7 +924,7 @@ where // if cksum has several input files, it will print the result for each file for filename_input in files { use FileCheckError::*; - match process_checksum_file(filename_input, algo_name_input, length_input, opts) { + match process_checksum_file(filename_input, algo_kind, length_input, opts) { Err(UError(e)) => return Err(e), Err(Failed | CantOpenChecksumFile) => failed = true, Ok(_) => (), @@ -1202,293 +938,11 @@ where } } -pub fn digest_reader( - digest: &mut Box, - reader: &mut T, - binary: bool, - output_bits: usize, -) -> io::Result<(String, usize)> { - digest.reset(); - - // Read bytes from `reader` and write those bytes to `digest`. - // - // If `binary` is `false` and the operating system is Windows, then - // `DigestWriter` replaces "\r\n" with "\n" before it writes the - // bytes into `digest`. Otherwise, it just inserts the bytes as-is. - // - // In order to support replacing "\r\n", we must call `finalize()` - // in order to support the possibility that the last character read - // from the reader was "\r". (This character gets buffered by - // `DigestWriter` and only written if the following character is - // "\n". But when "\r" is the last character read, we need to force - // it to be written.) 
- let mut digest_writer = DigestWriter::new(digest, binary); - let output_size = std::io::copy(reader, &mut digest_writer)? as usize; - digest_writer.finalize(); - - if digest.output_bits() > 0 { - Ok((digest.result_str(), output_size)) - } else { - // Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016) - let mut bytes = vec![0; output_bits.div_ceil(8)]; - digest.hash_finalize(&mut bytes); - Ok((hex::encode(bytes), output_size)) - } -} - -/// Calculates the length of the digest. -pub fn calculate_blake2b_length(length: usize) -> UResult> { - calculate_blake2b_length_str(length.to_string().as_str()) -} - -/// Calculates the length of the digest. -pub fn calculate_blake2b_length_str(length: &str) -> UResult> { - match length.parse() { - Ok(0) => Ok(None), - Ok(n) if n % 8 != 0 => { - show_error!("{}", ChecksumError::InvalidLength(length.into())); - Err(io::Error::new(io::ErrorKind::InvalidInput, "length is not a multiple of 8").into()) - } - Ok(n) if n > 512 => { - show_error!("{}", ChecksumError::InvalidLength(length.into())); - Err(io::Error::new( - io::ErrorKind::InvalidInput, - format!( - "maximum digest length for {} is 512 bits", - "BLAKE2b".quote() - ), - ) - .into()) - } - Ok(n) => { - // Divide by 8, as our blake2b implementation expects bytes instead of bits. - if n == 512 { - // When length is 512, it is blake2b's default. 
- // So, don't show it - Ok(None) - } else { - Ok(Some(n / 8)) - } - } - Err(_) => Err(ChecksumError::InvalidLength(length.into()).into()), - } -} - -pub fn validate_sha2_sha3_length(algo_name: &str, length: Option) -> UResult { - match length { - Some(len @ (224 | 256 | 384 | 512)) => Ok(len), - Some(len) => { - show_error!("{}", ChecksumError::InvalidLength(len.to_string())); - Err(ChecksumError::InvalidLengthForSha(algo_name.to_ascii_uppercase()).into()) - } - None => Err(ChecksumError::LengthRequiredForSha(algo_name.into()).into()), - } -} - -pub fn sanitize_sha2_sha3_length_str(algo_name: &str, length: &str) -> UResult { - // There is a difference in the errors sent when the length is not a number - // vs. its an invalid number. - // - // When inputting an invalid number, an extra error message it printed to - // remind of the accepted inputs. - let len = match length.parse::() { - Ok(l) => l, - // Note: Positive overflow while parsing counts as an invalid number, - // but a number still. 
- Err(e) if *e.kind() == IntErrorKind::PosOverflow => { - show_error!("{}", ChecksumError::InvalidLength(length.into())); - return Err(ChecksumError::InvalidLengthForSha(algo_name.to_ascii_uppercase()).into()); - } - Err(_) => return Err(ChecksumError::InvalidLength(length.into()).into()), - }; - - if [224, 256, 384, 512].contains(&len) { - Ok(len) - } else { - show_error!("{}", ChecksumError::InvalidLength(length.into())); - Err(ChecksumError::InvalidLengthForSha(algo_name.to_ascii_uppercase()).into()) - } -} - -pub fn unescape_filename(filename: &[u8]) -> (Vec, &'static str) { - let mut unescaped = Vec::with_capacity(filename.len()); - let mut byte_iter = filename.iter().peekable(); - loop { - let Some(byte) = byte_iter.next() else { - break; - }; - if *byte == b'\\' { - match byte_iter.next() { - Some(b'\\') => unescaped.push(b'\\'), - Some(b'n') => unescaped.push(b'\n'), - Some(b'r') => unescaped.push(b'\r'), - Some(x) => { - unescaped.push(b'\\'); - unescaped.push(*x); - } - _ => {} - } - } else { - unescaped.push(*byte); - } - } - let prefix = if unescaped == filename { "" } else { "\\" }; - (unescaped, prefix) -} - -pub fn escape_filename(filename: &Path) -> (String, &'static str) { - let original = filename.as_os_str().to_string_lossy(); - let escaped = original - .replace('\\', "\\\\") - .replace('\n', "\\n") - .replace('\r', "\\r"); - let prefix = if escaped == original { "" } else { "\\" }; - (escaped, prefix) -} - #[cfg(test)] mod tests { - use super::*; use std::ffi::OsString; - #[test] - fn test_unescape_filename() { - let (unescaped, prefix) = unescape_filename(b"test\\nfile.txt"); - assert_eq!(unescaped, b"test\nfile.txt"); - assert_eq!(prefix, "\\"); - let (unescaped, prefix) = unescape_filename(b"test\\nfile.txt"); - assert_eq!(unescaped, b"test\nfile.txt"); - assert_eq!(prefix, "\\"); - - let (unescaped, prefix) = unescape_filename(b"test\\rfile.txt"); - assert_eq!(unescaped, b"test\rfile.txt"); - assert_eq!(prefix, "\\"); - - let (unescaped, 
prefix) = unescape_filename(b"test\\\\file.txt"); - assert_eq!(unescaped, b"test\\file.txt"); - assert_eq!(prefix, "\\"); - } - - #[test] - fn test_escape_filename() { - let (escaped, prefix) = escape_filename(Path::new("testfile.txt")); - assert_eq!(escaped, "testfile.txt"); - assert_eq!(prefix, ""); - - let (escaped, prefix) = escape_filename(Path::new("test\nfile.txt")); - assert_eq!(escaped, "test\\nfile.txt"); - assert_eq!(prefix, "\\"); - - let (escaped, prefix) = escape_filename(Path::new("test\rfile.txt")); - assert_eq!(escaped, "test\\rfile.txt"); - assert_eq!(prefix, "\\"); - - let (escaped, prefix) = escape_filename(Path::new("test\\file.txt")); - assert_eq!(escaped, "test\\\\file.txt"); - assert_eq!(prefix, "\\"); - } - - #[test] - fn test_calculate_blake2b_length() { - assert_eq!(calculate_blake2b_length(0).unwrap(), None); - assert!(calculate_blake2b_length(10).is_err()); - assert!(calculate_blake2b_length(520).is_err()); - assert_eq!(calculate_blake2b_length(512).unwrap(), None); - assert_eq!(calculate_blake2b_length(256).unwrap(), Some(32)); - } - - #[test] - fn test_detect_algo() { - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_SYSV, None).unwrap().name, - ALGORITHM_OPTIONS_SYSV - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_BSD, None).unwrap().name, - ALGORITHM_OPTIONS_BSD - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_CRC, None).unwrap().name, - ALGORITHM_OPTIONS_CRC - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_MD5, None).unwrap().name, - ALGORITHM_OPTIONS_MD5 - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_SHA1, None).unwrap().name, - ALGORITHM_OPTIONS_SHA1 - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_SHA224, None).unwrap().name, - ALGORITHM_OPTIONS_SHA224.to_ascii_uppercase() - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_SHA256, None).unwrap().name, - ALGORITHM_OPTIONS_SHA256.to_ascii_uppercase() - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_SHA384, None).unwrap().name, - 
ALGORITHM_OPTIONS_SHA384.to_ascii_uppercase() - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_SHA512, None).unwrap().name, - ALGORITHM_OPTIONS_SHA512.to_ascii_uppercase() - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_BLAKE2B, None).unwrap().name, - ALGORITHM_OPTIONS_BLAKE2B - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_BLAKE3, None).unwrap().name, - ALGORITHM_OPTIONS_BLAKE3 - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_SM3, None).unwrap().name, - ALGORITHM_OPTIONS_SM3 - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_SHAKE128, Some(128)) - .unwrap() - .name, - ALGORITHM_OPTIONS_SHAKE128 - ); - assert_eq!( - detect_algo(ALGORITHM_OPTIONS_SHAKE256, Some(256)) - .unwrap() - .name, - ALGORITHM_OPTIONS_SHAKE256 - ); - - // Older versions of checksum used to detect the "sha3" prefix, but not - // anymore. - assert!(detect_algo("sha3_224", Some(224)).is_err()); - assert!(detect_algo("sha3_256", Some(256)).is_err()); - assert!(detect_algo("sha3_384", Some(384)).is_err()); - assert!(detect_algo("sha3_512", Some(512)).is_err()); - - let sha3_224 = detect_algo("sha3", Some(224)).unwrap(); - assert_eq!(sha3_224.name, "SHA3-224"); - assert_eq!(sha3_224.bits, 224); - let sha3_256 = detect_algo("sha3", Some(256)).unwrap(); - assert_eq!(sha3_256.name, "SHA3-256"); - assert_eq!(sha3_256.bits, 256); - let sha3_384 = detect_algo("sha3", Some(384)).unwrap(); - assert_eq!(sha3_384.name, "SHA3-384"); - assert_eq!(sha3_384.bits, 384); - let sha3_512 = detect_algo("sha3", Some(512)).unwrap(); - assert_eq!(sha3_512.name, "SHA3-512"); - assert_eq!(sha3_512.bits, 512); - - assert!(detect_algo("sha3", None).is_err()); - - assert_eq!(detect_algo("sha2", Some(224)).unwrap().name, "SHA224"); - assert_eq!(detect_algo("sha2", Some(256)).unwrap().name, "SHA256"); - assert_eq!(detect_algo("sha2", Some(384)).unwrap().name, "SHA384"); - assert_eq!(detect_algo("sha2", Some(512)).unwrap().name, "SHA512"); - - assert!(detect_algo("sha2", None).is_err()); - } + use super::*; #[test] fn 
test_algo_based_parser() { @@ -1731,7 +1185,7 @@ mod tests { #[test] fn test_print_file_report() { - let opts = ChecksumOptions::default(); + let opts = ChecksumValidateOptions::default(); let cases: &[(&[u8], FileChecksumResult, &str, &[u8])] = &[ (b"filename", FileChecksumResult::Ok, "", b"filename: OK\n"), diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index d966e4b1fe0..4b9459ef5c8 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -774,14 +774,31 @@ fn test_blake2b_length() { #[test] fn test_blake2b_length_greater_than_512() { - new_ucmd!() - .arg("--length=1024") - .arg("--algorithm=blake2b") - .arg("lorem_ipsum.txt") - .arg("alice_in_wonderland.txt") - .fails_with_code(1) - .no_stdout() - .stderr_is_fixture("length_larger_than_512.expected"); + for l in ["513", "1024", "73786976294838206464"] { + new_ucmd!() + .arg("--algorithm=blake2b") + .arg("--length") + .arg(l) + .arg("lorem_ipsum.txt") + .fails_with_code(1) + .no_stdout() + .stderr_contains(format!("invalid length: '{l}'")) + .stderr_contains("maximum digest length for 'BLAKE2b' is 512 bits"); + } +} + +#[test] +fn test_blake2b_length_nan() { + for l in ["foo", "512x", "x512", "0xff"] { + new_ucmd!() + .arg("--algorithm=blake2b") + .arg("--length") + .arg(l) + .arg("lorem_ipsum.txt") + .fails_with_code(1) + .no_stdout() + .stderr_contains(format!("invalid length: '{l}'")); + } } #[test] diff --git a/tests/by-util/test_hashsum.rs b/tests/by-util/test_hashsum.rs index b2eb96879b7..beaf994e1e8 100644 --- a/tests/by-util/test_hashsum.rs +++ b/tests/by-util/test_hashsum.rs @@ -107,17 +107,12 @@ macro_rules! 
test_digest { at.write("a", "file1\n"); at.write("c", "file3\n"); - #[cfg(unix)] - let file_not_found_str = "No such file or directory"; - #[cfg(not(unix))] - let file_not_found_str = "The system cannot find the file specified"; - ts.ucmd() .args(&[DIGEST_ARG, BITS_ARG, "a", "b", "c"]) .fails() .stdout_contains("a\n") .stdout_contains("c\n") - .stderr_contains(format!("b: {file_not_found_str}")); + .stderr_contains("b: No such file or directory"); } } )*) @@ -1097,11 +1092,11 @@ fn test_sha256_stdin_binary() { ); } +// This test is currently disabled on windows #[test] +#[cfg_attr(windows, ignore = "Discussion is in #9168")] fn test_check_sha256_binary() { - let ts = TestScenario::new(util_name!()); - - ts.ucmd() + new_ucmd!() .args(&[ "--sha256", "--bits=256", diff --git a/tests/fixtures/cksum/length_larger_than_512.expected b/tests/fixtures/cksum/length_larger_than_512.expected deleted file mode 100644 index 8b5d3d4c22a..00000000000 --- a/tests/fixtures/cksum/length_larger_than_512.expected +++ /dev/null @@ -1,2 +0,0 @@ -cksum: invalid length: '1024' -cksum: maximum digest length for 'BLAKE2b' is 512 bits