From 76116960aaa155205e4ed470065171c1acacd4a7 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sun, 30 Jul 2023 12:24:14 +0200 Subject: [PATCH 1/6] cksum: add arguments from hashsum (but still unused) --- src/uu/cksum/src/cksum.rs | 85 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 24576040937..23ecd8522e6 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -10,6 +10,7 @@ use std::ffi::OsStr; use std::fs::File; use std::io::{self, stdin, BufReader, Read}; use std::iter; +use std::num::ParseIntError; use std::path::Path; use uucore::{ error::{FromIo, UResult}, @@ -244,6 +245,88 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { Ok(()) } +/// The arguments to md5sum and similar utilities. +/// +/// GNU documents this as md5sum-style, so that naming makes sense. +pub fn common_args() -> Vec { + #[cfg(windows)] + const BINARY_HELP: &str = "read in binary mode (default)"; + #[cfg(not(windows))] + const BINARY_HELP: &str = "read in binary mode"; + #[cfg(windows)] + const TEXT_HELP: &str = "read in text mode"; + #[cfg(not(windows))] + const TEXT_HELP: &str = "read in text mode (default)"; + + vec![ + Arg::new("binary") + .short('b') + .long("binary") + .help(BINARY_HELP) + .action(ArgAction::SetTrue), + Arg::new("check") + .short('c') + .long("check") + .help("read hashsums from the FILEs and check them") + .action(ArgAction::SetTrue), + // TODO: --ignore-missing + Arg::new("quiet") + .short('q') + .long("quiet") + .help("don't print OK for each successfully verified file") + .action(ArgAction::SetTrue), + Arg::new("status") + .short('s') + .long("status") + .help("don't output anything, status code shows success") + .action(ArgAction::SetTrue), + Arg::new("tag") + .long("tag") + .help("create a BSD-style checksum") + .action(ArgAction::SetTrue), + Arg::new("text") + .short('t') + .long("text") + .help(TEXT_HELP) + .conflicts_with("binary") + .action(ArgAction::SetTrue), + Arg::new("warn") + .short('w') + .long("warn") + .help("warn about improperly formatted checksum lines") + .action(ArgAction::SetTrue), + Arg::new("strict") + .long("strict") + .help("exit non-zero for improperly formatted checksum lines") + .action(ArgAction::SetTrue), + Arg::new("zero") + .short('z') + .long("zero") + .help("end each output line with NUL, not newline") + .action(ArgAction::SetTrue), + ] +} + +/// b2sum-style args +/// +/// Adds a length argument for the number of bits. +pub fn length_arg() -> Arg { + Arg::new("length") + .short('l') + .long("length") + .help( + "digest length in bits; \ + must not exceed the max for the blake2 algorithm (512) and must be a multiple of 8", + ) + .value_name("BITS") + .value_parser(parse_bit_num) +} + +// TODO: return custom error type +fn parse_bit_num(arg: &str) -> Result { + arg.parse() +} + pub fn uu_app() -> Command { Command::new(uucore::util_name()) .version(crate_version!()) @@ -282,5 +365,7 @@ pub fn uu_app() -> Command { .help("create a reversed style checksum, without digest type") .action(ArgAction::SetTrue), ) + .args(common_args()) + .arg(length_arg()) .after_help(AFTER_HELP) } From ffe102e0f92388bc7f71caf39a86be3cffc4314c Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sun, 30 Jul 2023 12:45:09 +0200 Subject: [PATCH 2/6] cksum: parse new arguments into options struct --- src/uu/cksum/src/cksum.rs | 99 ++++++++++++++++++++++++++++++--------- 1 file changed, 77 insertions(+), 22 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 23ecd8522e6..6101b2323a9 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -37,6 +37,29 @@ const ALGORITHM_OPTIONS_SHA512: &str = "sha512"; const ALGORITHM_OPTIONS_BLAKE2B: &str = "blake2b"; const ALGORITHM_OPTIONS_SM3: &str = "sm3"; +mod options { + // cksum + pub const ALGORITHM: &str = "algorithm"; + pub const FILE: &str = "file"; + pub const UNTAGGED: &str = "untagged"; + + // common + pub const BINARY: &'static str = "binary"; + pub const TEXT: &'static str = "text"; + pub const CHECK: &'static str = "check"; + pub const TAG: &'static str = "tag"; + pub const STATUS: &'static str = "status"; + pub const QUIET: &'static str = "quiet"; + pub const STRICT: &'static str = "strict"; + pub const WARN: &'static str = "warn"; + pub const ZERO: &'static str = "zero"; + + // length argument for variable length utils + pub const LENGTH: &'static str = "length"; +} + +const BINARY_FLAG_DEFAULT: bool = cfg!(windows); + fn detect_algo(program: &str) -> (&'static str, Box, usize) { match program { ALGORITHM_OPTIONS_SYSV => ( @@ -99,10 +122,21 @@ fn detect_algo(program: &str) -> (&'static str, Box, usize } struct Options { + // cksum algo_name: &'static str, digest: Box, - output_bits: usize, untagged: bool, + output_bits: usize, + + // common + binary: bool, + check: bool, + tag: bool, + status: bool, + quiet: bool, + strict: bool, + warn: bool, + zero: bool, } /// Calculate checksum @@ -214,12 +248,6 @@ fn digest_read( } } -mod options { - pub const ALGORITHM: &str = "algorithm"; - pub const FILE: &str = "file"; - pub const UNTAGGED: &str = "untagged"; -} - #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { let matches = uu_app().try_get_matches_from(args)?; @@ -229,12 +257,39 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { None => ALGORITHM_OPTIONS_CRC, }; - let (name, algo, bits) = detect_algo(algo_name); + let (algo_name, digest, output_bits) = detect_algo(algo_name); + + let untagged = matches.get_flag(options::UNTAGGED); + + let binary = if matches.get_flag(options::BINARY) { + true + } else if matches.get_flag(options::TEXT) { + false + } else { + BINARY_FLAG_DEFAULT + }; + let check = matches.get_flag(options::CHECK); + let tag = matches.get_flag(options::TAG); + let status = matches.get_flag(options::STATUS); + let quiet = matches.get_flag(options::QUIET) || status; + let strict = matches.get_flag(options::STRICT); + let warn = matches.get_flag(options::WARN) && !status; + let zero = matches.get_flag(options::ZERO); + let opts = Options { - algo_name: name, - digest: algo, - output_bits: bits, - untagged: matches.get_flag(options::UNTAGGED), + algo_name, + digest, + output_bits, + untagged, + + binary, + check, + tag, + status, + quiet, + strict, + warn, + zero, }; match matches.get_many::(options::FILE) { @@ -259,47 +314,47 @@ pub fn common_args() -> Vec { const TEXT_HELP: &str = "read in text mode (default)"; vec![ - Arg::new("binary") + Arg::new(options::BINARY) .short('b') .long("binary") .help(BINARY_HELP) .action(ArgAction::SetTrue), - Arg::new("check") + Arg::new(options::CHECK) .short('c') .long("check") .help("read hashsums from the FILEs and check them") .action(ArgAction::SetTrue), // TODO: --ignore-missing - Arg::new("quiet") + Arg::new(options::QUIET) .short('q') .long("quiet") .help("don't print OK for each successfully verified file") .action(ArgAction::SetTrue), - Arg::new("status") + Arg::new(options::STATUS) .short('s') .long("status") .help("don't output anything, status code shows success") .action(ArgAction::SetTrue), - Arg::new("tag") + Arg::new(options::TAG) .long("tag") .help("create a BSD-style checksum") .action(ArgAction::SetTrue), - Arg::new("text") + Arg::new(options::TEXT) .short('t') .long("text") .help(TEXT_HELP) .conflicts_with("binary") .action(ArgAction::SetTrue), - Arg::new("warn") + Arg::new(options::WARN) .short('w') .long("warn") .help("warn about improperly formatted checksum lines") .action(ArgAction::SetTrue), - Arg::new("strict") + Arg::new(options::STRICT) .long("strict") .help("exit non-zero for improperly formatted checksum lines") .action(ArgAction::SetTrue), - Arg::new("zero") + Arg::new(options::ZERO) .short('z') .long("zero") .help("end each output line with NUL, not newline") @@ -311,7 +366,7 @@ pub fn common_args() -> Vec { /// /// Adds a length argument for the number of bits. pub fn length_arg() -> Arg { - Arg::new("length") + Arg::new(options::LENGTH) .short('l') .long("length") .help( From 1d867eb31846b50aea648f8194a8a0d46134843b Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sun, 30 Jul 2023 12:48:38 +0200 Subject: [PATCH 3/6] cksum: extract algorithm constants to module --- src/uu/cksum/src/cksum.rs | 129 +++++++++++++------------------------- 1 file changed, 44 insertions(+), 85 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 6101b2323a9..9b07793c981 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -25,17 +25,19 @@ const USAGE: &str = help_usage!("cksum.md"); const ABOUT: &str = help_about!("cksum.md"); const AFTER_HELP: &str = help_section!("after help", "cksum.md"); -const ALGORITHM_OPTIONS_SYSV: &str = "sysv"; -const ALGORITHM_OPTIONS_BSD: &str = "bsd"; -const ALGORITHM_OPTIONS_CRC: &str = "crc"; -const ALGORITHM_OPTIONS_MD5: &str = "md5"; -const ALGORITHM_OPTIONS_SHA1: &str = "sha1"; -const ALGORITHM_OPTIONS_SHA224: &str = "sha224"; -const ALGORITHM_OPTIONS_SHA256: &str = "sha256"; -const ALGORITHM_OPTIONS_SHA384: &str = "sha384"; -const ALGORITHM_OPTIONS_SHA512: &str = "sha512"; -const ALGORITHM_OPTIONS_BLAKE2B: &str = "blake2b"; -const ALGORITHM_OPTIONS_SM3: &str = "sm3"; +mod algorithm { + pub const SYSV: &str = "sysv"; + pub const BSD: &str = "bsd"; + pub const CRC: &str = "crc"; + pub const MD5: &str = "md5"; + pub const SHA1: &str = "sha1"; + pub const SHA224: &str = "sha224"; + pub const SHA256: &str = "sha256"; + pub const SHA384: &str = "sha384"; + pub const SHA512: &str = "sha512"; + pub const BLAKE2B: &str = "blake2b"; + pub const SM3: &str = "sm3"; +} mod options { // cksum @@ -61,62 +63,19 @@ mod options { const BINARY_FLAG_DEFAULT: bool = cfg!(windows); fn detect_algo(program: &str) -> (&'static str, Box, usize) { + use algorithm::*; match program { - ALGORITHM_OPTIONS_SYSV => ( - ALGORITHM_OPTIONS_SYSV, - Box::new(SYSV::new()) as Box, - 512, - ), - ALGORITHM_OPTIONS_BSD => ( - ALGORITHM_OPTIONS_BSD, - Box::new(BSD::new()) as Box, - 1024, - ), - ALGORITHM_OPTIONS_CRC => ( - ALGORITHM_OPTIONS_CRC, - Box::new(CRC::new()) as Box, - 256, - ), - ALGORITHM_OPTIONS_MD5 => ( - ALGORITHM_OPTIONS_MD5, - Box::new(Md5::new()) as Box, - 128, - ), - ALGORITHM_OPTIONS_SHA1 => ( - ALGORITHM_OPTIONS_SHA1, - Box::new(Sha1::new()) as Box, - 160, - ), - ALGORITHM_OPTIONS_SHA224 => ( - ALGORITHM_OPTIONS_SHA224, - Box::new(Sha224::new()) as Box, - 224, - ), - ALGORITHM_OPTIONS_SHA256 => ( - ALGORITHM_OPTIONS_SHA256, - Box::new(Sha256::new()) as Box, - 256, - ), - ALGORITHM_OPTIONS_SHA384 => ( - ALGORITHM_OPTIONS_SHA384, - Box::new(Sha384::new()) as Box, - 384, - ), - ALGORITHM_OPTIONS_SHA512 => ( - ALGORITHM_OPTIONS_SHA512, - Box::new(Sha512::new()) as Box, - 512, - ), - ALGORITHM_OPTIONS_BLAKE2B => ( - ALGORITHM_OPTIONS_BLAKE2B, - Box::new(Blake2b::new()) as Box, - 512, - ), - ALGORITHM_OPTIONS_SM3 => ( - ALGORITHM_OPTIONS_SM3, - Box::new(Sm3::new()) as Box, - 512, - ), + SYSV => (SYSV, Box::new(SYSV::new()) as Box, 512), + BSD => (BSD, Box::new(BSD::new()) as Box, 1024), + CRC => (CRC, Box::new(CRC::new()) as Box, 256), + MD5 => (MD5, Box::new(Md5::new()) as Box, 128), + SHA1 => (SHA1, Box::new(Sha1::new()) as Box, 160), + SHA224 => (SHA224, Box::new(Sha224::new()) as Box, 224), + SHA256 => (SHA256, Box::new(Sha256::new()) as Box, 256), + SHA384 => (SHA384, Box::new(Sha384::new()) as Box, 384), + SHA512 => (SHA512, Box::new(Sha512::new()) as Box, 512), + BLAKE2B => (BLAKE2B, Box::new(Blake2b::new()) as Box, 512), + SM3 => (SM3, Box::new(Sm3::new()) as Box, 512), _ => unreachable!("unknown algorithm: clap should have prevented this case"), } } @@ -171,31 +130,31 @@ where // The BSD checksum output is 5 digit integer let bsd_width = 5; match (options.algo_name, not_file) { - (ALGORITHM_OPTIONS_SYSV, true) => println!( + (algorithm::SYSV, true) => println!( "{} {}", sum.parse::().unwrap(), div_ceil(sz, options.output_bits) ), - (ALGORITHM_OPTIONS_SYSV, false) => println!( + (algorithm::SYSV, false) => println!( "{} {} {}", sum.parse::().unwrap(), div_ceil(sz, options.output_bits), filename.display() ), - (ALGORITHM_OPTIONS_BSD, true) => println!( + (algorithm::BSD, true) => println!( "{:0bsd_width$} {:bsd_width$}", sum.parse::().unwrap(), div_ceil(sz, options.output_bits) ), - (ALGORITHM_OPTIONS_BSD, false) => println!( + (algorithm::BSD, false) => println!( "{:0bsd_width$} {:bsd_width$} {}", sum.parse::().unwrap(), div_ceil(sz, options.output_bits), filename.display() ), - (ALGORITHM_OPTIONS_CRC, true) => println!("{sum} {sz}"), - (ALGORITHM_OPTIONS_CRC, false) => println!("{sum} {sz} {}", filename.display()), - (ALGORITHM_OPTIONS_BLAKE2B, _) if !options.untagged => { + (algorithm::CRC, true) => println!("{sum} {sz}"), + (algorithm::CRC, false) => println!("{sum} {sz} {}", filename.display()), + (algorithm::BLAKE2B, _) if !options.untagged => { println!("BLAKE2b ({}) = {sum}", filename.display()); } _ => { @@ -254,7 +213,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let algo_name: &str = match matches.get_one::(options::ALGORITHM) { Some(v) => v, - None => ALGORITHM_OPTIONS_CRC, + None => algorithm::CRC, }; let (algo_name, digest, output_bits) = detect_algo(algo_name); @@ -401,17 +360,17 @@ pub fn uu_app() -> Command { .help("select the digest type to use. See DIGEST below") .value_name("ALGORITHM") .value_parser([ - ALGORITHM_OPTIONS_SYSV, - ALGORITHM_OPTIONS_BSD, - ALGORITHM_OPTIONS_CRC, - ALGORITHM_OPTIONS_MD5, - ALGORITHM_OPTIONS_SHA1, - ALGORITHM_OPTIONS_SHA224, - ALGORITHM_OPTIONS_SHA256, - ALGORITHM_OPTIONS_SHA384, - ALGORITHM_OPTIONS_SHA512, - ALGORITHM_OPTIONS_BLAKE2B, - ALGORITHM_OPTIONS_SM3, + algorithm::SYSV, + algorithm::BSD, + algorithm::CRC, + algorithm::MD5, + algorithm::SHA1, + algorithm::SHA224, + algorithm::SHA256, + algorithm::SHA384, + algorithm::SHA512, + algorithm::BLAKE2B, + algorithm::SM3, ]), ) .arg( From f507dcd7b78ce16706a0fb193a161d658527c0cc Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sun, 30 Jul 2023 13:08:27 +0200 Subject: [PATCH 4/6] cksum: extract open_file function --- src/uu/cksum/src/cksum.rs | 51 +++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 9b07793c981..50c4aad2f00 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -110,26 +110,15 @@ where I: Iterator, { for filename in files { - let filename = Path::new(filename); - let stdin_buf; - let file_buf; - let not_file = filename == OsStr::new("-"); - let mut file = BufReader::new(if not_file { - stdin_buf = stdin(); - Box::new(stdin_buf) as Box - } else if filename.is_dir() { - Box::new(BufReader::new(io::empty())) as Box - } else { - file_buf = - File::open(filename).map_err_context(|| filename.to_str().unwrap().to_string())?; - Box::new(file_buf) as Box - }); + let is_stdin = filename == OsStr::new("-"); + let mut file = open_file(filename)?; + let path = Path::new(filename); let (sum, sz) = digest_read(&mut options.digest, &mut file, options.output_bits) .map_err_context(|| "failed to read input".to_string())?; // The BSD checksum output is 5 digit integer let bsd_width = 5; - match (options.algo_name, not_file) { + match (options.algo_name, is_stdin) { (algorithm::SYSV, true) => println!( "{} {}", sum.parse::().unwrap(), @@ -139,7 +128,7 @@ where "{} {} {}", sum.parse::().unwrap(), div_ceil(sz, options.output_bits), - filename.display() + path.display() ), (algorithm::BSD, true) => println!( "{:0bsd_width$} {:bsd_width$}", @@ -150,21 +139,21 @@ where "{:0bsd_width$} {:bsd_width$} {}", sum.parse::().unwrap(), div_ceil(sz, options.output_bits), - filename.display() + path.display() ), (algorithm::CRC, true) => println!("{sum} {sz}"), - (algorithm::CRC, false) => println!("{sum} {sz} {}", filename.display()), + (algorithm::CRC, false) => println!("{sum} {sz} {}", path.display()), (algorithm::BLAKE2B, _) if !options.untagged => { - println!("BLAKE2b ({}) = {sum}", filename.display()); + println!("BLAKE2b ({}) = {sum}", path.display()); } _ => { if options.untagged { - println!("{sum} {}", filename.display()); + println!("{sum} {}", path.display()); } else { println!( "{} ({}) = {sum}", options.algo_name.to_ascii_uppercase(), - filename.display() + path.display() ); } } @@ -174,6 +163,26 @@ where Ok(()) } +fn open_file(filename: &OsStr) -> UResult>> { + let stdin_buf; + let file_buf; + let is_stdin = filename == OsStr::new("-"); + + let path = Path::new(filename); + let reader = if is_stdin { + stdin_buf = stdin(); + Box::new(stdin_buf) as Box + } else if path.is_dir() { + Box::new(BufReader::new(io::empty())) as Box + } else { + file_buf = + File::open(filename).map_err_context(|| filename.to_str().unwrap().to_string())?; + Box::new(file_buf) as Box + }; + + Ok(BufReader::new(reader)) +} + fn digest_read( digest: &mut Box, reader: &mut BufReader, From 2f7b963d312abf773fcbee093074cb6cefd9fbda Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sun, 30 Jul 2023 13:12:28 +0200 Subject: [PATCH 5/6] cksum: implement binary option --- src/uu/cksum/src/cksum.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 50c4aad2f00..2b8c32f0c46 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -113,8 +113,13 @@ where let is_stdin = filename == OsStr::new("-"); let mut file = open_file(filename)?; let path = Path::new(filename); - let (sum, sz) = digest_read(&mut options.digest, &mut file, options.output_bits) - .map_err_context(|| "failed to read input".to_string())?; + let (sum, sz) = digest_read( + &mut options.digest, + &mut file, + options.binary, + options.output_bits, + ) + .map_err_context(|| "failed to read input".to_string())?; // The BSD checksum output is 5 digit integer let bsd_width = 5; @@ -186,6 +191,7 @@ fn open_file(filename: &OsStr) -> UResult>> { fn digest_read( digest: &mut Box, reader: &mut BufReader, + binary: bool, output_bits: usize, ) -> io::Result<(String, usize)> { digest.reset(); @@ -202,7 +208,7 @@ fn digest_read( // `DigestWriter` and only written if the following character is // "\n". But when "\r" is the last character read, we need to force // it to be written.) - let mut digest_writer = DigestWriter::new(digest, true); + let mut digest_writer = DigestWriter::new(digest, binary); let output_size = std::io::copy(reader, &mut digest_writer)? as usize; digest_writer.finalize(); @@ -249,7 +255,6 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { digest, output_bits, untagged, - binary, check, tag, From 13002d6b105469e227ab6aa1a97b23d099aea748 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 31 Jul 2023 12:55:29 +0200 Subject: [PATCH 6/6] cksum: implement --check --- Cargo.lock | 1 + src/uu/cksum/Cargo.toml | 1 + src/uu/cksum/src/cksum.rs | 248 +++++++++++++++++++++++++++++++++++--- 3 files changed, 230 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d92a288755c..2c8ea712034 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2297,6 +2297,7 @@ version = "0.0.23" dependencies = [ "clap", "hex", + "regex", "uucore", ] diff --git a/src/uu/cksum/Cargo.toml b/src/uu/cksum/Cargo.toml index 9a811fafdbc..5e2b405fdba 100644 --- a/src/uu/cksum/Cargo.toml +++ b/src/uu/cksum/Cargo.toml @@ -16,6 +16,7 @@ path = "src/cksum.rs" [dependencies] clap = { workspace = true } +regex = { workspace = true } uucore = { workspace = true, features = ["sum"] } hex = { workspace = true } diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 2b8c32f0c46..52243457411 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -6,12 +6,17 @@ // spell-checker:ignore (ToDO) fname, algo use clap::{crate_version, Arg, ArgAction, Command}; use hex::encode; +use regex::{Captures, Regex}; +use std::error::Error; use std::ffi::OsStr; use std::fs::File; -use std::io::{self, stdin, BufReader, Read}; +use std::io::{self, stdin, BufRead, BufReader, Read}; use std::iter; use std::num::ParseIntError; use std::path::Path; +use uucore::display::Quotable; +use uucore::error::UError; +use uucore::show_warning; use uucore::{ error::{FromIo, UResult}, format_usage, help_about, help_section, help_usage, @@ -62,6 +67,22 @@ mod options { const BINARY_FLAG_DEFAULT: bool = cfg!(windows); +#[derive(Debug)] +enum CksumError { + InvalidFormat, +} + +impl Error for CksumError {} +impl UError for CksumError {} + +impl std::fmt::Display for CksumError { + fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::InvalidFormat => Ok(()), + } + } +} + fn detect_algo(program: &str) -> (&'static str, Box, usize) { use algorithm::*; match program { @@ -84,7 +105,6 @@ struct Options { // cksum algo_name: &'static str, digest: Box, - untagged: bool, output_bits: usize, // common @@ -95,7 +115,8 @@ struct Options { quiet: bool, strict: bool, warn: bool, - zero: bool, + // zero is unimplemented + _zero: bool, } /// Calculate checksum @@ -105,7 +126,195 @@ struct Options { /// * `options` - CLI options for the assigning checksum algorithm /// * `files` - A iterator of OsStr which is a bunch of files that are using for calculating checksum #[allow(clippy::cognitive_complexity)] -fn cksum<'a, I>(mut options: Options, files: I) -> UResult<()> +fn cksum<'a, I>(options: Options, files: I) -> UResult<()> +where + I: Iterator, +{ + if options.check { + cksum_check(options, files) + } else { + cksum_print(options, files) + } +} + +/// Creates a Regex for parsing lines based on the given format. +/// The default value of `gnu_re` created with this function has to be recreated +/// after the initial line has been parsed, as this line dictates the format +/// for the rest of them, and mixing of formats is disallowed. +fn gnu_re_template(bytes_marker: &str, format_marker: &str) -> Regex { + Regex::new(&format!( + r"^(?P[a-fA-F0-9]{bytes_marker}) {format_marker}(?P.*)" + )) + .expect("internal error: invalid regex") +} + +fn handle_captures( + caps: &Captures, + bytes_marker: &str, + bsd_reversed: &mut Option, + gnu_re: &mut Regex, +) -> (String, String, bool) { + if bsd_reversed.is_none() { + let is_bsd_reversed = caps.name("binary").is_none(); + let format_marker = if is_bsd_reversed { + "" + } else { + r"(?P[ \*])" + } + .to_string(); + + *bsd_reversed = Some(is_bsd_reversed); + *gnu_re = gnu_re_template(bytes_marker, &format_marker); + } + + ( + caps.name("fileName").unwrap().as_str().to_string(), + caps.name("digest").unwrap().as_str().to_ascii_lowercase(), + if *bsd_reversed == Some(false) { + caps.name("binary").unwrap().as_str() == "*" + } else { + false + }, + ) +} + +fn cksum_check<'a, I>(mut options: Options, files: I) -> UResult<()> +where + I: Iterator, +{ + // Set up Regexes for line validation and parsing + // + // First, we compute the number of bytes we expect to be in + // the digest string. If the algorithm has a variable number + // of output bits, then we use the `+` modifier in the + // regular expression, otherwise we use the `{n}` modifier, + // where `n` is the number of bytes. + let bytes = options.digest.output_bits() / 4; + let bytes_marker = if bytes > 0 { + format!("{{{bytes}}}") + } else { + "+".to_string() + }; + + // BSD reversed mode format is similar to the default mode, but doesn’t use + // a character to distinguish binary and text modes. + let mut bsd_reversed = None; + + let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P[ \*])?"); + let bsd_re = Regex::new(&format!( + r"^{algorithm} \((?P.*)\) = (?P[a-fA-F0-9]{digest_size})", + algorithm = options.algo_name, + digest_size = bytes_marker, + )) + .expect("internal error: invalid regex"); + + // Keep track of the number of errors to report at the end + let mut num_bad_format_errors = 0; + let mut num_failed_checksums = 0; + let mut num_failed_to_open = 0; + + for filename in files { + let buffer = open_file(filename)?; + for (i, maybe_line) in buffer.lines().enumerate() { + let line = match maybe_line { + Ok(l) => l, + Err(e) => return Err(e.map_err_context(|| "failed to read file".to_string())), + }; + let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) { + Some(caps) => handle_captures(&caps, &bytes_marker, &mut bsd_reversed, &mut gnu_re), + None => match bsd_re.captures(&line) { + Some(caps) => ( + caps.name("fileName").unwrap().as_str().to_string(), + caps.name("digest").unwrap().as_str().to_ascii_lowercase(), + true, + ), + None => { + num_bad_format_errors += 1; + if options.strict { + return Err(CksumError::InvalidFormat.into()); + } + if options.warn { + show_warning!( + "{}: {}: improperly formatted {} checksum line", + filename.maybe_quote(), + i + 1, + options.algo_name + ); + } + continue; + } + }, + }; + let f = match File::open(ck_filename.clone()) { + Err(_) => { + num_failed_to_open += 1; + println!( + "{}: {}: No such file or directory", + uucore::util_name(), + ck_filename + ); + println!("{ck_filename}: FAILED open or read"); + continue; + } + Ok(file) => file, + }; + let mut ckf = BufReader::new(Box::new(f) as Box); + let real_sum = digest_read( + &mut options.digest, + &mut ckf, + binary_check, + options.output_bits, + ) + .map_err_context(|| "failed to read input".to_string())? + .0 + .to_ascii_lowercase(); + + // FIXME: Filenames with newlines should be treated specially. + // GNU appears to replace newlines by \n and backslashes by + // \\ and prepend a backslash (to the hash or filename) if it did + // this escaping. + // Different sorts of output (checking vs outputting hashes) may + // handle this differently. Compare carefully to GNU. + // If you can, try to preserve invalid unicode using OsStr(ing)Ext + // and display it using uucore::display::print_verbatim(). This is + // easier (and more important) on Unix than on Windows. + if sum == real_sum { + if !options.quiet { + println!("{ck_filename}: OK"); + } + } else { + if !options.status { + println!("{ck_filename}: FAILED"); + } + num_failed_checksums += 1; + } + } + } + + if !options.status { + match num_bad_format_errors { + 0 => {} + 1 => show_warning!("1 line is improperly formatted"), + _ => show_warning!("{} lines are improperly formatted", num_bad_format_errors), + } + match num_failed_checksums { + 0 => {} + 1 => show_warning!("WARNING: 1 computed checksum did NOT match"), + _ => show_warning!( + "WARNING: {} computed checksum did NOT match", + num_failed_checksums + ), + } + match num_failed_to_open { + 0 => {} + 1 => show_warning!("1 listed file could not be read"), + _ => show_warning!("{} listed file could not be read", num_failed_to_open), + } + } + Ok(()) +} + +fn cksum_print<'a, I>(mut options: Options, files: I) -> UResult<()> where I: Iterator, { @@ -148,39 +357,36 @@ where ), (algorithm::CRC, true) => println!("{sum} {sz}"), (algorithm::CRC, false) => println!("{sum} {sz} {}", path.display()), - (algorithm::BLAKE2B, _) if !options.untagged => { + (algorithm::BLAKE2B, _) if options.tag => { println!("BLAKE2b ({}) = {sum}", path.display()); } _ => { - if options.untagged { - println!("{sum} {}", path.display()); - } else { + if options.tag { println!( "{} ({}) = {sum}", options.algo_name.to_ascii_uppercase(), path.display() ); + } else { + println!("{sum} {}", path.display()); } } - } + }; } - Ok(()) } fn open_file(filename: &OsStr) -> UResult>> { - let stdin_buf; - let file_buf; let is_stdin = filename == OsStr::new("-"); let path = Path::new(filename); let reader = if is_stdin { - stdin_buf = stdin(); + let stdin_buf = stdin(); Box::new(stdin_buf) as Box } else if path.is_dir() { Box::new(BufReader::new(io::empty())) as Box } else { - file_buf = + let file_buf = File::open(filename).map_err_context(|| filename.to_str().unwrap().to_string())?; Box::new(file_buf) as Box }; @@ -233,8 +439,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let (algo_name, digest, output_bits) = detect_algo(algo_name); - let untagged = matches.get_flag(options::UNTAGGED); - + // TODO: This is not supported by GNU. It is added here so we can use cksum + // as a base for the specialized utils, but it should ultimately be hidden + // on cksum itself. let binary = if matches.get_flag(options::BINARY) { true } else if matches.get_flag(options::TEXT) { @@ -242,8 +449,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } else { BINARY_FLAG_DEFAULT }; + let check = matches.get_flag(options::CHECK); - let tag = matches.get_flag(options::TAG); + let tag = matches.get_flag(options::TAG) || !matches.get_flag(options::UNTAGGED); let status = matches.get_flag(options::STATUS); let quiet = matches.get_flag(options::QUIET) || status; let strict = matches.get_flag(options::STRICT); @@ -254,7 +462,6 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { algo_name, digest, output_bits, - untagged, binary, check, tag, @@ -262,7 +469,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { quiet, strict, warn, - zero, + _zero: zero, }; match matches.get_many::(options::FILE) { @@ -391,7 +598,8 @@ pub fn uu_app() -> Command { Arg::new(options::UNTAGGED) .long(options::UNTAGGED) .help("create a reversed style checksum, without digest type") - .action(ArgAction::SetTrue), + .action(ArgAction::SetTrue) + .overrides_with(options::TAG), ) .args(common_args()) .arg(length_arg())